src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include "config.h"
  43
  44 #include <cmath>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/gmxlib/nrnb.h"
  49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  51 #include "gromacs/math/functions.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/mdtypes/forceoutput.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/interaction_const.h"
  56 #include "gromacs/mdtypes/md_enums.h"
  57 #include "gromacs/mdtypes/mdatom.h"
  58 #include "gromacs/simd/simd.h"
  59 #include "gromacs/utility/fatalerror.h"
  60
  61
  62 //! Scalar (non-SIMD) data types.
  63 struct ScalarDataTypes
  64 {
  65     using RealType                     = real; //!< The data type to use as real.
  66     using IntType                      = int;  //!< The data type to use as int.
  67     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
  68     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
  69 };
  70
  71 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
  72 //! SIMD data types.
  73 struct SimdDataTypes
  74 {
  75     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
  76     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
  77     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
  78     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
  79 };
  80 #endif
  81
  82 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  83 template<class RealType>
  84 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1 / (*invPthRoot);
  88 }
  89
  90 template<class RealType>
  91 static inline RealType calculateRinv6(const RealType rinvV)
  92 {
  93     RealType rinv6 = rinvV * rinvV;
  94     return (rinv6 * rinv6 * rinv6);
  95 }
  96
  97 template<class RealType>
  98 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
  99 {
 100     return (c6 * rinv6);
 101 }
 102
 103 template<class RealType>
 104 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
 105 {
 106     return (c12 * rinv6 * rinv6);
 107 }
 108
 109 /* reaction-field electrostatics */
 110 template<class RealType>
 111 static inline RealType reactionFieldScalarForce(const RealType qq,
 112                                                 const RealType rinv,
 113                                                 const RealType r,
 114                                                 const real     krf,
 115                                                 const real     two)
 116 {
 117     return (qq * (rinv - two * krf * r * r));
 118 }
 119 template<class RealType>
 120 static inline RealType reactionFieldPotential(const RealType qq,
 121                                               const RealType rinv,
 122                                               const RealType r,
 123                                               const real     krf,
 124                                               const real     potentialShift)
 125 {
 126     return (qq * (rinv + krf * r * r - potentialShift));
 127 }
 128
 129 /* Ewald electrostatics */
 130 template<class RealType>
 131 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
 132 {
 133     return (coulomb * rinv);
 134 }
 135 template<class RealType>
 136 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
 137 {
 138     return (coulomb * (rinv - potentialShift));
 139 }
 140
 141 /* cutoff LJ */
 142 template<class RealType>
 143 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
 144 {
 145     return (v12 - v6);
 146 }
 147 template<class RealType>
 148 static inline RealType lennardJonesPotential(const RealType v6,
 149                                              const RealType v12,
 150                                              const RealType c6,
 151                                              const RealType c12,
 152                                              const real     repulsionShift,
 153                                              const real     dispersionShift,
 154                                              const real     onesixth,
 155                                              const real     onetwelfth)
 156 {
 157     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 158 }
 159
 160 /* Ewald LJ */
 161 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 162 {
 163     return (c6grid * potentialShift * onesixth);
 164 }
 165
 166 /* LJ Potential switch */
 167 template<class RealType>
 168 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
 169                                                const RealType potential,
 170                                                const RealType sw,
 171                                                const RealType r,
 172                                                const RealType rVdw,
 173                                                const RealType dsw,
 174                                                const real     zero)
 175 {
 176     if (r < rVdw)
 177     {
 178         real fScalar = fScalarInp * sw - r * potential * dsw;
 179         return (fScalar);
 180     }
 181     return (zero);
 182 }
 183 template<class RealType>
 184 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
 185                                              const RealType sw,
 186                                              const RealType r,
 187                                              const RealType rVdw,
 188                                              const real     zero)
 189 {
 190     if (r < rVdw)
 191     {
 192         real potential = potentialInp * sw;
 193         return (potential);
 194     }
 195     return (zero);
 196 }
 197
 198
 199 //! Templated free-energy non-bonded kernel
 200 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 201 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 202                                   rvec* gmx_restrict         xx,
 203                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 204                                   const t_forcerec* gmx_restrict fr,
 205                                   const t_mdatoms* gmx_restrict mdatoms,
 206                                   nb_kernel_data_t* gmx_restrict kernel_data,
 207                                   t_nrnb* gmx_restrict nrnb)
 208 {
 209 #define STATE_A 0
 210 #define STATE_B 1
 211 #define NSTATES 2
 212
 213     using RealType = typename DataTypes::RealType;
 214     using IntType  = typename DataTypes::IntType;
 215
 216     /* FIXME: How should these be handled with SIMD? */
 217     constexpr real onetwelfth = 1.0 / 12.0;
 218     constexpr real onesixth   = 1.0 / 6.0;
 219     constexpr real zero       = 0.0;
 220     constexpr real half       = 0.5;
 221     constexpr real one        = 1.0;
 222     constexpr real two        = 2.0;
 223     constexpr real six        = 6.0;
 224
 225     /* Extract pointer to non-bonded interaction constants */
 226     const interaction_const_t* ic = fr->ic;
 227
 228     // Extract pair list data
 229     const int  nri    = nlist->nri;
 230     const int* iinr   = nlist->iinr;
 231     const int* jindex = nlist->jindex;
 232     const int* jjnr   = nlist->jjnr;
 233     const int* shift  = nlist->shift;
 234     const int* gid    = nlist->gid;
 235
 236     const real* shiftvec      = fr->shift_vec[0];
 237     const real* chargeA       = mdatoms->chargeA;
 238     const real* chargeB       = mdatoms->chargeB;
 239     real*       Vc            = kernel_data->energygrp_elec;
 240     const int*  typeA         = mdatoms->typeA;
 241     const int*  typeB         = mdatoms->typeB;
 242     const int   ntype         = fr->ntype;
 243     const real* nbfp          = fr->nbfp.data();
 244     const real* nbfp_grid     = fr->ljpme_c6grid;
 245     real*       Vv            = kernel_data->energygrp_vdw;
 246     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 247     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 248     real*       dvdl          = kernel_data->dvdl;
 249     const real  alpha_coul    = fr->sc_alphacoul;
 250     const real  alpha_vdw     = fr->sc_alphavdw;
 251     const real  lam_power     = fr->sc_power;
 252     const real  sigma6_def    = fr->sc_sigma6_def;
 253     const real  sigma6_min    = fr->sc_sigma6_min;
 254     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 255     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 256     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 257
 258     // Extract data from interaction_const_t
 259     const real facel           = ic->epsfac;
 260     const real rcoulomb        = ic->rcoulomb;
 261     const real krf             = ic->k_rf;
 262     const real crf             = ic->c_rf;
 263     const real sh_lj_ewald     = ic->sh_lj_ewald;
 264     const real rvdw            = ic->rvdw;
 265     const real dispersionShift = ic->dispersion_shift.cpot;
 266     const real repulsionShift  = ic->repulsion_shift.cpot;
 267
 268     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 269     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 270                "Potential switching is not supported for Coulomb with FEP");
 271
 272     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 273     if (vdwModifierIsPotSwitch)
 274     {
 275         const real d = ic->rvdw - ic->rvdw_switch;
 276         vdw_swV3     = -10.0 / (d * d * d);
 277         vdw_swV4     = 15.0 / (d * d * d * d);
 278         vdw_swV5     = -6.0 / (d * d * d * d * d);
 279         vdw_swF2     = -30.0 / (d * d * d);
 280         vdw_swF3     = 60.0 / (d * d * d * d);
 281         vdw_swF4     = -30.0 / (d * d * d * d * d);
 282     }
 283     else
 284     {
 285         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 286         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 287     }
 288
 289     int icoul;
 290     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 291     {
 292         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 293     }
 294     else
 295     {
 296         icoul = GMX_NBKERNEL_ELEC_NONE;
 297     }
 298
 299     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 300     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 301
 302     const real* tab_ewald_F_lj           = nullptr;
 303     const real* tab_ewald_V_lj           = nullptr;
 304     const real* ewtab                    = nullptr;
 305     real        coulombTableScale        = 0;
 306     real        coulombTableScaleInvHalf = 0;
 307     real        vdwTableScale            = 0;
 308     real        vdwTableScaleInvHalf     = 0;
 309     real        sh_ewald                 = 0;
 310     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 311     {
 312         sh_ewald = ic->sh_ewald;
 313     }
 314     if (elecInteractionTypeIsEwald)
 315     {
 316         const auto& coulombTables = *ic->coulombEwaldTables;
 317         ewtab                     = coulombTables.tableFDV0.data();
 318         coulombTableScale         = coulombTables.scale;
 319         coulombTableScaleInvHalf  = half / coulombTableScale;
 320     }
 321     if (vdwInteractionTypeIsEwald)
 322     {
 323         const auto& vdwTables = *ic->vdwEwaldTables;
 324         tab_ewald_F_lj        = vdwTables.tableF.data();
 325         tab_ewald_V_lj        = vdwTables.tableV.data();
 326         vdwTableScale         = vdwTables.scale;
 327         vdwTableScaleInvHalf  = half / vdwTableScale;
 328     }
 329
 330     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 331      * reciprocal space. When we use non-switched Ewald interactions, we
 332      * assume the soft-coring does not significantly affect the grid contribution
 333      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 334      *
 335      * However, we cannot use this approach for switch-modified since we would then
 336      * effectively end up evaluating a significantly different interaction here compared to the
 337      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 338      * position than what the user requested, or by switching different
 339      * things (1/r rather than short-range Ewald). For these settings, we just
 340      * use the traditional short-range Ewald interaction in that case.
 341      */
 342     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 343                        "Can not apply soft-core to switched Ewald potentials");
 344
 345     real dvdl_coul = 0;
 346     real dvdl_vdw  = 0;
 347
 348     /* Lambda factor for state A, 1-lambda*/
 349     real LFC[NSTATES], LFV[NSTATES];
 350     LFC[STATE_A] = one - lambda_coul;
 351     LFV[STATE_A] = one - lambda_vdw;
 352
 353     /* Lambda factor for state B, lambda*/
 354     LFC[STATE_B] = lambda_coul;
 355     LFV[STATE_B] = lambda_vdw;
 356
 357     /*derivative of the lambda factor for state A and B */
 358     real DLF[NSTATES];
 359     DLF[STATE_A] = -1;
 360     DLF[STATE_B] = 1;
 361
 362     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 363     constexpr real sc_r_power = 6.0_real;
 364     for (int i = 0; i < NSTATES; i++)
 365     {
 366         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 367         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 368         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 369         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 370     }
 371
 372     // TODO: We should get rid of using pointers to real
 373     const real* x             = xx[0];
 374     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 375     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 376
 377     for (int n = 0; n < nri; n++)
 378     {
 379         int npair_within_cutoff = 0;
 380
 381         const int  is3   = 3 * shift[n];
 382         const real shX   = shiftvec[is3];
 383         const real shY   = shiftvec[is3 + 1];
 384         const real shZ   = shiftvec[is3 + 2];
 385         const int  nj0   = jindex[n];
 386         const int  nj1   = jindex[n + 1];
 387         const int  ii    = iinr[n];
 388         const int  ii3   = 3 * ii;
 389         const real ix    = shX + x[ii3 + 0];
 390         const real iy    = shY + x[ii3 + 1];
 391         const real iz    = shZ + x[ii3 + 2];
 392         const real iqA   = facel * chargeA[ii];
 393         const real iqB   = facel * chargeB[ii];
 394         const int  ntiA  = 2 * ntype * typeA[ii];
 395         const int  ntiB  = 2 * ntype * typeB[ii];
 396         real       vctot = 0;
 397         real       vvtot = 0;
 398         real       fix   = 0;
 399         real       fiy   = 0;
 400         real       fiz   = 0;
 401
 402         for (int k = nj0; k < nj1; k++)
 403         {
 404             int            tj[NSTATES];
 405             const int      jnr = jjnr[k];
 406             const int      j3  = 3 * jnr;
 407             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 408             RealType       r, rinv, rp, rpm2;
 409             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
 410             const RealType dx  = ix - x[j3];
 411             const RealType dy  = iy - x[j3 + 1];
 412             const RealType dz  = iz - x[j3 + 2];
 413             const RealType rsq = dx * dx + dy * dy + dz * dz;
 414             RealType       FscalC[NSTATES], FscalV[NSTATES];
 415             /* Check if this pair on the exlusions list.*/
 416             const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
 417
 418             if (rsq >= rcutoff_max2 && bPairIncluded)
 419             {
 420                 /* We save significant time by skipping all code below.
 421                  * Note that with soft-core interactions, the actual cut-off
 422                  * check might be different. But since the soft-core distance
 423                  * is always larger than r, checking on r here is safe.
 424                  * Exclusions outside the cutoff can not be skipped as
 425                  * when using Ewald: the reciprocal-space
 426                  * Ewald component still needs to be subtracted.
 427                  */
 428
 429                 continue;
 430             }
 431             npair_within_cutoff++;
 432
 433             if (rsq > 0)
 434             {
 435                 /* Note that unlike in the nbnxn kernels, we do not need
 436                  * to clamp the value of rsq before taking the invsqrt
 437                  * to avoid NaN in the LJ calculation, since here we do
 438                  * not calculate LJ interactions when C6 and C12 are zero.
 439                  */
 440
 441                 rinv = gmx::invsqrt(rsq);
 442                 r    = rsq * rinv;
 443             }
 444             else
 445             {
 446                 /* The force at r=0 is zero, because of symmetry.
 447                  * But note that the potential is in general non-zero,
 448                  * since the soft-cored r will be non-zero.
 449                  */
 450                 rinv = 0;
 451                 r    = 0;
 452             }
 453
 454             if (useSoftCore)
 455             {
 456                 rpm2 = rsq * rsq;  /* r4 */
 457                 rp   = rpm2 * rsq; /* r6 */
 458             }
 459             else
 460             {
 461                 /* The soft-core power p will not affect the results
 462                  * with not using soft-core, so we use power of 0 which gives
 463                  * the simplest math and cheapest code.
 464                  */
 465                 rpm2 = rinv * rinv;
 466                 rp   = 1;
 467             }
 468
 469             RealType Fscal = 0;
 470
 471             qq[STATE_A] = iqA * chargeA[jnr];
 472             qq[STATE_B] = iqB * chargeB[jnr];
 473
 474             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 475             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 476
 477             if (bPairIncluded)
 478             {
 479                 c6[STATE_A] = nbfp[tj[STATE_A]];
 480                 c6[STATE_B] = nbfp[tj[STATE_B]];
 481
 482                 for (int i = 0; i < NSTATES; i++)
 483                 {
 484                     c12[i] = nbfp[tj[i] + 1];
 485                     if (useSoftCore)
 486                     {
 487                         if ((c6[i] > 0) && (c12[i] > 0))
 488                         {
 489                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 490                             sigma6[i] = half * c12[i] / c6[i];
 491                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 492                             {
 493                                 sigma6[i] = sigma6_min;
 494                             }
 495                         }
 496                         else
 497                         {
 498                             sigma6[i] = sigma6_def;
 499                         }
 500                     }
 501                 }
 502
 503                 if (useSoftCore)
 504                 {
 505                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 506                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 507                     {
 508                         alpha_vdw_eff  = 0;
 509                         alpha_coul_eff = 0;
 510                     }
 511                     else
 512                     {
 513                         alpha_vdw_eff  = alpha_vdw;
 514                         alpha_coul_eff = alpha_coul;
 515                     }
 516                 }
 517
 518                 for (int i = 0; i < NSTATES; i++)
 519                 {
 520                     FscalC[i] = 0;
 521                     FscalV[i] = 0;
 522                     Vcoul[i]  = 0;
 523                     Vvdw[i]   = 0;
 524
 525                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
 526
 527                     /* Only spend time on A or B state if it is non-zero */
 528                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 529                     {
 530                         /* this section has to be inside the loop because of the dependence on sigma6 */
 531                         if (useSoftCore)
 532                         {
 533                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
 534                             pthRoot(rpinvC, &rinvC, &rC);
 535                             if (scLambdasOrAlphasDiffer)
 536                             {
 537                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
 538                                 pthRoot(rpinvV, &rinvV, &rV);
 539                             }
 540                             else
 541                             {
 542                                 /* We can avoid one expensive pow and one / operation */
 543                                 rpinvV = rpinvC;
 544                                 rinvV  = rinvC;
 545                                 rV     = rC;
 546                             }
 547                         }
 548                         else
 549                         {
 550                             rpinvC = 1;
 551                             rinvC  = rinv;
 552                             rC     = r;
 553
 554                             rpinvV = 1;
 555                             rinvV  = rinv;
 556                             rV     = r;
 557                         }
 558
 559                         /* Only process the coulomb interactions if we have charges,
 560                          * and if we either include all entries in the list (no cutoff
 561                          * used in the kernel), or if we are within the cutoff.
 562                          */
 563                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 564                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 565
 566                         if ((qq[i] != 0) && computeElecInteraction)
 567                         {
 568                             if (elecInteractionTypeIsEwald)
 569                             {
 570                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 571                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 572                             }
 573                             else
 574                             {
 575                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 576                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 577                             }
 578                         }
 579
 580                         /* Only process the VDW interactions if we have
 581                          * some non-zero parameters, and if we either
 582                          * include all entries in the list (no cutoff used
 583                          * in the kernel), or if we are within the cutoff.
 584                          */
 585                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 586                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 587                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 588                         {
 589                             RealType rinv6;
 590                             if (useSoftCore)
 591                             {
 592                                 rinv6 = rpinvV;
 593                             }
 594                             else
 595                             {
 596                                 rinv6 = calculateRinv6(rinvV);
 597                             }
 598                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
 599                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
 600
 601                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 602                                                             dispersionShift, onesixth, onetwelfth);
 603                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 604
 605                             if (vdwInteractionTypeIsEwald)
 606                             {
 607                                 /* Subtract the grid potential at the cut-off */
 608                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 609                                                                          sh_lj_ewald, onesixth);
 610                             }
 611
 612                             if (vdwModifierIsPotSwitch)
 613                             {
 614                                 RealType d        = rV - ic->rvdw_switch;
 615                                 d                 = (d > zero) ? d : zero;
 616                                 const RealType d2 = d * d;
 617                                 const RealType sw =
 618                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 619                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 620
 621                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 622                                                                     rvdw, dsw, zero);
 623                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 624                             }
 625                         }
 626
 627                         /* FscalC (and FscalV) now contain: dV/drC * rC
 628                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 629                          * Further down we first multiply by r^p-2 and then by
 630                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 631                          */
 632                         FscalC[i] *= rpinvC;
 633                         FscalV[i] *= rpinvV;
 634                     }
 635                 } // end for (int i = 0; i < NSTATES; i++)
 636
 637                 /* Assemble A and B states */
 638                 for (int i = 0; i < NSTATES; i++)
 639                 {
 640                     vctot += LFC[i] * Vcoul[i];
 641                     vvtot += LFV[i] * Vvdw[i];
 642
 643                     Fscal += LFC[i] * FscalC[i] * rpm2;
 644                     Fscal += LFV[i] * FscalV[i] * rpm2;
 645
 646                     if (useSoftCore)
 647                     {
 648                         dvdl_coul += Vcoul[i] * DLF[i]
 649                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
 650                         dvdl_vdw += Vvdw[i] * DLF[i]
 651                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
 652                     }
 653                     else
 654                     {
 655                         dvdl_coul += Vcoul[i] * DLF[i];
 656                         dvdl_vdw += Vvdw[i] * DLF[i];
 657                     }
 658                 }
 659             } // end if (bPairIncluded)
 660             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 661             {
 662                 /* For excluded pairs, which are only in this pair list when
 663                  * using the Verlet scheme, we don't use soft-core.
 664                  * As there is no singularity, there is no need for soft-core.
 665                  */
 666                 const real FF = -two * krf;
 667                 RealType   VV = krf * rsq - crf;
 668
 669                 if (ii == jnr)
 670                 {
 671                     VV *= half;
 672                 }
 673
 674                 for (int i = 0; i < NSTATES; i++)
 675                 {
 676                     vctot += LFC[i] * qq[i] * VV;
 677                     Fscal += LFC[i] * qq[i] * FF;
 678                     dvdl_coul += DLF[i] * qq[i] * VV;
 679                 }
 680             }
 681
 682             if (elecInteractionTypeIsEwald && (r < rcoulomb || !bPairIncluded))
 683             {
 684                 /* See comment in the preamble. When using Ewald interactions
 685                  * (unless we use a switch modifier) we subtract the reciprocal-space
 686                  * Ewald component here which made it possible to apply the free
 687                  * energy interaction to 1/r (vanilla coulomb short-range part)
 688                  * above. This gets us closer to the ideal case of applying
 689                  * the softcore to the entire electrostatic interaction,
 690                  * including the reciprocal-space component.
 691                  */
 692                 real v_lr, f_lr;
 693
 694                 const RealType ewrt   = r * coulombTableScale;
 695                 IntType        ewitab = static_cast<IntType>(ewrt);
 696                 const RealType eweps  = ewrt - ewitab;
 697                 ewitab                = 4 * ewitab;
 698                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 699                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
 700                 f_lr *= rinv;
 701
 702                 /* Note that any possible Ewald shift has already been applied in
 703                  * the normal interaction part above.
 704                  */
 705
 706                 if (ii == jnr)
 707                 {
 708                     /* If we get here, the i particle (ii) has itself (jnr)
 709                      * in its neighborlist. This can only happen with the Verlet
 710                      * scheme, and corresponds to a self-interaction that will
 711                      * occur twice. Scale it down by 50% to only include it once.
 712                      */
 713                     v_lr *= half;
 714                 }
 715
 716                 for (int i = 0; i < NSTATES; i++)
 717                 {
 718                     vctot -= LFC[i] * qq[i] * v_lr;
 719                     Fscal -= LFC[i] * qq[i] * f_lr;
 720                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 721                 }
 722             }
 723
 724             if (vdwInteractionTypeIsEwald && r < rvdw)
 725             {
 726                 /* See comment in the preamble. When using LJ-Ewald interactions
 727                  * (unless we use a switch modifier) we subtract the reciprocal-space
 728                  * Ewald component here which made it possible to apply the free
 729                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 730                  * above. This gets us closer to the ideal case of applying
 731                  * the softcore to the entire VdW interaction,
 732                  * including the reciprocal-space component.
 733                  */
 734                 /* We could also use the analytical form here
 735                  * iso a table, but that can cause issues for
 736                  * r close to 0 for non-interacting pairs.
 737                  */
 738
 739                 const RealType rs   = rsq * rinv * vdwTableScale;
 740                 const IntType  ri   = static_cast<IntType>(rs);
 741                 const RealType frac = rs - ri;
 742                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 743                 /* TODO: Currently the Ewald LJ table does not contain
 744                  * the factor 1/6, we should add this.
 745                  */
 746                 const RealType FF = f_lr * rinv / six;
 747                 RealType       VV =
 748                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
 749                         / six;
 750
 751                 if (ii == jnr)
 752                 {
 753                     /* If we get here, the i particle (ii) has itself (jnr)
 754                      * in its neighborlist. This can only happen with the Verlet
 755                      * scheme, and corresponds to a self-interaction that will
 756                      * occur twice. Scale it down by 50% to only include it once.
 757                      */
 758                     VV *= half;
 759                 }
 760
 761                 for (int i = 0; i < NSTATES; i++)
 762                 {
 763                     const real c6grid = nbfp_grid[tj[i]];
 764                     vvtot += LFV[i] * c6grid * VV;
 765                     Fscal += LFV[i] * c6grid * FF;
 766                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 767                 }
 768             }
 769
 770             if (doForces)
 771             {
 772                 const real tx = Fscal * dx;
 773                 const real ty = Fscal * dy;
 774                 const real tz = Fscal * dz;
 775                 fix           = fix + tx;
 776                 fiy           = fiy + ty;
 777                 fiz           = fiz + tz;
 778                 /* OpenMP atomics are expensive, but this kernels is also
 779                  * expensive, so we can take this hit, instead of using
 780                  * thread-local output buffers and extra reduction.
 781                  *
 782                  * All the OpenMP regions in this file are trivial and should
 783                  * not throw, so no need for try/catch.
 784                  */
 785 #pragma omp atomic
 786                 f[j3] -= tx;
 787 #pragma omp atomic
 788                 f[j3 + 1] -= ty;
 789 #pragma omp atomic
 790                 f[j3 + 2] -= tz;
 791             }
 792         } // end for (int k = nj0; k < nj1; k++)
 793
 794         /* The atomics below are expensive with many OpenMP threads.
 795          * Here unperturbed i-particles will usually only have a few
 796          * (perturbed) j-particles in the list. Thus with a buffered list
 797          * we can skip a significant number of i-reductions with a check.
 798          */
 799         if (npair_within_cutoff > 0)
 800         {
 801             if (doForces)
 802             {
 803 #pragma omp atomic
 804                 f[ii3] += fix;
 805 #pragma omp atomic
 806                 f[ii3 + 1] += fiy;
 807 #pragma omp atomic
 808                 f[ii3 + 2] += fiz;
 809             }
 810             if (doShiftForces)
 811             {
 812 #pragma omp atomic
 813                 fshift[is3] += fix;
 814 #pragma omp atomic
 815                 fshift[is3 + 1] += fiy;
 816 #pragma omp atomic
 817                 fshift[is3 + 2] += fiz;
 818             }
 819             if (doPotential)
 820             {
 821                 int ggid = gid[n];
 822 #pragma omp atomic
 823                 Vc[ggid] += vctot;
 824 #pragma omp atomic
 825                 Vv[ggid] += vvtot;
 826             }
 827         }
 828     } // end for (int n = 0; n < nri; n++)
 829
 830 #pragma omp atomic
 831     dvdl[efptCOUL] += dvdl_coul;
 832 #pragma omp atomic
 833     dvdl[efptVDW] += dvdl_vdw;
 834
 835     /* Estimate flops, average for free energy stuff:
 836      * 12  flops per outer iteration
 837      * 150 flops per inner iteration
 838      */
 839 #pragma omp atomic
 840     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 841 }
 842
 843 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 844                                rvec* gmx_restrict         xx,
 845                                gmx::ForceWithShiftForces* forceWithShiftForces,
 846                                const t_forcerec* gmx_restrict fr,
 847                                const t_mdatoms* gmx_restrict mdatoms,
 848                                nb_kernel_data_t* gmx_restrict kernel_data,
 849                                t_nrnb* gmx_restrict nrnb);
 850
 851 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 852 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 853 {
 854     if (useSimd)
 855     {
 856 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
 857         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
 858         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 859                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 860 #else
 861         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 862                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 863 #endif
 864     }
 865     else
 866     {
 867         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 868                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 869     }
 870 }
 871
 872 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 873 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
 874 {
 875     if (vdwModifierIsPotSwitch)
 876     {
 877         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 878                                         elecInteractionTypeIsEwald, true>(useSimd));
 879     }
 880     else
 881     {
 882         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 883                                         elecInteractionTypeIsEwald, false>(useSimd));
 884     }
 885 }
 886
 887 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 888 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 889                                                           const bool vdwModifierIsPotSwitch,
 890                                                           const bool useSimd)
 891 {
 892     if (elecInteractionTypeIsEwald)
 893     {
 894         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 895                 vdwModifierIsPotSwitch, useSimd));
 896     }
 897     else
 898     {
 899         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 900                 vdwModifierIsPotSwitch, useSimd));
 901     }
 902 }
 903
 904 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 905 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 906                                                          const bool elecInteractionTypeIsEwald,
 907                                                          const bool vdwModifierIsPotSwitch,
 908                                                          const bool useSimd)
 909 {
 910     if (vdwInteractionTypeIsEwald)
 911     {
 912         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 913                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 914     }
 915     else
 916     {
 917         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 918                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 919     }
 920 }
 921
 922 template<bool useSoftCore>
 923 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 924                                                                   const bool vdwInteractionTypeIsEwald,
 925                                                                   const bool elecInteractionTypeIsEwald,
 926                                                                   const bool vdwModifierIsPotSwitch,
 927                                                                   const bool useSimd)
 928 {
 929     if (scLambdasOrAlphasDiffer)
 930     {
 931         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 932                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 933     }
 934     else
 935     {
 936         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 937                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 938     }
 939 }
 940
 941 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 942                                      const bool        vdwInteractionTypeIsEwald,
 943                                      const bool        elecInteractionTypeIsEwald,
 944                                      const bool        vdwModifierIsPotSwitch,
 945                                      const bool        useSimd,
 946                                      const t_forcerec* fr)
 947 {
 948     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 949     {
 950         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
 951                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 952                 vdwModifierIsPotSwitch, useSimd));
 953     }
 954     else
 955     {
 956         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
 957                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 958                 vdwModifierIsPotSwitch, useSimd));
 959     }
 960 }
 961
 962
 963 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 964                                rvec*                      xx,
 965                                gmx::ForceWithShiftForces* ff,
 966                                const t_forcerec*          fr,
 967                                const t_mdatoms*           mdatoms,
 968                                nb_kernel_data_t*          kernel_data,
 969                                t_nrnb*                    nrnb)
 970 {
 971     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 972                "Unsupported eeltype with free energy");
 973
 974     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 975     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 976     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 977     bool       scLambdasOrAlphasDiffer    = true;
 978     const bool useSimd                    = fr->use_simd_kernels;
 979
 980     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 981     {
 982         scLambdasOrAlphasDiffer = false;
 983     }
 984     else if (fr->sc_r_power == 6.0_real)
 985     {
 986         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 987         {
 988             scLambdasOrAlphasDiffer = false;
 989         }
 990     }
 991     else
 992     {
 993         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 994     }
 995
 996     KernelFunction kernelFunc;
 997     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 998                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, fr);
 999     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1000 }