src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include "config.h"
  43
  44 #include <cmath>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/gmxlib/nrnb.h"
  49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  51 #include "gromacs/math/functions.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/mdtypes/forceoutput.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/interaction_const.h"
  56 #include "gromacs/mdtypes/md_enums.h"
  57 #include "gromacs/mdtypes/mdatom.h"
  58 #include "gromacs/simd/simd.h"
  59 #include "gromacs/utility/fatalerror.h"
  60 #include "gromacs/utility/arrayref.h"
  61
  62
  63 //! Scalar (non-SIMD) data types.
  64 struct ScalarDataTypes
  65 {
  66     using RealType                     = real; //!< The data type to use as real.
  67     using IntType                      = int;  //!< The data type to use as int.
  68     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
  69     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
  70 };
  71
  72 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
  73 //! SIMD data types.
  74 struct SimdDataTypes
  75 {
  76     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
  77     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
  78     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
  79     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
  80 };
  81 #endif
  82
  83 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  84 template<class RealType>
  85 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
  86 {
  87     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  88     *pthRoot    = 1 / (*invPthRoot);
  89 }
  90
  91 template<class RealType>
  92 static inline RealType calculateRinv6(const RealType rInvV)
  93 {
  94     RealType rInv6 = rInvV * rInvV;
  95     return (rInv6 * rInv6 * rInv6);
  96 }
  97
  98 template<class RealType>
  99 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
 100 {
 101     return (c6 * rInv6);
 102 }
 103
 104 template<class RealType>
 105 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
 106 {
 107     return (c12 * rInv6 * rInv6);
 108 }
 109
 110 /* reaction-field electrostatics */
 111 template<class RealType>
 112 static inline RealType reactionFieldScalarForce(const RealType qq,
 113                                                 const RealType rInv,
 114                                                 const RealType r,
 115                                                 const real     krf,
 116                                                 const real     two)
 117 {
 118     return (qq * (rInv - two * krf * r * r));
 119 }
 120 template<class RealType>
 121 static inline RealType reactionFieldPotential(const RealType qq,
 122                                               const RealType rInv,
 123                                               const RealType r,
 124                                               const real     krf,
 125                                               const real     potentialShift)
 126 {
 127     return (qq * (rInv + krf * r * r - potentialShift));
 128 }
 129
 130 /* Ewald electrostatics */
 131 template<class RealType>
 132 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
 133 {
 134     return (coulomb * rInv);
 135 }
 136 template<class RealType>
 137 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
 138 {
 139     return (coulomb * (rInv - potentialShift));
 140 }
 141
 142 /* cutoff LJ */
 143 template<class RealType>
 144 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
 145 {
 146     return (v12 - v6);
 147 }
 148 template<class RealType>
 149 static inline RealType lennardJonesPotential(const RealType v6,
 150                                              const RealType v12,
 151                                              const RealType c6,
 152                                              const RealType c12,
 153                                              const real     repulsionShift,
 154                                              const real     dispersionShift,
 155                                              const real     oneSixth,
 156                                              const real     oneTwelfth)
 157 {
 158     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
 159 }
 160
 161 /* Ewald LJ */
 162 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
 163 {
 164     return (c6grid * potentialShift * oneSixth);
 165 }
 166
 167 /* LJ Potential switch */
 168 template<class RealType>
 169 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
 170                                                const RealType potential,
 171                                                const RealType sw,
 172                                                const RealType r,
 173                                                const RealType rVdw,
 174                                                const RealType dsw,
 175                                                const real     zero)
 176 {
 177     if (r < rVdw)
 178     {
 179         real fScalar = fScalarInp * sw - r * potential * dsw;
 180         return (fScalar);
 181     }
 182     return (zero);
 183 }
 184 template<class RealType>
 185 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
 186                                              const RealType sw,
 187                                              const RealType r,
 188                                              const RealType rVdw,
 189                                              const real     zero)
 190 {
 191     if (r < rVdw)
 192     {
 193         real potential = potentialInp * sw;
 194         return (potential);
 195     }
 196     return (zero);
 197 }
 198
 199
 200 //! Templated free-energy non-bonded kernel
 201 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 202 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 203                                   rvec* gmx_restrict         xx,
 204                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 205                                   const t_forcerec* gmx_restrict fr,
 206                                   const t_mdatoms* gmx_restrict mdatoms,
 207                                   nb_kernel_data_t* gmx_restrict kernel_data,
 208                                   t_nrnb* gmx_restrict nrnb)
 209 {
 210 #define STATE_A 0
 211 #define STATE_B 1
 212 #define NSTATES 2
 213
 214     using RealType = typename DataTypes::RealType;
 215     using IntType  = typename DataTypes::IntType;
 216
 217     /* FIXME: How should these be handled with SIMD? */
 218     constexpr real oneTwelfth = 1.0 / 12.0;
 219     constexpr real oneSixth   = 1.0 / 6.0;
 220     constexpr real zero       = 0.0;
 221     constexpr real half       = 0.5;
 222     constexpr real one        = 1.0;
 223     constexpr real two        = 2.0;
 224     constexpr real six        = 6.0;
 225
 226     /* Extract pointer to non-bonded interaction constants */
 227     const interaction_const_t* ic = fr->ic;
 228
 229     // Extract pair list data
 230     const int                nri    = nlist->nri;
 231     gmx::ArrayRef<const int> iinr   = nlist->iinr;
 232     gmx::ArrayRef<const int> jindex = nlist->jindex;
 233     gmx::ArrayRef<const int> jjnr   = nlist->jjnr;
 234     gmx::ArrayRef<const int> shift  = nlist->shift;
 235     gmx::ArrayRef<const int> gid    = nlist->gid;
 236
 237     const real*               shiftvec  = fr->shift_vec[0];
 238     const real*               chargeA   = mdatoms->chargeA;
 239     const real*               chargeB   = mdatoms->chargeB;
 240     real*                     Vc        = kernel_data->energygrp_elec;
 241     const int*                typeA     = mdatoms->typeA;
 242     const int*                typeB     = mdatoms->typeB;
 243     const int                 ntype     = fr->ntype;
 244     gmx::ArrayRef<const real> nbfp      = fr->nbfp;
 245     gmx::ArrayRef<const real> nbfp_grid = fr->ljpme_c6grid;
 246
 247     real*      Vv = kernel_data->energygrp_vdw;
 248     const real lambda_coul =
 249             kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
 250     const real lambda_vdw =
 251             kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
 252     gmx::ArrayRef<real> dvdl          = kernel_data->dvdl;
 253     const auto&         scParams      = *ic->softCoreParameters;
 254     const real          alpha_coul    = scParams.alphaCoulomb;
 255     const real          alpha_vdw     = scParams.alphaVdw;
 256     const real          lam_power     = scParams.lambdaPower;
 257     const real          sigma6_def    = scParams.sigma6WithInvalidSigma;
 258     const real          sigma6_min    = scParams.sigma6Minimum;
 259     const bool          doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 260     const bool          doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 261     const bool          doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 262
 263     // Extract data from interaction_const_t
 264     const real facel           = ic->epsfac;
 265     const real rCoulomb        = ic->rcoulomb;
 266     const real krf             = ic->reactionFieldCoefficient;
 267     const real crf             = ic->reactionFieldShift;
 268     const real shLjEwald       = ic->sh_lj_ewald;
 269     const real rVdw            = ic->rvdw;
 270     const real dispersionShift = ic->dispersion_shift.cpot;
 271     const real repulsionShift  = ic->repulsion_shift.cpot;
 272
 273     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 274     GMX_ASSERT(ic->coulomb_modifier != InteractionModifiers::PotSwitch,
 275                "Potential switching is not supported for Coulomb with FEP");
 276
 277     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 278     if (vdwModifierIsPotSwitch)
 279     {
 280         const real d = ic->rvdw - ic->rvdw_switch;
 281         vdw_swV3     = -10.0 / (d * d * d);
 282         vdw_swV4     = 15.0 / (d * d * d * d);
 283         vdw_swV5     = -6.0 / (d * d * d * d * d);
 284         vdw_swF2     = -30.0 / (d * d * d);
 285         vdw_swF3     = 60.0 / (d * d * d * d);
 286         vdw_swF4     = -30.0 / (d * d * d * d * d);
 287     }
 288     else
 289     {
 290         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 291         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 292     }
 293
 294     NbkernelElecType icoul;
 295     if (ic->eeltype == CoulombInteractionType::Cut || EEL_RF(ic->eeltype))
 296     {
 297         icoul = NbkernelElecType::ReactionField;
 298     }
 299     else
 300     {
 301         icoul = NbkernelElecType::None;
 302     }
 303
 304     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 305     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 306
 307     const real* tab_ewald_F_lj           = nullptr;
 308     const real* tab_ewald_V_lj           = nullptr;
 309     const real* ewtab                    = nullptr;
 310     real        coulombTableScale        = 0;
 311     real        coulombTableScaleInvHalf = 0;
 312     real        vdwTableScale            = 0;
 313     real        vdwTableScaleInvHalf     = 0;
 314     real        sh_ewald                 = 0;
 315     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 316     {
 317         sh_ewald = ic->sh_ewald;
 318     }
 319     if (elecInteractionTypeIsEwald)
 320     {
 321         const auto& coulombTables = *ic->coulombEwaldTables;
 322         ewtab                     = coulombTables.tableFDV0.data();
 323         coulombTableScale         = coulombTables.scale;
 324         coulombTableScaleInvHalf  = half / coulombTableScale;
 325     }
 326     if (vdwInteractionTypeIsEwald)
 327     {
 328         const auto& vdwTables = *ic->vdwEwaldTables;
 329         tab_ewald_F_lj        = vdwTables.tableF.data();
 330         tab_ewald_V_lj        = vdwTables.tableV.data();
 331         vdwTableScale         = vdwTables.scale;
 332         vdwTableScaleInvHalf  = half / vdwTableScale;
 333     }
 334
 335     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 336      * reciprocal space. When we use non-switched Ewald interactions, we
 337      * assume the soft-coring does not significantly affect the grid contribution
 338      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 339      *
 340      * However, we cannot use this approach for switch-modified since we would then
 341      * effectively end up evaluating a significantly different interaction here compared to the
 342      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 343      * position than what the user requested, or by switching different
 344      * things (1/r rather than short-range Ewald). For these settings, we just
 345      * use the traditional short-range Ewald interaction in that case.
 346      */
 347     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 348                        "Can not apply soft-core to switched Ewald potentials");
 349
 350     real dvdlCoul = 0;
 351     real dvdlVdw  = 0;
 352
 353     /* Lambda factor for state A, 1-lambda*/
 354     real LFC[NSTATES], LFV[NSTATES];
 355     LFC[STATE_A] = one - lambda_coul;
 356     LFV[STATE_A] = one - lambda_vdw;
 357
 358     /* Lambda factor for state B, lambda*/
 359     LFC[STATE_B] = lambda_coul;
 360     LFV[STATE_B] = lambda_vdw;
 361
 362     /*derivative of the lambda factor for state A and B */
 363     real DLF[NSTATES];
 364     DLF[STATE_A] = -1;
 365     DLF[STATE_B] = 1;
 366
 367     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
 368     constexpr real sc_r_power = 6.0_real;
 369     for (int i = 0; i < NSTATES; i++)
 370     {
 371         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 372         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 373         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 374         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 375     }
 376
 377     // TODO: We should get rid of using pointers to real
 378     const real* x             = xx[0];
 379     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 380     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 381
 382     const real rlistSquared = gmx::square(fr->rlist);
 383
 384     int numExcludedPairsBeyondRlist = 0;
 385
 386     for (int n = 0; n < nri; n++)
 387     {
 388         int npair_within_cutoff = 0;
 389
 390         const int  is3   = 3 * shift[n];
 391         const real shX   = shiftvec[is3];
 392         const real shY   = shiftvec[is3 + 1];
 393         const real shZ   = shiftvec[is3 + 2];
 394         const int  nj0   = jindex[n];
 395         const int  nj1   = jindex[n + 1];
 396         const int  ii    = iinr[n];
 397         const int  ii3   = 3 * ii;
 398         const real ix    = shX + x[ii3 + 0];
 399         const real iy    = shY + x[ii3 + 1];
 400         const real iz    = shZ + x[ii3 + 2];
 401         const real iqA   = facel * chargeA[ii];
 402         const real iqB   = facel * chargeB[ii];
 403         const int  ntiA  = 2 * ntype * typeA[ii];
 404         const int  ntiB  = 2 * ntype * typeB[ii];
 405         real       vCTot = 0;
 406         real       vVTot = 0;
 407         real       fIX   = 0;
 408         real       fIY   = 0;
 409         real       fIZ   = 0;
 410
 411         for (int k = nj0; k < nj1; k++)
 412         {
 413             int            tj[NSTATES];
 414             const int      jnr = jjnr[k];
 415             const int      j3  = 3 * jnr;
 416             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
 417             RealType       r, rInv, rp, rpm2;
 418             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
 419             const RealType dX  = ix - x[j3];
 420             const RealType dY  = iy - x[j3 + 1];
 421             const RealType dZ  = iz - x[j3 + 2];
 422             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
 423             RealType       fScalC[NSTATES], fScalV[NSTATES];
 424             /* Check if this pair on the exlusions list.*/
 425             const bool bPairIncluded = nlist->excl_fep.empty() || nlist->excl_fep[k];
 426
 427             if (rSq >= rcutoff_max2 && bPairIncluded)
 428             {
 429                 /* We save significant time by skipping all code below.
 430                  * Note that with soft-core interactions, the actual cut-off
 431                  * check might be different. But since the soft-core distance
 432                  * is always larger than r, checking on r here is safe.
 433                  * Exclusions outside the cutoff can not be skipped as
 434                  * when using Ewald: the reciprocal-space
 435                  * Ewald component still needs to be subtracted.
 436                  */
 437
 438                 continue;
 439             }
 440             npair_within_cutoff++;
 441
 442             if (rSq > rlistSquared)
 443             {
 444                 numExcludedPairsBeyondRlist++;
 445             }
 446
 447             if (rSq > 0)
 448             {
 449                 /* Note that unlike in the nbnxn kernels, we do not need
 450                  * to clamp the value of rSq before taking the invsqrt
 451                  * to avoid NaN in the LJ calculation, since here we do
 452                  * not calculate LJ interactions when C6 and C12 are zero.
 453                  */
 454
 455                 rInv = gmx::invsqrt(rSq);
 456                 r    = rSq * rInv;
 457             }
 458             else
 459             {
 460                 /* The force at r=0 is zero, because of symmetry.
 461                  * But note that the potential is in general non-zero,
 462                  * since the soft-cored r will be non-zero.
 463                  */
 464                 rInv = 0;
 465                 r    = 0;
 466             }
 467
 468             if (useSoftCore)
 469             {
 470                 rpm2 = rSq * rSq;  /* r4 */
 471                 rp   = rpm2 * rSq; /* r6 */
 472             }
 473             else
 474             {
 475                 /* The soft-core power p will not affect the results
 476                  * with not using soft-core, so we use power of 0 which gives
 477                  * the simplest math and cheapest code.
 478                  */
 479                 rpm2 = rInv * rInv;
 480                 rp   = 1;
 481             }
 482
 483             RealType fScal = 0;
 484
 485             qq[STATE_A] = iqA * chargeA[jnr];
 486             qq[STATE_B] = iqB * chargeB[jnr];
 487
 488             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 489             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 490
 491             if (bPairIncluded)
 492             {
 493                 c6[STATE_A] = nbfp[tj[STATE_A]];
 494                 c6[STATE_B] = nbfp[tj[STATE_B]];
 495
 496                 for (int i = 0; i < NSTATES; i++)
 497                 {
 498                     c12[i] = nbfp[tj[i] + 1];
 499                     if (useSoftCore)
 500                     {
 501                         if ((c6[i] > 0) && (c12[i] > 0))
 502                         {
 503                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 504                             sigma6[i] = half * c12[i] / c6[i];
 505                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 506                             {
 507                                 sigma6[i] = sigma6_min;
 508                             }
 509                         }
 510                         else
 511                         {
 512                             sigma6[i] = sigma6_def;
 513                         }
 514                     }
 515                 }
 516
 517                 if (useSoftCore)
 518                 {
 519                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 520                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 521                     {
 522                         alphaVdwEff  = 0;
 523                         alphaCoulEff = 0;
 524                     }
 525                     else
 526                     {
 527                         alphaVdwEff  = alpha_vdw;
 528                         alphaCoulEff = alpha_coul;
 529                     }
 530                 }
 531
 532                 for (int i = 0; i < NSTATES; i++)
 533                 {
 534                     fScalC[i] = 0;
 535                     fScalV[i] = 0;
 536                     vCoul[i]  = 0;
 537                     vVdw[i]   = 0;
 538
 539                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
 540
 541                     /* Only spend time on A or B state if it is non-zero */
 542                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 543                     {
 544                         /* this section has to be inside the loop because of the dependence on sigma6 */
 545                         if (useSoftCore)
 546                         {
 547                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
 548                             pthRoot(rPInvC, &rInvC, &rC);
 549                             if (scLambdasOrAlphasDiffer)
 550                             {
 551                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
 552                                 pthRoot(rPInvV, &rInvV, &rV);
 553                             }
 554                             else
 555                             {
 556                                 /* We can avoid one expensive pow and one / operation */
 557                                 rPInvV = rPInvC;
 558                                 rInvV  = rInvC;
 559                                 rV     = rC;
 560                             }
 561                         }
 562                         else
 563                         {
 564                             rPInvC = 1;
 565                             rInvC  = rInv;
 566                             rC     = r;
 567
 568                             rPInvV = 1;
 569                             rInvV  = rInv;
 570                             rV     = r;
 571                         }
 572
 573                         /* Only process the coulomb interactions if we have charges,
 574                          * and if we either include all entries in the list (no cutoff
 575                          * used in the kernel), or if we are within the cutoff.
 576                          */
 577                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
 578                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
 579
 580                         if ((qq[i] != 0) && computeElecInteraction)
 581                         {
 582                             if (elecInteractionTypeIsEwald)
 583                             {
 584                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
 585                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
 586                             }
 587                             else
 588                             {
 589                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
 590                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
 591                             }
 592                         }
 593
 594                         /* Only process the VDW interactions if we have
 595                          * some non-zero parameters, and if we either
 596                          * include all entries in the list (no cutoff used
 597                          * in the kernel), or if we are within the cutoff.
 598                          */
 599                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
 600                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
 601                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 602                         {
 603                             RealType rInv6;
 604                             if (useSoftCore)
 605                             {
 606                                 rInv6 = rPInvV;
 607                             }
 608                             else
 609                             {
 610                                 rInv6 = calculateRinv6(rInvV);
 611                             }
 612                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
 613                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
 614
 615                             vVdw[i] = lennardJonesPotential(
 616                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
 617                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
 618
 619                             if (vdwInteractionTypeIsEwald)
 620                             {
 621                                 /* Subtract the grid potential at the cut-off */
 622                                 vVdw[i] += ewaldLennardJonesGridSubtract(
 623                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
 624                             }
 625
 626                             if (vdwModifierIsPotSwitch)
 627                             {
 628                                 RealType d        = rV - ic->rvdw_switch;
 629                                 d                 = (d > zero) ? d : zero;
 630                                 const RealType d2 = d * d;
 631                                 const RealType sw =
 632                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 633                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 634
 635                                 fScalV[i] = potSwitchScalarForceMod(
 636                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
 637                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
 638                             }
 639                         }
 640
 641                         /* fScalC (and fScalV) now contain: dV/drC * rC
 642                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 643                          * Further down we first multiply by r^p-2 and then by
 644                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 645                          */
 646                         fScalC[i] *= rPInvC;
 647                         fScalV[i] *= rPInvV;
 648                     }
 649                 } // end for (int i = 0; i < NSTATES; i++)
 650
 651                 /* Assemble A and B states */
 652                 for (int i = 0; i < NSTATES; i++)
 653                 {
 654                     vCTot += LFC[i] * vCoul[i];
 655                     vVTot += LFV[i] * vVdw[i];
 656
 657                     fScal += LFC[i] * fScalC[i] * rpm2;
 658                     fScal += LFV[i] * fScalV[i] * rpm2;
 659
 660                     if (useSoftCore)
 661                     {
 662                         dvdlCoul += vCoul[i] * DLF[i]
 663                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
 664                         dvdlVdw += vVdw[i] * DLF[i]
 665                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
 666                     }
 667                     else
 668                     {
 669                         dvdlCoul += vCoul[i] * DLF[i];
 670                         dvdlVdw += vVdw[i] * DLF[i];
 671                     }
 672                 }
 673             } // end if (bPairIncluded)
 674             else if (icoul == NbkernelElecType::ReactionField)
 675             {
 676                 /* For excluded pairs, which are only in this pair list when
 677                  * using the Verlet scheme, we don't use soft-core.
 678                  * As there is no singularity, there is no need for soft-core.
 679                  */
 680                 const real FF = -two * krf;
 681                 RealType   VV = krf * rSq - crf;
 682
 683                 if (ii == jnr)
 684                 {
 685                     VV *= half;
 686                 }
 687
 688                 for (int i = 0; i < NSTATES; i++)
 689                 {
 690                     vCTot += LFC[i] * qq[i] * VV;
 691                     fScal += LFC[i] * qq[i] * FF;
 692                     dvdlCoul += DLF[i] * qq[i] * VV;
 693                 }
 694             }
 695
 696             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
 697             {
 698                 /* See comment in the preamble. When using Ewald interactions
 699                  * (unless we use a switch modifier) we subtract the reciprocal-space
 700                  * Ewald component here which made it possible to apply the free
 701                  * energy interaction to 1/r (vanilla coulomb short-range part)
 702                  * above. This gets us closer to the ideal case of applying
 703                  * the softcore to the entire electrostatic interaction,
 704                  * including the reciprocal-space component.
 705                  */
 706                 real v_lr, f_lr;
 707
 708                 const RealType ewrt   = r * coulombTableScale;
 709                 IntType        ewitab = static_cast<IntType>(ewrt);
 710                 const RealType eweps  = ewrt - ewitab;
 711                 ewitab                = 4 * ewitab;
 712                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 713                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
 714                 f_lr *= rInv;
 715
 716                 /* Note that any possible Ewald shift has already been applied in
 717                  * the normal interaction part above.
 718                  */
 719
 720                 if (ii == jnr)
 721                 {
 722                     /* If we get here, the i particle (ii) has itself (jnr)
 723                      * in its neighborlist. This can only happen with the Verlet
 724                      * scheme, and corresponds to a self-interaction that will
 725                      * occur twice. Scale it down by 50% to only include it once.
 726                      */
 727                     v_lr *= half;
 728                 }
 729
 730                 for (int i = 0; i < NSTATES; i++)
 731                 {
 732                     vCTot -= LFC[i] * qq[i] * v_lr;
 733                     fScal -= LFC[i] * qq[i] * f_lr;
 734                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
 735                 }
 736             }
 737
 738             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
 739             {
 740                 /* See comment in the preamble. When using LJ-Ewald interactions
 741                  * (unless we use a switch modifier) we subtract the reciprocal-space
 742                  * Ewald component here which made it possible to apply the free
 743                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 744                  * above. This gets us closer to the ideal case of applying
 745                  * the softcore to the entire VdW interaction,
 746                  * including the reciprocal-space component.
 747                  */
 748                 /* We could also use the analytical form here
 749                  * iso a table, but that can cause issues for
 750                  * r close to 0 for non-interacting pairs.
 751                  */
 752
 753                 const RealType rs   = rSq * rInv * vdwTableScale;
 754                 const IntType  ri   = static_cast<IntType>(rs);
 755                 const RealType frac = rs - ri;
 756                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 757                 /* TODO: Currently the Ewald LJ table does not contain
 758                  * the factor 1/6, we should add this.
 759                  */
 760                 const RealType FF = f_lr * rInv / six;
 761                 RealType       VV =
 762                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
 763                         / six;
 764
 765                 if (ii == jnr)
 766                 {
 767                     /* If we get here, the i particle (ii) has itself (jnr)
 768                      * in its neighborlist. This can only happen with the Verlet
 769                      * scheme, and corresponds to a self-interaction that will
 770                      * occur twice. Scale it down by 50% to only include it once.
 771                      */
 772                     VV *= half;
 773                 }
 774
 775                 for (int i = 0; i < NSTATES; i++)
 776                 {
 777                     const real c6grid = nbfp_grid[tj[i]];
 778                     vVTot += LFV[i] * c6grid * VV;
 779                     fScal += LFV[i] * c6grid * FF;
 780                     dvdlVdw += (DLF[i] * c6grid) * VV;
 781                 }
 782             }
 783
 784             if (doForces)
 785             {
 786                 const real tX = fScal * dX;
 787                 const real tY = fScal * dY;
 788                 const real tZ = fScal * dZ;
 789                 fIX           = fIX + tX;
 790                 fIY           = fIY + tY;
 791                 fIZ           = fIZ + tZ;
 792                 /* OpenMP atomics are expensive, but this kernels is also
 793                  * expensive, so we can take this hit, instead of using
 794                  * thread-local output buffers and extra reduction.
 795                  *
 796                  * All the OpenMP regions in this file are trivial and should
 797                  * not throw, so no need for try/catch.
 798                  */
 799 #pragma omp atomic
 800                 f[j3] -= tX;
 801 #pragma omp atomic
 802                 f[j3 + 1] -= tY;
 803 #pragma omp atomic
 804                 f[j3 + 2] -= tZ;
 805             }
 806         } // end for (int k = nj0; k < nj1; k++)
 807
 808         /* The atomics below are expensive with many OpenMP threads.
 809          * Here unperturbed i-particles will usually only have a few
 810          * (perturbed) j-particles in the list. Thus with a buffered list
 811          * we can skip a significant number of i-reductions with a check.
 812          */
 813         if (npair_within_cutoff > 0)
 814         {
 815             if (doForces)
 816             {
 817 #pragma omp atomic
 818                 f[ii3] += fIX;
 819 #pragma omp atomic
 820                 f[ii3 + 1] += fIY;
 821 #pragma omp atomic
 822                 f[ii3 + 2] += fIZ;
 823             }
 824             if (doShiftForces)
 825             {
 826 #pragma omp atomic
 827                 fshift[is3] += fIX;
 828 #pragma omp atomic
 829                 fshift[is3 + 1] += fIY;
 830 #pragma omp atomic
 831                 fshift[is3 + 2] += fIZ;
 832             }
 833             if (doPotential)
 834             {
 835                 int ggid = gid[n];
 836 #pragma omp atomic
 837                 Vc[ggid] += vCTot;
 838 #pragma omp atomic
 839                 Vv[ggid] += vVTot;
 840             }
 841         }
 842     } // end for (int n = 0; n < nri; n++)
 843
 844 #pragma omp atomic
 845     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdlCoul;
 846 #pragma omp atomic
 847     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdlVdw;
 848
 849     /* Estimate flops, average for free energy stuff:
 850      * 12  flops per outer iteration
 851      * 150 flops per inner iteration
 852      */
 853 #pragma omp atomic
 854     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 855
 856     if (numExcludedPairsBeyondRlist > 0)
 857     {
 858         gmx_fatal(FARGS,
 859                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
 860                   "of %g nm, which is not supported. This can happen because the system is "
 861                   "unstable or because intra-molecular interactions at long distances are "
 862                   "excluded. If the "
 863                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
 864                   "The error is likely triggered by the use of couple-intramol=no "
 865                   "and the maximal distance in the decoupled molecule exceeding rlist.",
 866                   numExcludedPairsBeyondRlist,
 867                   fr->rlist);
 868     }
 869 }
 870
 871 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 872                                rvec* gmx_restrict         xx,
 873                                gmx::ForceWithShiftForces* forceWithShiftForces,
 874                                const t_forcerec* gmx_restrict fr,
 875                                const t_mdatoms* gmx_restrict mdatoms,
 876                                nb_kernel_data_t* gmx_restrict kernel_data,
 877                                t_nrnb* gmx_restrict nrnb);
 878
 879 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 880 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 881 {
 882     if (useSimd)
 883     {
 884 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
 885         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
 886         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 887 #else
 888         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 889 #endif
 890     }
 891     else
 892     {
 893         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 894     }
 895 }
 896
 897 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 898 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
 899 {
 900     if (vdwModifierIsPotSwitch)
 901     {
 902         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
 903                 useSimd));
 904     }
 905     else
 906     {
 907         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
 908                 useSimd));
 909     }
 910 }
 911
 912 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 913 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 914                                                           const bool vdwModifierIsPotSwitch,
 915                                                           const bool useSimd)
 916 {
 917     if (elecInteractionTypeIsEwald)
 918     {
 919         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 920                 vdwModifierIsPotSwitch, useSimd));
 921     }
 922     else
 923     {
 924         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 925                 vdwModifierIsPotSwitch, useSimd));
 926     }
 927 }
 928
 929 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 930 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 931                                                          const bool elecInteractionTypeIsEwald,
 932                                                          const bool vdwModifierIsPotSwitch,
 933                                                          const bool useSimd)
 934 {
 935     if (vdwInteractionTypeIsEwald)
 936     {
 937         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 938                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 939     }
 940     else
 941     {
 942         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 943                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 944     }
 945 }
 946
 947 template<bool useSoftCore>
 948 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 949                                                                   const bool vdwInteractionTypeIsEwald,
 950                                                                   const bool elecInteractionTypeIsEwald,
 951                                                                   const bool vdwModifierIsPotSwitch,
 952                                                                   const bool useSimd)
 953 {
 954     if (scLambdasOrAlphasDiffer)
 955     {
 956         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 957                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 958     }
 959     else
 960     {
 961         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 962                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 963     }
 964 }
 965
 966 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
 967                                      const bool                 vdwInteractionTypeIsEwald,
 968                                      const bool                 elecInteractionTypeIsEwald,
 969                                      const bool                 vdwModifierIsPotSwitch,
 970                                      const bool                 useSimd,
 971                                      const interaction_const_t& ic)
 972 {
 973     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
 974     {
 975         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
 976                                                                    vdwInteractionTypeIsEwald,
 977                                                                    elecInteractionTypeIsEwald,
 978                                                                    vdwModifierIsPotSwitch,
 979                                                                    useSimd));
 980     }
 981     else
 982     {
 983         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
 984                                                                   vdwInteractionTypeIsEwald,
 985                                                                   elecInteractionTypeIsEwald,
 986                                                                   vdwModifierIsPotSwitch,
 987                                                                   useSimd));
 988     }
 989 }
 990
 991
 992 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 993                                rvec*                      xx,
 994                                gmx::ForceWithShiftForces* ff,
 995                                const t_forcerec*          fr,
 996                                const t_mdatoms*           mdatoms,
 997                                nb_kernel_data_t*          kernel_data,
 998                                t_nrnb*                    nrnb)
 999 {
1000     const interaction_const_t& ic = *fr->ic;
1001     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1002                "Unsupported eeltype with free energy");
1003     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1004
1005     const auto& scParams                   = *ic.softCoreParameters;
1006     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1007     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1008     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1009     bool        scLambdasOrAlphasDiffer    = true;
1010     const bool  useSimd                    = fr->use_simd_kernels;
1011
1012     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1013     {
1014         scLambdasOrAlphasDiffer = false;
1015     }
1016     else
1017     {
1018         if (kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1019                     == kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1020             && scParams.alphaCoulomb == scParams.alphaVdw)
1021         {
1022             scLambdasOrAlphasDiffer = false;
1023         }
1024     }
1025
1026     KernelFunction kernelFunc;
1027     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1028                                 vdwInteractionTypeIsEwald,
1029                                 elecInteractionTypeIsEwald,
1030                                 vdwModifierIsPotSwitch,
1031                                 useSimd,
1032                                 ic);
1033     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1034 }