src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include "config.h"
  43
  44 #include <cmath>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/gmxlib/nrnb.h"
  49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  51 #include "gromacs/math/functions.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/mdtypes/forceoutput.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/interaction_const.h"
  56 #include "gromacs/mdtypes/md_enums.h"
  57 #include "gromacs/mdtypes/mdatom.h"
  58 #include "gromacs/simd/simd.h"
  59 #include "gromacs/utility/fatalerror.h"
  60
  61
  62 //! Scalar (non-SIMD) data types.
  63 struct ScalarDataTypes
  64 {
  65     using RealType                     = real; //!< The data type to use as real.
  66     using IntType                      = int;  //!< The data type to use as int.
  67     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
  68     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
  69 };
  70
  71 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
  72 //! SIMD data types.
  73 struct SimdDataTypes
  74 {
  75     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
  76     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
  77     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
  78     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
  79 };
  80 #endif
  81
  82 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  83 template<class RealType>
  84 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1 / (*invPthRoot);
  88 }
  89
  90 template<class RealType>
  91 static inline RealType calculateRinv6(const RealType rinvV)
  92 {
  93     RealType rinv6 = rinvV * rinvV;
  94     return (rinv6 * rinv6 * rinv6);
  95 }
  96
  97 template<class RealType>
  98 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
  99 {
 100     return (c6 * rinv6);
 101 }
 102
 103 template<class RealType>
 104 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
 105 {
 106     return (c12 * rinv6 * rinv6);
 107 }
 108
 109 /* reaction-field electrostatics */
 110 template<class RealType>
 111 static inline RealType reactionFieldScalarForce(const RealType qq,
 112                                                 const RealType rinv,
 113                                                 const RealType r,
 114                                                 const real     krf,
 115                                                 const real     two)
 116 {
 117     return (qq * (rinv - two * krf * r * r));
 118 }
 119 template<class RealType>
 120 static inline RealType reactionFieldPotential(const RealType qq,
 121                                               const RealType rinv,
 122                                               const RealType r,
 123                                               const real     krf,
 124                                               const real     potentialShift)
 125 {
 126     return (qq * (rinv + krf * r * r - potentialShift));
 127 }
 128
 129 /* Ewald electrostatics */
 130 template<class RealType>
 131 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
 132 {
 133     return (coulomb * rinv);
 134 }
 135 template<class RealType>
 136 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
 137 {
 138     return (coulomb * (rinv - potentialShift));
 139 }
 140
 141 /* cutoff LJ */
 142 template<class RealType>
 143 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
 144 {
 145     return (v12 - v6);
 146 }
 147 template<class RealType>
 148 static inline RealType lennardJonesPotential(const RealType v6,
 149                                              const RealType v12,
 150                                              const RealType c6,
 151                                              const RealType c12,
 152                                              const real     repulsionShift,
 153                                              const real     dispersionShift,
 154                                              const real     onesixth,
 155                                              const real     onetwelfth)
 156 {
 157     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 158 }
 159
 160 /* Ewald LJ */
 161 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 162 {
 163     return (c6grid * potentialShift * onesixth);
 164 }
 165
 166 /* LJ Potential switch */
 167 template<class RealType>
 168 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
 169                                                const RealType potential,
 170                                                const RealType sw,
 171                                                const RealType r,
 172                                                const RealType rVdw,
 173                                                const RealType dsw,
 174                                                const real     zero)
 175 {
 176     if (r < rVdw)
 177     {
 178         real fScalar = fScalarInp * sw - r * potential * dsw;
 179         return (fScalar);
 180     }
 181     return (zero);
 182 }
 183 template<class RealType>
 184 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
 185                                              const RealType sw,
 186                                              const RealType r,
 187                                              const RealType rVdw,
 188                                              const real     zero)
 189 {
 190     if (r < rVdw)
 191     {
 192         real potential = potentialInp * sw;
 193         return (potential);
 194     }
 195     return (zero);
 196 }
 197
 198
 199 //! Templated free-energy non-bonded kernel
 200 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 201 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 202                                   rvec* gmx_restrict         xx,
 203                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 204                                   const t_forcerec* gmx_restrict fr,
 205                                   const t_mdatoms* gmx_restrict mdatoms,
 206                                   nb_kernel_data_t* gmx_restrict kernel_data,
 207                                   t_nrnb* gmx_restrict nrnb)
 208 {
 209 #define STATE_A 0
 210 #define STATE_B 1
 211 #define NSTATES 2
 212
 213     using RealType = typename DataTypes::RealType;
 214     using IntType  = typename DataTypes::IntType;
 215
 216     /* FIXME: How should these be handled with SIMD? */
 217     constexpr real onetwelfth = 1.0 / 12.0;
 218     constexpr real onesixth   = 1.0 / 6.0;
 219     constexpr real zero       = 0.0;
 220     constexpr real half       = 0.5;
 221     constexpr real one        = 1.0;
 222     constexpr real two        = 2.0;
 223     constexpr real six        = 6.0;
 224
 225     /* Extract pointer to non-bonded interaction constants */
 226     const interaction_const_t* ic = fr->ic;
 227
 228     // Extract pair list data
 229     const int  nri    = nlist->nri;
 230     const int* iinr   = nlist->iinr;
 231     const int* jindex = nlist->jindex;
 232     const int* jjnr   = nlist->jjnr;
 233     const int* shift  = nlist->shift;
 234     const int* gid    = nlist->gid;
 235
 236     const real* shiftvec      = fr->shift_vec[0];
 237     const real* chargeA       = mdatoms->chargeA;
 238     const real* chargeB       = mdatoms->chargeB;
 239     real*       Vc            = kernel_data->energygrp_elec;
 240     const int*  typeA         = mdatoms->typeA;
 241     const int*  typeB         = mdatoms->typeB;
 242     const int   ntype         = fr->ntype;
 243     const real* nbfp          = fr->nbfp.data();
 244     const real* nbfp_grid     = fr->ljpme_c6grid;
 245     real*       Vv            = kernel_data->energygrp_vdw;
 246     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 247     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 248     real*       dvdl          = kernel_data->dvdl;
 249     const auto& scParams      = *ic->softCoreParameters;
 250     const real  alpha_coul    = scParams.alphaCoulomb;
 251     const real  alpha_vdw     = scParams.alphaVdw;
 252     const real  lam_power     = scParams.lambdaPower;
 253     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
 254     const real  sigma6_min    = scParams.sigma6Minimum;
 255     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 256     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 257     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 258
 259     // Extract data from interaction_const_t
 260     const real facel           = ic->epsfac;
 261     const real rcoulomb        = ic->rcoulomb;
 262     const real krf             = ic->k_rf;
 263     const real crf             = ic->c_rf;
 264     const real sh_lj_ewald     = ic->sh_lj_ewald;
 265     const real rvdw            = ic->rvdw;
 266     const real dispersionShift = ic->dispersion_shift.cpot;
 267     const real repulsionShift  = ic->repulsion_shift.cpot;
 268
 269     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 270     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 271                "Potential switching is not supported for Coulomb with FEP");
 272
 273     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 274     if (vdwModifierIsPotSwitch)
 275     {
 276         const real d = ic->rvdw - ic->rvdw_switch;
 277         vdw_swV3     = -10.0 / (d * d * d);
 278         vdw_swV4     = 15.0 / (d * d * d * d);
 279         vdw_swV5     = -6.0 / (d * d * d * d * d);
 280         vdw_swF2     = -30.0 / (d * d * d);
 281         vdw_swF3     = 60.0 / (d * d * d * d);
 282         vdw_swF4     = -30.0 / (d * d * d * d * d);
 283     }
 284     else
 285     {
 286         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 287         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 288     }
 289
 290     int icoul;
 291     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 292     {
 293         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 294     }
 295     else
 296     {
 297         icoul = GMX_NBKERNEL_ELEC_NONE;
 298     }
 299
 300     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 301     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 302
 303     const real* tab_ewald_F_lj           = nullptr;
 304     const real* tab_ewald_V_lj           = nullptr;
 305     const real* ewtab                    = nullptr;
 306     real        coulombTableScale        = 0;
 307     real        coulombTableScaleInvHalf = 0;
 308     real        vdwTableScale            = 0;
 309     real        vdwTableScaleInvHalf     = 0;
 310     real        sh_ewald                 = 0;
 311     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 312     {
 313         sh_ewald = ic->sh_ewald;
 314     }
 315     if (elecInteractionTypeIsEwald)
 316     {
 317         const auto& coulombTables = *ic->coulombEwaldTables;
 318         ewtab                     = coulombTables.tableFDV0.data();
 319         coulombTableScale         = coulombTables.scale;
 320         coulombTableScaleInvHalf  = half / coulombTableScale;
 321     }
 322     if (vdwInteractionTypeIsEwald)
 323     {
 324         const auto& vdwTables = *ic->vdwEwaldTables;
 325         tab_ewald_F_lj        = vdwTables.tableF.data();
 326         tab_ewald_V_lj        = vdwTables.tableV.data();
 327         vdwTableScale         = vdwTables.scale;
 328         vdwTableScaleInvHalf  = half / vdwTableScale;
 329     }
 330
 331     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 332      * reciprocal space. When we use non-switched Ewald interactions, we
 333      * assume the soft-coring does not significantly affect the grid contribution
 334      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 335      *
 336      * However, we cannot use this approach for switch-modified since we would then
 337      * effectively end up evaluating a significantly different interaction here compared to the
 338      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 339      * position than what the user requested, or by switching different
 340      * things (1/r rather than short-range Ewald). For these settings, we just
 341      * use the traditional short-range Ewald interaction in that case.
 342      */
 343     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 344                        "Can not apply soft-core to switched Ewald potentials");
 345
 346     real dvdl_coul = 0;
 347     real dvdl_vdw  = 0;
 348
 349     /* Lambda factor for state A, 1-lambda*/
 350     real LFC[NSTATES], LFV[NSTATES];
 351     LFC[STATE_A] = one - lambda_coul;
 352     LFV[STATE_A] = one - lambda_vdw;
 353
 354     /* Lambda factor for state B, lambda*/
 355     LFC[STATE_B] = lambda_coul;
 356     LFV[STATE_B] = lambda_vdw;
 357
 358     /*derivative of the lambda factor for state A and B */
 359     real DLF[NSTATES];
 360     DLF[STATE_A] = -1;
 361     DLF[STATE_B] = 1;
 362
 363     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 364     constexpr real sc_r_power = 6.0_real;
 365     for (int i = 0; i < NSTATES; i++)
 366     {
 367         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 368         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 369         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 370         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 371     }
 372
 373     // TODO: We should get rid of using pointers to real
 374     const real* x             = xx[0];
 375     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 376     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 377
 378     const real rlistSquared = gmx::square(fr->rlist);
 379
 380     int numExcludedPairsBeyondRlist = 0;
 381
 382     for (int n = 0; n < nri; n++)
 383     {
 384         int npair_within_cutoff = 0;
 385
 386         const int  is3   = 3 * shift[n];
 387         const real shX   = shiftvec[is3];
 388         const real shY   = shiftvec[is3 + 1];
 389         const real shZ   = shiftvec[is3 + 2];
 390         const int  nj0   = jindex[n];
 391         const int  nj1   = jindex[n + 1];
 392         const int  ii    = iinr[n];
 393         const int  ii3   = 3 * ii;
 394         const real ix    = shX + x[ii3 + 0];
 395         const real iy    = shY + x[ii3 + 1];
 396         const real iz    = shZ + x[ii3 + 2];
 397         const real iqA   = facel * chargeA[ii];
 398         const real iqB   = facel * chargeB[ii];
 399         const int  ntiA  = 2 * ntype * typeA[ii];
 400         const int  ntiB  = 2 * ntype * typeB[ii];
 401         real       vctot = 0;
 402         real       vvtot = 0;
 403         real       fix   = 0;
 404         real       fiy   = 0;
 405         real       fiz   = 0;
 406
 407         for (int k = nj0; k < nj1; k++)
 408         {
 409             int            tj[NSTATES];
 410             const int      jnr = jjnr[k];
 411             const int      j3  = 3 * jnr;
 412             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 413             RealType       r, rinv, rp, rpm2;
 414             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
 415             const RealType dx  = ix - x[j3];
 416             const RealType dy  = iy - x[j3 + 1];
 417             const RealType dz  = iz - x[j3 + 2];
 418             const RealType rsq = dx * dx + dy * dy + dz * dz;
 419             RealType       FscalC[NSTATES], FscalV[NSTATES];
 420             /* Check if this pair on the exlusions list.*/
 421             const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
 422
 423             if (rsq >= rcutoff_max2 && bPairIncluded)
 424             {
 425                 /* We save significant time by skipping all code below.
 426                  * Note that with soft-core interactions, the actual cut-off
 427                  * check might be different. But since the soft-core distance
 428                  * is always larger than r, checking on r here is safe.
 429                  * Exclusions outside the cutoff can not be skipped as
 430                  * when using Ewald: the reciprocal-space
 431                  * Ewald component still needs to be subtracted.
 432                  */
 433
 434                 continue;
 435             }
 436             npair_within_cutoff++;
 437
 438             if (rsq > rlistSquared)
 439             {
 440                 numExcludedPairsBeyondRlist++;
 441             }
 442
 443             if (rsq > 0)
 444             {
 445                 /* Note that unlike in the nbnxn kernels, we do not need
 446                  * to clamp the value of rsq before taking the invsqrt
 447                  * to avoid NaN in the LJ calculation, since here we do
 448                  * not calculate LJ interactions when C6 and C12 are zero.
 449                  */
 450
 451                 rinv = gmx::invsqrt(rsq);
 452                 r    = rsq * rinv;
 453             }
 454             else
 455             {
 456                 /* The force at r=0 is zero, because of symmetry.
 457                  * But note that the potential is in general non-zero,
 458                  * since the soft-cored r will be non-zero.
 459                  */
 460                 rinv = 0;
 461                 r    = 0;
 462             }
 463
 464             if (useSoftCore)
 465             {
 466                 rpm2 = rsq * rsq;  /* r4 */
 467                 rp   = rpm2 * rsq; /* r6 */
 468             }
 469             else
 470             {
 471                 /* The soft-core power p will not affect the results
 472                  * with not using soft-core, so we use power of 0 which gives
 473                  * the simplest math and cheapest code.
 474                  */
 475                 rpm2 = rinv * rinv;
 476                 rp   = 1;
 477             }
 478
 479             RealType Fscal = 0;
 480
 481             qq[STATE_A] = iqA * chargeA[jnr];
 482             qq[STATE_B] = iqB * chargeB[jnr];
 483
 484             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 485             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 486
 487             if (bPairIncluded)
 488             {
 489                 c6[STATE_A] = nbfp[tj[STATE_A]];
 490                 c6[STATE_B] = nbfp[tj[STATE_B]];
 491
 492                 for (int i = 0; i < NSTATES; i++)
 493                 {
 494                     c12[i] = nbfp[tj[i] + 1];
 495                     if (useSoftCore)
 496                     {
 497                         if ((c6[i] > 0) && (c12[i] > 0))
 498                         {
 499                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 500                             sigma6[i] = half * c12[i] / c6[i];
 501                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 502                             {
 503                                 sigma6[i] = sigma6_min;
 504                             }
 505                         }
 506                         else
 507                         {
 508                             sigma6[i] = sigma6_def;
 509                         }
 510                     }
 511                 }
 512
 513                 if (useSoftCore)
 514                 {
 515                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 516                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 517                     {
 518                         alpha_vdw_eff  = 0;
 519                         alpha_coul_eff = 0;
 520                     }
 521                     else
 522                     {
 523                         alpha_vdw_eff  = alpha_vdw;
 524                         alpha_coul_eff = alpha_coul;
 525                     }
 526                 }
 527
 528                 for (int i = 0; i < NSTATES; i++)
 529                 {
 530                     FscalC[i] = 0;
 531                     FscalV[i] = 0;
 532                     Vcoul[i]  = 0;
 533                     Vvdw[i]   = 0;
 534
 535                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
 536
 537                     /* Only spend time on A or B state if it is non-zero */
 538                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 539                     {
 540                         /* this section has to be inside the loop because of the dependence on sigma6 */
 541                         if (useSoftCore)
 542                         {
 543                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
 544                             pthRoot(rpinvC, &rinvC, &rC);
 545                             if (scLambdasOrAlphasDiffer)
 546                             {
 547                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
 548                                 pthRoot(rpinvV, &rinvV, &rV);
 549                             }
 550                             else
 551                             {
 552                                 /* We can avoid one expensive pow and one / operation */
 553                                 rpinvV = rpinvC;
 554                                 rinvV  = rinvC;
 555                                 rV     = rC;
 556                             }
 557                         }
 558                         else
 559                         {
 560                             rpinvC = 1;
 561                             rinvC  = rinv;
 562                             rC     = r;
 563
 564                             rpinvV = 1;
 565                             rinvV  = rinv;
 566                             rV     = r;
 567                         }
 568
 569                         /* Only process the coulomb interactions if we have charges,
 570                          * and if we either include all entries in the list (no cutoff
 571                          * used in the kernel), or if we are within the cutoff.
 572                          */
 573                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 574                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 575
 576                         if ((qq[i] != 0) && computeElecInteraction)
 577                         {
 578                             if (elecInteractionTypeIsEwald)
 579                             {
 580                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 581                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 582                             }
 583                             else
 584                             {
 585                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 586                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 587                             }
 588                         }
 589
 590                         /* Only process the VDW interactions if we have
 591                          * some non-zero parameters, and if we either
 592                          * include all entries in the list (no cutoff used
 593                          * in the kernel), or if we are within the cutoff.
 594                          */
 595                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 596                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 597                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 598                         {
 599                             RealType rinv6;
 600                             if (useSoftCore)
 601                             {
 602                                 rinv6 = rpinvV;
 603                             }
 604                             else
 605                             {
 606                                 rinv6 = calculateRinv6(rinvV);
 607                             }
 608                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
 609                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
 610
 611                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 612                                                             dispersionShift, onesixth, onetwelfth);
 613                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 614
 615                             if (vdwInteractionTypeIsEwald)
 616                             {
 617                                 /* Subtract the grid potential at the cut-off */
 618                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 619                                                                          sh_lj_ewald, onesixth);
 620                             }
 621
 622                             if (vdwModifierIsPotSwitch)
 623                             {
 624                                 RealType d        = rV - ic->rvdw_switch;
 625                                 d                 = (d > zero) ? d : zero;
 626                                 const RealType d2 = d * d;
 627                                 const RealType sw =
 628                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 629                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 630
 631                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 632                                                                     rvdw, dsw, zero);
 633                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 634                             }
 635                         }
 636
 637                         /* FscalC (and FscalV) now contain: dV/drC * rC
 638                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 639                          * Further down we first multiply by r^p-2 and then by
 640                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 641                          */
 642                         FscalC[i] *= rpinvC;
 643                         FscalV[i] *= rpinvV;
 644                     }
 645                 } // end for (int i = 0; i < NSTATES; i++)
 646
 647                 /* Assemble A and B states */
 648                 for (int i = 0; i < NSTATES; i++)
 649                 {
 650                     vctot += LFC[i] * Vcoul[i];
 651                     vvtot += LFV[i] * Vvdw[i];
 652
 653                     Fscal += LFC[i] * FscalC[i] * rpm2;
 654                     Fscal += LFV[i] * FscalV[i] * rpm2;
 655
 656                     if (useSoftCore)
 657                     {
 658                         dvdl_coul += Vcoul[i] * DLF[i]
 659                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
 660                         dvdl_vdw += Vvdw[i] * DLF[i]
 661                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
 662                     }
 663                     else
 664                     {
 665                         dvdl_coul += Vcoul[i] * DLF[i];
 666                         dvdl_vdw += Vvdw[i] * DLF[i];
 667                     }
 668                 }
 669             } // end if (bPairIncluded)
 670             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 671             {
 672                 /* For excluded pairs, which are only in this pair list when
 673                  * using the Verlet scheme, we don't use soft-core.
 674                  * As there is no singularity, there is no need for soft-core.
 675                  */
 676                 const real FF = -two * krf;
 677                 RealType   VV = krf * rsq - crf;
 678
 679                 if (ii == jnr)
 680                 {
 681                     VV *= half;
 682                 }
 683
 684                 for (int i = 0; i < NSTATES; i++)
 685                 {
 686                     vctot += LFC[i] * qq[i] * VV;
 687                     Fscal += LFC[i] * qq[i] * FF;
 688                     dvdl_coul += DLF[i] * qq[i] * VV;
 689                 }
 690             }
 691
 692             if (elecInteractionTypeIsEwald && (r < rcoulomb || !bPairIncluded))
 693             {
 694                 /* See comment in the preamble. When using Ewald interactions
 695                  * (unless we use a switch modifier) we subtract the reciprocal-space
 696                  * Ewald component here which made it possible to apply the free
 697                  * energy interaction to 1/r (vanilla coulomb short-range part)
 698                  * above. This gets us closer to the ideal case of applying
 699                  * the softcore to the entire electrostatic interaction,
 700                  * including the reciprocal-space component.
 701                  */
 702                 real v_lr, f_lr;
 703
 704                 const RealType ewrt   = r * coulombTableScale;
 705                 IntType        ewitab = static_cast<IntType>(ewrt);
 706                 const RealType eweps  = ewrt - ewitab;
 707                 ewitab                = 4 * ewitab;
 708                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 709                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
 710                 f_lr *= rinv;
 711
 712                 /* Note that any possible Ewald shift has already been applied in
 713                  * the normal interaction part above.
 714                  */
 715
 716                 if (ii == jnr)
 717                 {
 718                     /* If we get here, the i particle (ii) has itself (jnr)
 719                      * in its neighborlist. This can only happen with the Verlet
 720                      * scheme, and corresponds to a self-interaction that will
 721                      * occur twice. Scale it down by 50% to only include it once.
 722                      */
 723                     v_lr *= half;
 724                 }
 725
 726                 for (int i = 0; i < NSTATES; i++)
 727                 {
 728                     vctot -= LFC[i] * qq[i] * v_lr;
 729                     Fscal -= LFC[i] * qq[i] * f_lr;
 730                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 731                 }
 732             }
 733
 734             if (vdwInteractionTypeIsEwald && (r < rvdw || !bPairIncluded))
 735             {
 736                 /* See comment in the preamble. When using LJ-Ewald interactions
 737                  * (unless we use a switch modifier) we subtract the reciprocal-space
 738                  * Ewald component here which made it possible to apply the free
 739                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 740                  * above. This gets us closer to the ideal case of applying
 741                  * the softcore to the entire VdW interaction,
 742                  * including the reciprocal-space component.
 743                  */
 744                 /* We could also use the analytical form here
 745                  * iso a table, but that can cause issues for
 746                  * r close to 0 for non-interacting pairs.
 747                  */
 748
 749                 const RealType rs   = rsq * rinv * vdwTableScale;
 750                 const IntType  ri   = static_cast<IntType>(rs);
 751                 const RealType frac = rs - ri;
 752                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 753                 /* TODO: Currently the Ewald LJ table does not contain
 754                  * the factor 1/6, we should add this.
 755                  */
 756                 const RealType FF = f_lr * rinv / six;
 757                 RealType       VV =
 758                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
 759                         / six;
 760
 761                 if (ii == jnr)
 762                 {
 763                     /* If we get here, the i particle (ii) has itself (jnr)
 764                      * in its neighborlist. This can only happen with the Verlet
 765                      * scheme, and corresponds to a self-interaction that will
 766                      * occur twice. Scale it down by 50% to only include it once.
 767                      */
 768                     VV *= half;
 769                 }
 770
 771                 for (int i = 0; i < NSTATES; i++)
 772                 {
 773                     const real c6grid = nbfp_grid[tj[i]];
 774                     vvtot += LFV[i] * c6grid * VV;
 775                     Fscal += LFV[i] * c6grid * FF;
 776                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 777                 }
 778             }
 779
 780             if (doForces)
 781             {
 782                 const real tx = Fscal * dx;
 783                 const real ty = Fscal * dy;
 784                 const real tz = Fscal * dz;
 785                 fix           = fix + tx;
 786                 fiy           = fiy + ty;
 787                 fiz           = fiz + tz;
 788                 /* OpenMP atomics are expensive, but this kernels is also
 789                  * expensive, so we can take this hit, instead of using
 790                  * thread-local output buffers and extra reduction.
 791                  *
 792                  * All the OpenMP regions in this file are trivial and should
 793                  * not throw, so no need for try/catch.
 794                  */
 795 #pragma omp atomic
 796                 f[j3] -= tx;
 797 #pragma omp atomic
 798                 f[j3 + 1] -= ty;
 799 #pragma omp atomic
 800                 f[j3 + 2] -= tz;
 801             }
 802         } // end for (int k = nj0; k < nj1; k++)
 803
 804         /* The atomics below are expensive with many OpenMP threads.
 805          * Here unperturbed i-particles will usually only have a few
 806          * (perturbed) j-particles in the list. Thus with a buffered list
 807          * we can skip a significant number of i-reductions with a check.
 808          */
 809         if (npair_within_cutoff > 0)
 810         {
 811             if (doForces)
 812             {
 813 #pragma omp atomic
 814                 f[ii3] += fix;
 815 #pragma omp atomic
 816                 f[ii3 + 1] += fiy;
 817 #pragma omp atomic
 818                 f[ii3 + 2] += fiz;
 819             }
 820             if (doShiftForces)
 821             {
 822 #pragma omp atomic
 823                 fshift[is3] += fix;
 824 #pragma omp atomic
 825                 fshift[is3 + 1] += fiy;
 826 #pragma omp atomic
 827                 fshift[is3 + 2] += fiz;
 828             }
 829             if (doPotential)
 830             {
 831                 int ggid = gid[n];
 832 #pragma omp atomic
 833                 Vc[ggid] += vctot;
 834 #pragma omp atomic
 835                 Vv[ggid] += vvtot;
 836             }
 837         }
 838     } // end for (int n = 0; n < nri; n++)
 839
 840 #pragma omp atomic
 841     dvdl[efptCOUL] += dvdl_coul;
 842 #pragma omp atomic
 843     dvdl[efptVDW] += dvdl_vdw;
 844
 845     /* Estimate flops, average for free energy stuff:
 846      * 12  flops per outer iteration
 847      * 150 flops per inner iteration
 848      */
 849 #pragma omp atomic
 850     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 851
 852     if (numExcludedPairsBeyondRlist > 0)
 853     {
 854         gmx_fatal(FARGS,
 855                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
 856                   "of %g nm, which is not supported. This can happen because the system is "
 857                   "unstable or because intra-molecular interactions at long distances are "
 858                   "excluded. If the "
 859                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
 860                   "The error is likely triggered by the use of couple-intramol=no "
 861                   "and the maximal distance in the decoupled molecule exceeding rlist.",
 862                   numExcludedPairsBeyondRlist, fr->rlist);
 863     }
 864 }
 865
 866 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 867                                rvec* gmx_restrict         xx,
 868                                gmx::ForceWithShiftForces* forceWithShiftForces,
 869                                const t_forcerec* gmx_restrict fr,
 870                                const t_mdatoms* gmx_restrict mdatoms,
 871                                nb_kernel_data_t* gmx_restrict kernel_data,
 872                                t_nrnb* gmx_restrict nrnb);
 873
 874 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 875 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 876 {
 877     if (useSimd)
 878     {
 879 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
 880         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
 881         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 882                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 883 #else
 884         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 885                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 886 #endif
 887     }
 888     else
 889     {
 890         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 891                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 892     }
 893 }
 894
 895 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 896 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
 897 {
 898     if (vdwModifierIsPotSwitch)
 899     {
 900         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 901                                         elecInteractionTypeIsEwald, true>(useSimd));
 902     }
 903     else
 904     {
 905         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 906                                         elecInteractionTypeIsEwald, false>(useSimd));
 907     }
 908 }
 909
 910 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 911 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 912                                                           const bool vdwModifierIsPotSwitch,
 913                                                           const bool useSimd)
 914 {
 915     if (elecInteractionTypeIsEwald)
 916     {
 917         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 918                 vdwModifierIsPotSwitch, useSimd));
 919     }
 920     else
 921     {
 922         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 923                 vdwModifierIsPotSwitch, useSimd));
 924     }
 925 }
 926
 927 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 928 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 929                                                          const bool elecInteractionTypeIsEwald,
 930                                                          const bool vdwModifierIsPotSwitch,
 931                                                          const bool useSimd)
 932 {
 933     if (vdwInteractionTypeIsEwald)
 934     {
 935         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 936                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 937     }
 938     else
 939     {
 940         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 941                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 942     }
 943 }
 944
 945 template<bool useSoftCore>
 946 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 947                                                                   const bool vdwInteractionTypeIsEwald,
 948                                                                   const bool elecInteractionTypeIsEwald,
 949                                                                   const bool vdwModifierIsPotSwitch,
 950                                                                   const bool useSimd)
 951 {
 952     if (scLambdasOrAlphasDiffer)
 953     {
 954         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 955                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 956     }
 957     else
 958     {
 959         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 960                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 961     }
 962 }
 963
 964 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
 965                                      const bool                 vdwInteractionTypeIsEwald,
 966                                      const bool                 elecInteractionTypeIsEwald,
 967                                      const bool                 vdwModifierIsPotSwitch,
 968                                      const bool                 useSimd,
 969                                      const interaction_const_t& ic)
 970 {
 971     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
 972     {
 973         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
 974                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 975                 vdwModifierIsPotSwitch, useSimd));
 976     }
 977     else
 978     {
 979         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
 980                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 981                 vdwModifierIsPotSwitch, useSimd));
 982     }
 983 }
 984
 985
 986 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 987                                rvec*                      xx,
 988                                gmx::ForceWithShiftForces* ff,
 989                                const t_forcerec*          fr,
 990                                const t_mdatoms*           mdatoms,
 991                                nb_kernel_data_t*          kernel_data,
 992                                t_nrnb*                    nrnb)
 993 {
 994     const interaction_const_t& ic = *fr->ic;
 995     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == eelCUT || EEL_RF(ic.eeltype),
 996                "Unsupported eeltype with free energy");
 997     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
 998
 999     const auto& scParams                   = *ic.softCoreParameters;
1000     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1001     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1002     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == eintmodPOTSWITCH);
1003     bool        scLambdasOrAlphasDiffer    = true;
1004     const bool  useSimd                    = fr->use_simd_kernels;
1005
1006     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1007     {
1008         scLambdasOrAlphasDiffer = false;
1009     }
1010     else
1011     {
1012         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW]
1013             && scParams.alphaCoulomb == scParams.alphaVdw)
1014         {
1015             scLambdasOrAlphasDiffer = false;
1016         }
1017     }
1018
1019     KernelFunction kernelFunc;
1020     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
1021                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, ic);
1022     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1023 }