src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include "config.h"
  43
  44 #include <cmath>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/gmxlib/nrnb.h"
  49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  51 #include "gromacs/math/functions.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/mdtypes/forceoutput.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/interaction_const.h"
  56 #include "gromacs/mdtypes/md_enums.h"
  57 #include "gromacs/mdtypes/mdatom.h"
  58 #include "gromacs/simd/simd.h"
  59 #include "gromacs/utility/fatalerror.h"
  60
  61
  62 //! Scalar (non-SIMD) data types.
  63 struct ScalarDataTypes
  64 {
  65     using RealType                     = real; //!< The data type to use as real.
  66     using IntType                      = int;  //!< The data type to use as int.
  67     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
  68     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
  69 };
  70
  71 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
  72 //! SIMD data types.
  73 struct SimdDataTypes
  74 {
  75     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
  76     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
  77     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
  78     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
  79 };
  80 #endif
  81
  82 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  83 template<class RealType>
  84 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1 / (*invPthRoot);
  88 }
  89
  90 template<class RealType>
  91 static inline RealType calculateRinv6(const RealType rinvV)
  92 {
  93     RealType rinv6 = rinvV * rinvV;
  94     return (rinv6 * rinv6 * rinv6);
  95 }
  96
  97 template<class RealType>
  98 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
  99 {
 100     return (c6 * rinv6);
 101 }
 102
 103 template<class RealType>
 104 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
 105 {
 106     return (c12 * rinv6 * rinv6);
 107 }
 108
 109 /* reaction-field electrostatics */
 110 template<class RealType>
 111 static inline RealType reactionFieldScalarForce(const RealType qq,
 112                                                 const RealType rinv,
 113                                                 const RealType r,
 114                                                 const real     krf,
 115                                                 const real     two)
 116 {
 117     return (qq * (rinv - two * krf * r * r));
 118 }
 119 template<class RealType>
 120 static inline RealType reactionFieldPotential(const RealType qq,
 121                                               const RealType rinv,
 122                                               const RealType r,
 123                                               const real     krf,
 124                                               const real     potentialShift)
 125 {
 126     return (qq * (rinv + krf * r * r - potentialShift));
 127 }
 128
 129 /* Ewald electrostatics */
 130 template<class RealType>
 131 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
 132 {
 133     return (coulomb * rinv);
 134 }
 135 template<class RealType>
 136 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
 137 {
 138     return (coulomb * (rinv - potentialShift));
 139 }
 140
 141 /* cutoff LJ */
 142 template<class RealType>
 143 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
 144 {
 145     return (v12 - v6);
 146 }
 147 template<class RealType>
 148 static inline RealType lennardJonesPotential(const RealType v6,
 149                                              const RealType v12,
 150                                              const RealType c6,
 151                                              const RealType c12,
 152                                              const real     repulsionShift,
 153                                              const real     dispersionShift,
 154                                              const real     onesixth,
 155                                              const real     onetwelfth)
 156 {
 157     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 158 }
 159
 160 /* Ewald LJ */
 161 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 162 {
 163     return (c6grid * potentialShift * onesixth);
 164 }
 165
 166 /* LJ Potential switch */
 167 template<class RealType>
 168 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
 169                                                const RealType potential,
 170                                                const RealType sw,
 171                                                const RealType r,
 172                                                const RealType rVdw,
 173                                                const RealType dsw,
 174                                                const real     zero)
 175 {
 176     if (r < rVdw)
 177     {
 178         real fScalar = fScalarInp * sw - r * potential * dsw;
 179         return (fScalar);
 180     }
 181     return (zero);
 182 }
 183 template<class RealType>
 184 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
 185                                              const RealType sw,
 186                                              const RealType r,
 187                                              const RealType rVdw,
 188                                              const real     zero)
 189 {
 190     if (r < rVdw)
 191     {
 192         real potential = potentialInp * sw;
 193         return (potential);
 194     }
 195     return (zero);
 196 }
 197
 198
 199 //! Templated free-energy non-bonded kernel
 200 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 201 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 202                                   rvec* gmx_restrict         xx,
 203                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 204                                   const t_forcerec* gmx_restrict fr,
 205                                   const t_mdatoms* gmx_restrict mdatoms,
 206                                   nb_kernel_data_t* gmx_restrict kernel_data,
 207                                   t_nrnb* gmx_restrict nrnb)
 208 {
 209 #define STATE_A 0
 210 #define STATE_B 1
 211 #define NSTATES 2
 212
 213     using RealType = typename DataTypes::RealType;
 214     using IntType  = typename DataTypes::IntType;
 215
 216     /* FIXME: How should these be handled with SIMD? */
 217     constexpr real onetwelfth = 1.0 / 12.0;
 218     constexpr real onesixth   = 1.0 / 6.0;
 219     constexpr real zero       = 0.0;
 220     constexpr real half       = 0.5;
 221     constexpr real one        = 1.0;
 222     constexpr real two        = 2.0;
 223     constexpr real six        = 6.0;
 224
 225     /* Extract pointer to non-bonded interaction constants */
 226     const interaction_const_t* ic = fr->ic;
 227
 228     // Extract pair list data
 229     const int  nri    = nlist->nri;
 230     const int* iinr   = nlist->iinr;
 231     const int* jindex = nlist->jindex;
 232     const int* jjnr   = nlist->jjnr;
 233     const int* shift  = nlist->shift;
 234     const int* gid    = nlist->gid;
 235
 236     const real* shiftvec      = fr->shift_vec[0];
 237     const real* chargeA       = mdatoms->chargeA;
 238     const real* chargeB       = mdatoms->chargeB;
 239     real*       Vc            = kernel_data->energygrp_elec;
 240     const int*  typeA         = mdatoms->typeA;
 241     const int*  typeB         = mdatoms->typeB;
 242     const int   ntype         = fr->ntype;
 243     const real* nbfp          = fr->nbfp.data();
 244     const real* nbfp_grid     = fr->ljpme_c6grid;
 245     real*       Vv            = kernel_data->energygrp_vdw;
 246     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 247     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 248     real*       dvdl          = kernel_data->dvdl;
 249     const auto& scParams      = *ic->softCoreParameters;
 250     const real  alpha_coul    = scParams.alphaCoulomb;
 251     const real  alpha_vdw     = scParams.alphaVdw;
 252     const real  lam_power     = scParams.lambdaPower;
 253     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
 254     const real  sigma6_min    = scParams.sigma6Minimum;
 255     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 256     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 257     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 258
 259     // Extract data from interaction_const_t
 260     const real facel           = ic->epsfac;
 261     const real rcoulomb        = ic->rcoulomb;
 262     const real krf             = ic->k_rf;
 263     const real crf             = ic->c_rf;
 264     const real sh_lj_ewald     = ic->sh_lj_ewald;
 265     const real rvdw            = ic->rvdw;
 266     const real dispersionShift = ic->dispersion_shift.cpot;
 267     const real repulsionShift  = ic->repulsion_shift.cpot;
 268
 269     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 270     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 271                "Potential switching is not supported for Coulomb with FEP");
 272
 273     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 274     if (vdwModifierIsPotSwitch)
 275     {
 276         const real d = ic->rvdw - ic->rvdw_switch;
 277         vdw_swV3     = -10.0 / (d * d * d);
 278         vdw_swV4     = 15.0 / (d * d * d * d);
 279         vdw_swV5     = -6.0 / (d * d * d * d * d);
 280         vdw_swF2     = -30.0 / (d * d * d);
 281         vdw_swF3     = 60.0 / (d * d * d * d);
 282         vdw_swF4     = -30.0 / (d * d * d * d * d);
 283     }
 284     else
 285     {
 286         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 287         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 288     }
 289
 290     int icoul;
 291     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 292     {
 293         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 294     }
 295     else
 296     {
 297         icoul = GMX_NBKERNEL_ELEC_NONE;
 298     }
 299
 300     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 301     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 302
 303     const real* tab_ewald_F_lj           = nullptr;
 304     const real* tab_ewald_V_lj           = nullptr;
 305     const real* ewtab                    = nullptr;
 306     real        coulombTableScale        = 0;
 307     real        coulombTableScaleInvHalf = 0;
 308     real        vdwTableScale            = 0;
 309     real        vdwTableScaleInvHalf     = 0;
 310     real        sh_ewald                 = 0;
 311     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 312     {
 313         sh_ewald = ic->sh_ewald;
 314     }
 315     if (elecInteractionTypeIsEwald)
 316     {
 317         const auto& coulombTables = *ic->coulombEwaldTables;
 318         ewtab                     = coulombTables.tableFDV0.data();
 319         coulombTableScale         = coulombTables.scale;
 320         coulombTableScaleInvHalf  = half / coulombTableScale;
 321     }
 322     if (vdwInteractionTypeIsEwald)
 323     {
 324         const auto& vdwTables = *ic->vdwEwaldTables;
 325         tab_ewald_F_lj        = vdwTables.tableF.data();
 326         tab_ewald_V_lj        = vdwTables.tableV.data();
 327         vdwTableScale         = vdwTables.scale;
 328         vdwTableScaleInvHalf  = half / vdwTableScale;
 329     }
 330
 331     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 332      * reciprocal space. When we use non-switched Ewald interactions, we
 333      * assume the soft-coring does not significantly affect the grid contribution
 334      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 335      *
 336      * However, we cannot use this approach for switch-modified since we would then
 337      * effectively end up evaluating a significantly different interaction here compared to the
 338      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 339      * position than what the user requested, or by switching different
 340      * things (1/r rather than short-range Ewald). For these settings, we just
 341      * use the traditional short-range Ewald interaction in that case.
 342      */
 343     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 344                        "Can not apply soft-core to switched Ewald potentials");
 345
 346     real dvdl_coul = 0;
 347     real dvdl_vdw  = 0;
 348
 349     /* Lambda factor for state A, 1-lambda*/
 350     real LFC[NSTATES], LFV[NSTATES];
 351     LFC[STATE_A] = one - lambda_coul;
 352     LFV[STATE_A] = one - lambda_vdw;
 353
 354     /* Lambda factor for state B, lambda*/
 355     LFC[STATE_B] = lambda_coul;
 356     LFV[STATE_B] = lambda_vdw;
 357
 358     /*derivative of the lambda factor for state A and B */
 359     real DLF[NSTATES];
 360     DLF[STATE_A] = -1;
 361     DLF[STATE_B] = 1;
 362
 363     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 364     constexpr real sc_r_power = 6.0_real;
 365     for (int i = 0; i < NSTATES; i++)
 366     {
 367         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 368         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 369         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 370         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 371     }
 372
 373     // TODO: We should get rid of using pointers to real
 374     const real* x             = xx[0];
 375     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 376     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 377
 378     for (int n = 0; n < nri; n++)
 379     {
 380         int npair_within_cutoff = 0;
 381
 382         const int  is3   = 3 * shift[n];
 383         const real shX   = shiftvec[is3];
 384         const real shY   = shiftvec[is3 + 1];
 385         const real shZ   = shiftvec[is3 + 2];
 386         const int  nj0   = jindex[n];
 387         const int  nj1   = jindex[n + 1];
 388         const int  ii    = iinr[n];
 389         const int  ii3   = 3 * ii;
 390         const real ix    = shX + x[ii3 + 0];
 391         const real iy    = shY + x[ii3 + 1];
 392         const real iz    = shZ + x[ii3 + 2];
 393         const real iqA   = facel * chargeA[ii];
 394         const real iqB   = facel * chargeB[ii];
 395         const int  ntiA  = 2 * ntype * typeA[ii];
 396         const int  ntiB  = 2 * ntype * typeB[ii];
 397         real       vctot = 0;
 398         real       vvtot = 0;
 399         real       fix   = 0;
 400         real       fiy   = 0;
 401         real       fiz   = 0;
 402
 403         for (int k = nj0; k < nj1; k++)
 404         {
 405             int            tj[NSTATES];
 406             const int      jnr = jjnr[k];
 407             const int      j3  = 3 * jnr;
 408             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 409             RealType       r, rinv, rp, rpm2;
 410             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
 411             const RealType dx  = ix - x[j3];
 412             const RealType dy  = iy - x[j3 + 1];
 413             const RealType dz  = iz - x[j3 + 2];
 414             const RealType rsq = dx * dx + dy * dy + dz * dz;
 415             RealType       FscalC[NSTATES], FscalV[NSTATES];
 416             /* Check if this pair on the exlusions list.*/
 417             const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
 418
 419             if (rsq >= rcutoff_max2 && bPairIncluded)
 420             {
 421                 /* We save significant time by skipping all code below.
 422                  * Note that with soft-core interactions, the actual cut-off
 423                  * check might be different. But since the soft-core distance
 424                  * is always larger than r, checking on r here is safe.
 425                  * Exclusions outside the cutoff can not be skipped as
 426                  * when using Ewald: the reciprocal-space
 427                  * Ewald component still needs to be subtracted.
 428                  */
 429
 430                 continue;
 431             }
 432             npair_within_cutoff++;
 433
 434             if (rsq > 0)
 435             {
 436                 /* Note that unlike in the nbnxn kernels, we do not need
 437                  * to clamp the value of rsq before taking the invsqrt
 438                  * to avoid NaN in the LJ calculation, since here we do
 439                  * not calculate LJ interactions when C6 and C12 are zero.
 440                  */
 441
 442                 rinv = gmx::invsqrt(rsq);
 443                 r    = rsq * rinv;
 444             }
 445             else
 446             {
 447                 /* The force at r=0 is zero, because of symmetry.
 448                  * But note that the potential is in general non-zero,
 449                  * since the soft-cored r will be non-zero.
 450                  */
 451                 rinv = 0;
 452                 r    = 0;
 453             }
 454
 455             if (useSoftCore)
 456             {
 457                 rpm2 = rsq * rsq;  /* r4 */
 458                 rp   = rpm2 * rsq; /* r6 */
 459             }
 460             else
 461             {
 462                 /* The soft-core power p will not affect the results
 463                  * with not using soft-core, so we use power of 0 which gives
 464                  * the simplest math and cheapest code.
 465                  */
 466                 rpm2 = rinv * rinv;
 467                 rp   = 1;
 468             }
 469
 470             RealType Fscal = 0;
 471
 472             qq[STATE_A] = iqA * chargeA[jnr];
 473             qq[STATE_B] = iqB * chargeB[jnr];
 474
 475             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 476             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 477
 478             if (bPairIncluded)
 479             {
 480                 c6[STATE_A] = nbfp[tj[STATE_A]];
 481                 c6[STATE_B] = nbfp[tj[STATE_B]];
 482
 483                 for (int i = 0; i < NSTATES; i++)
 484                 {
 485                     c12[i] = nbfp[tj[i] + 1];
 486                     if (useSoftCore)
 487                     {
 488                         if ((c6[i] > 0) && (c12[i] > 0))
 489                         {
 490                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 491                             sigma6[i] = half * c12[i] / c6[i];
 492                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 493                             {
 494                                 sigma6[i] = sigma6_min;
 495                             }
 496                         }
 497                         else
 498                         {
 499                             sigma6[i] = sigma6_def;
 500                         }
 501                     }
 502                 }
 503
 504                 if (useSoftCore)
 505                 {
 506                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 507                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 508                     {
 509                         alpha_vdw_eff  = 0;
 510                         alpha_coul_eff = 0;
 511                     }
 512                     else
 513                     {
 514                         alpha_vdw_eff  = alpha_vdw;
 515                         alpha_coul_eff = alpha_coul;
 516                     }
 517                 }
 518
 519                 for (int i = 0; i < NSTATES; i++)
 520                 {
 521                     FscalC[i] = 0;
 522                     FscalV[i] = 0;
 523                     Vcoul[i]  = 0;
 524                     Vvdw[i]   = 0;
 525
 526                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
 527
 528                     /* Only spend time on A or B state if it is non-zero */
 529                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 530                     {
 531                         /* this section has to be inside the loop because of the dependence on sigma6 */
 532                         if (useSoftCore)
 533                         {
 534                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
 535                             pthRoot(rpinvC, &rinvC, &rC);
 536                             if (scLambdasOrAlphasDiffer)
 537                             {
 538                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
 539                                 pthRoot(rpinvV, &rinvV, &rV);
 540                             }
 541                             else
 542                             {
 543                                 /* We can avoid one expensive pow and one / operation */
 544                                 rpinvV = rpinvC;
 545                                 rinvV  = rinvC;
 546                                 rV     = rC;
 547                             }
 548                         }
 549                         else
 550                         {
 551                             rpinvC = 1;
 552                             rinvC  = rinv;
 553                             rC     = r;
 554
 555                             rpinvV = 1;
 556                             rinvV  = rinv;
 557                             rV     = r;
 558                         }
 559
 560                         /* Only process the coulomb interactions if we have charges,
 561                          * and if we either include all entries in the list (no cutoff
 562                          * used in the kernel), or if we are within the cutoff.
 563                          */
 564                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 565                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 566
 567                         if ((qq[i] != 0) && computeElecInteraction)
 568                         {
 569                             if (elecInteractionTypeIsEwald)
 570                             {
 571                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 572                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 573                             }
 574                             else
 575                             {
 576                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 577                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 578                             }
 579                         }
 580
 581                         /* Only process the VDW interactions if we have
 582                          * some non-zero parameters, and if we either
 583                          * include all entries in the list (no cutoff used
 584                          * in the kernel), or if we are within the cutoff.
 585                          */
 586                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 587                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 588                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 589                         {
 590                             RealType rinv6;
 591                             if (useSoftCore)
 592                             {
 593                                 rinv6 = rpinvV;
 594                             }
 595                             else
 596                             {
 597                                 rinv6 = calculateRinv6(rinvV);
 598                             }
 599                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
 600                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
 601
 602                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 603                                                             dispersionShift, onesixth, onetwelfth);
 604                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 605
 606                             if (vdwInteractionTypeIsEwald)
 607                             {
 608                                 /* Subtract the grid potential at the cut-off */
 609                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 610                                                                          sh_lj_ewald, onesixth);
 611                             }
 612
 613                             if (vdwModifierIsPotSwitch)
 614                             {
 615                                 RealType d        = rV - ic->rvdw_switch;
 616                                 d                 = (d > zero) ? d : zero;
 617                                 const RealType d2 = d * d;
 618                                 const RealType sw =
 619                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 620                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 621
 622                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 623                                                                     rvdw, dsw, zero);
 624                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 625                             }
 626                         }
 627
 628                         /* FscalC (and FscalV) now contain: dV/drC * rC
 629                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 630                          * Further down we first multiply by r^p-2 and then by
 631                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 632                          */
 633                         FscalC[i] *= rpinvC;
 634                         FscalV[i] *= rpinvV;
 635                     }
 636                 } // end for (int i = 0; i < NSTATES; i++)
 637
 638                 /* Assemble A and B states */
 639                 for (int i = 0; i < NSTATES; i++)
 640                 {
 641                     vctot += LFC[i] * Vcoul[i];
 642                     vvtot += LFV[i] * Vvdw[i];
 643
 644                     Fscal += LFC[i] * FscalC[i] * rpm2;
 645                     Fscal += LFV[i] * FscalV[i] * rpm2;
 646
 647                     if (useSoftCore)
 648                     {
 649                         dvdl_coul += Vcoul[i] * DLF[i]
 650                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
 651                         dvdl_vdw += Vvdw[i] * DLF[i]
 652                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
 653                     }
 654                     else
 655                     {
 656                         dvdl_coul += Vcoul[i] * DLF[i];
 657                         dvdl_vdw += Vvdw[i] * DLF[i];
 658                     }
 659                 }
 660             } // end if (bPairIncluded)
 661             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 662             {
 663                 /* For excluded pairs, which are only in this pair list when
 664                  * using the Verlet scheme, we don't use soft-core.
 665                  * As there is no singularity, there is no need for soft-core.
 666                  */
 667                 const real FF = -two * krf;
 668                 RealType   VV = krf * rsq - crf;
 669
 670                 if (ii == jnr)
 671                 {
 672                     VV *= half;
 673                 }
 674
 675                 for (int i = 0; i < NSTATES; i++)
 676                 {
 677                     vctot += LFC[i] * qq[i] * VV;
 678                     Fscal += LFC[i] * qq[i] * FF;
 679                     dvdl_coul += DLF[i] * qq[i] * VV;
 680                 }
 681             }
 682
 683             if (elecInteractionTypeIsEwald && (r < rcoulomb || !bPairIncluded))
 684             {
 685                 /* See comment in the preamble. When using Ewald interactions
 686                  * (unless we use a switch modifier) we subtract the reciprocal-space
 687                  * Ewald component here which made it possible to apply the free
 688                  * energy interaction to 1/r (vanilla coulomb short-range part)
 689                  * above. This gets us closer to the ideal case of applying
 690                  * the softcore to the entire electrostatic interaction,
 691                  * including the reciprocal-space component.
 692                  */
 693                 real v_lr, f_lr;
 694
 695                 const RealType ewrt   = r * coulombTableScale;
 696                 IntType        ewitab = static_cast<IntType>(ewrt);
 697                 const RealType eweps  = ewrt - ewitab;
 698                 ewitab                = 4 * ewitab;
 699                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 700                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
 701                 f_lr *= rinv;
 702
 703                 /* Note that any possible Ewald shift has already been applied in
 704                  * the normal interaction part above.
 705                  */
 706
 707                 if (ii == jnr)
 708                 {
 709                     /* If we get here, the i particle (ii) has itself (jnr)
 710                      * in its neighborlist. This can only happen with the Verlet
 711                      * scheme, and corresponds to a self-interaction that will
 712                      * occur twice. Scale it down by 50% to only include it once.
 713                      */
 714                     v_lr *= half;
 715                 }
 716
 717                 for (int i = 0; i < NSTATES; i++)
 718                 {
 719                     vctot -= LFC[i] * qq[i] * v_lr;
 720                     Fscal -= LFC[i] * qq[i] * f_lr;
 721                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 722                 }
 723             }
 724
 725             if (vdwInteractionTypeIsEwald && r < rvdw)
 726             {
 727                 /* See comment in the preamble. When using LJ-Ewald interactions
 728                  * (unless we use a switch modifier) we subtract the reciprocal-space
 729                  * Ewald component here which made it possible to apply the free
 730                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 731                  * above. This gets us closer to the ideal case of applying
 732                  * the softcore to the entire VdW interaction,
 733                  * including the reciprocal-space component.
 734                  */
 735                 /* We could also use the analytical form here
 736                  * iso a table, but that can cause issues for
 737                  * r close to 0 for non-interacting pairs.
 738                  */
 739
 740                 const RealType rs   = rsq * rinv * vdwTableScale;
 741                 const IntType  ri   = static_cast<IntType>(rs);
 742                 const RealType frac = rs - ri;
 743                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 744                 /* TODO: Currently the Ewald LJ table does not contain
 745                  * the factor 1/6, we should add this.
 746                  */
 747                 const RealType FF = f_lr * rinv / six;
 748                 RealType       VV =
 749                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
 750                         / six;
 751
 752                 if (ii == jnr)
 753                 {
 754                     /* If we get here, the i particle (ii) has itself (jnr)
 755                      * in its neighborlist. This can only happen with the Verlet
 756                      * scheme, and corresponds to a self-interaction that will
 757                      * occur twice. Scale it down by 50% to only include it once.
 758                      */
 759                     VV *= half;
 760                 }
 761
 762                 for (int i = 0; i < NSTATES; i++)
 763                 {
 764                     const real c6grid = nbfp_grid[tj[i]];
 765                     vvtot += LFV[i] * c6grid * VV;
 766                     Fscal += LFV[i] * c6grid * FF;
 767                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 768                 }
 769             }
 770
 771             if (doForces)
 772             {
 773                 const real tx = Fscal * dx;
 774                 const real ty = Fscal * dy;
 775                 const real tz = Fscal * dz;
 776                 fix           = fix + tx;
 777                 fiy           = fiy + ty;
 778                 fiz           = fiz + tz;
 779                 /* OpenMP atomics are expensive, but this kernels is also
 780                  * expensive, so we can take this hit, instead of using
 781                  * thread-local output buffers and extra reduction.
 782                  *
 783                  * All the OpenMP regions in this file are trivial and should
 784                  * not throw, so no need for try/catch.
 785                  */
 786 #pragma omp atomic
 787                 f[j3] -= tx;
 788 #pragma omp atomic
 789                 f[j3 + 1] -= ty;
 790 #pragma omp atomic
 791                 f[j3 + 2] -= tz;
 792             }
 793         } // end for (int k = nj0; k < nj1; k++)
 794
 795         /* The atomics below are expensive with many OpenMP threads.
 796          * Here unperturbed i-particles will usually only have a few
 797          * (perturbed) j-particles in the list. Thus with a buffered list
 798          * we can skip a significant number of i-reductions with a check.
 799          */
 800         if (npair_within_cutoff > 0)
 801         {
 802             if (doForces)
 803             {
 804 #pragma omp atomic
 805                 f[ii3] += fix;
 806 #pragma omp atomic
 807                 f[ii3 + 1] += fiy;
 808 #pragma omp atomic
 809                 f[ii3 + 2] += fiz;
 810             }
 811             if (doShiftForces)
 812             {
 813 #pragma omp atomic
 814                 fshift[is3] += fix;
 815 #pragma omp atomic
 816                 fshift[is3 + 1] += fiy;
 817 #pragma omp atomic
 818                 fshift[is3 + 2] += fiz;
 819             }
 820             if (doPotential)
 821             {
 822                 int ggid = gid[n];
 823 #pragma omp atomic
 824                 Vc[ggid] += vctot;
 825 #pragma omp atomic
 826                 Vv[ggid] += vvtot;
 827             }
 828         }
 829     } // end for (int n = 0; n < nri; n++)
 830
 831 #pragma omp atomic
 832     dvdl[efptCOUL] += dvdl_coul;
 833 #pragma omp atomic
 834     dvdl[efptVDW] += dvdl_vdw;
 835
 836     /* Estimate flops, average for free energy stuff:
 837      * 12  flops per outer iteration
 838      * 150 flops per inner iteration
 839      */
 840 #pragma omp atomic
 841     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 842 }
 843
 844 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 845                                rvec* gmx_restrict         xx,
 846                                gmx::ForceWithShiftForces* forceWithShiftForces,
 847                                const t_forcerec* gmx_restrict fr,
 848                                const t_mdatoms* gmx_restrict mdatoms,
 849                                nb_kernel_data_t* gmx_restrict kernel_data,
 850                                t_nrnb* gmx_restrict nrnb);
 851
 852 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 853 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 854 {
 855     if (useSimd)
 856     {
 857 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
 858         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
 859         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 860                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 861 #else
 862         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 863                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 864 #endif
 865     }
 866     else
 867     {
 868         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 869                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 870     }
 871 }
 872
 873 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 874 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
 875 {
 876     if (vdwModifierIsPotSwitch)
 877     {
 878         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 879                                         elecInteractionTypeIsEwald, true>(useSimd));
 880     }
 881     else
 882     {
 883         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 884                                         elecInteractionTypeIsEwald, false>(useSimd));
 885     }
 886 }
 887
 888 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 889 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 890                                                           const bool vdwModifierIsPotSwitch,
 891                                                           const bool useSimd)
 892 {
 893     if (elecInteractionTypeIsEwald)
 894     {
 895         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 896                 vdwModifierIsPotSwitch, useSimd));
 897     }
 898     else
 899     {
 900         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 901                 vdwModifierIsPotSwitch, useSimd));
 902     }
 903 }
 904
 905 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 906 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 907                                                          const bool elecInteractionTypeIsEwald,
 908                                                          const bool vdwModifierIsPotSwitch,
 909                                                          const bool useSimd)
 910 {
 911     if (vdwInteractionTypeIsEwald)
 912     {
 913         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 914                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 915     }
 916     else
 917     {
 918         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 919                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 920     }
 921 }
 922
 923 template<bool useSoftCore>
 924 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 925                                                                   const bool vdwInteractionTypeIsEwald,
 926                                                                   const bool elecInteractionTypeIsEwald,
 927                                                                   const bool vdwModifierIsPotSwitch,
 928                                                                   const bool useSimd)
 929 {
 930     if (scLambdasOrAlphasDiffer)
 931     {
 932         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 933                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 934     }
 935     else
 936     {
 937         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 938                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 939     }
 940 }
 941
 942 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
 943                                      const bool                 vdwInteractionTypeIsEwald,
 944                                      const bool                 elecInteractionTypeIsEwald,
 945                                      const bool                 vdwModifierIsPotSwitch,
 946                                      const bool                 useSimd,
 947                                      const interaction_const_t& ic)
 948 {
 949     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
 950     {
 951         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
 952                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 953                 vdwModifierIsPotSwitch, useSimd));
 954     }
 955     else
 956     {
 957         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
 958                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 959                 vdwModifierIsPotSwitch, useSimd));
 960     }
 961 }
 962
 963
 964 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 965                                rvec*                      xx,
 966                                gmx::ForceWithShiftForces* ff,
 967                                const t_forcerec*          fr,
 968                                const t_mdatoms*           mdatoms,
 969                                nb_kernel_data_t*          kernel_data,
 970                                t_nrnb*                    nrnb)
 971 {
 972     const interaction_const_t& ic = *fr->ic;
 973     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == eelCUT || EEL_RF(ic.eeltype),
 974                "Unsupported eeltype with free energy");
 975     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
 976
 977     const auto& scParams                   = *ic.softCoreParameters;
 978     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
 979     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
 980     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == eintmodPOTSWITCH);
 981     bool        scLambdasOrAlphasDiffer    = true;
 982     const bool  useSimd                    = fr->use_simd_kernels;
 983
 984     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
 985     {
 986         scLambdasOrAlphasDiffer = false;
 987     }
 988     else
 989     {
 990         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW]
 991             && scParams.alphaCoulomb == scParams.alphaVdw)
 992         {
 993             scLambdasOrAlphasDiffer = false;
 994         }
 995     }
 996
 997     KernelFunction kernelFunc;
 998     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 999                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, ic);
1000     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1001 }