src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include <cmath>
  43
  44 #include <algorithm>
  45
  46 #include "gromacs/gmxlib/nrnb.h"
  47 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  48 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  49 #include "gromacs/math/functions.h"
  50 #include "gromacs/math/vec.h"
  51 #include "gromacs/mdtypes/forceoutput.h"
  52 #include "gromacs/mdtypes/forcerec.h"
  53 #include "gromacs/mdtypes/md_enums.h"
  54 #include "gromacs/simd/simd.h"
  55 #include "gromacs/utility/fatalerror.h"
  56
  57
  58 //! Scalar (non-SIMD) data types.
  59 struct ScalarDataTypes
  60 {
  61     using RealType                     = real; //!< The data type to use as real.
  62     using IntType                      = int;  //!< The data type to use as int.
  63     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
  64     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
  65 };
  66
  67 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
  68 //! SIMD data types.
  69 struct SimdDataTypes
  70 {
  71     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
  72     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
  73     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
  74     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
  75 };
  76 #endif
  77
  78 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  79 template<class RealType>
  80 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
  81 {
  82     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  83     *pthRoot    = 1 / (*invPthRoot);
  84 }
  85
  86 template<class RealType>
  87 static inline RealType calculateRinv6(const RealType rinvV)
  88 {
  89     RealType rinv6 = rinvV * rinvV;
  90     return (rinv6 * rinv6 * rinv6);
  91 }
  92
  93 template<class RealType>
  94 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
  95 {
  96     return (c6 * rinv6);
  97 }
  98
  99 template<class RealType>
 100 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
 101 {
 102     return (c12 * rinv6 * rinv6);
 103 }
 104
 105 /* reaction-field electrostatics */
 106 template<class RealType>
 107 static inline RealType reactionFieldScalarForce(const RealType qq,
 108                                                 const RealType rinv,
 109                                                 const RealType r,
 110                                                 const real     krf,
 111                                                 const real     two)
 112 {
 113     return (qq * (rinv - two * krf * r * r));
 114 }
 115 template<class RealType>
 116 static inline RealType reactionFieldPotential(const RealType qq,
 117                                               const RealType rinv,
 118                                               const RealType r,
 119                                               const real     krf,
 120                                               const real     potentialShift)
 121 {
 122     return (qq * (rinv + krf * r * r - potentialShift));
 123 }
 124
 125 /* Ewald electrostatics */
 126 template<class RealType>
 127 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
 128 {
 129     return (coulomb * rinv);
 130 }
 131 template<class RealType>
 132 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
 133 {
 134     return (coulomb * (rinv - potentialShift));
 135 }
 136
 137 /* cutoff LJ */
 138 template<class RealType>
 139 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
 140 {
 141     return (v12 - v6);
 142 }
 143 template<class RealType>
 144 static inline RealType lennardJonesPotential(const RealType v6,
 145                                              const RealType v12,
 146                                              const RealType c6,
 147                                              const RealType c12,
 148                                              const real     repulsionShift,
 149                                              const real     dispersionShift,
 150                                              const real     onesixth,
 151                                              const real     onetwelfth)
 152 {
 153     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 154 }
 155
 156 /* Ewald LJ */
 157 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 158 {
 159     return (c6grid * potentialShift * onesixth);
 160 }
 161
 162 /* LJ Potential switch */
 163 template<class RealType>
 164 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
 165                                                const RealType potential,
 166                                                const RealType sw,
 167                                                const RealType r,
 168                                                const RealType rVdw,
 169                                                const RealType dsw,
 170                                                const real     zero)
 171 {
 172     if (r < rVdw)
 173     {
 174         real fScalar = fScalarInp * sw - r * potential * dsw;
 175         return (fScalar);
 176     }
 177     return (zero);
 178 }
 179 template<class RealType>
 180 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
 181                                              const RealType sw,
 182                                              const RealType r,
 183                                              const RealType rVdw,
 184                                              const real     zero)
 185 {
 186     if (r < rVdw)
 187     {
 188         real potential = potentialInp * sw;
 189         return (potential);
 190     }
 191     return (zero);
 192 }
 193
 194
 195 //! Templated free-energy non-bonded kernel
 196 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 197 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 198                                   rvec* gmx_restrict         xx,
 199                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 200                                   const t_forcerec* gmx_restrict fr,
 201                                   const t_mdatoms* gmx_restrict mdatoms,
 202                                   nb_kernel_data_t* gmx_restrict kernel_data,
 203                                   t_nrnb* gmx_restrict nrnb)
 204 {
 205 #define STATE_A 0
 206 #define STATE_B 1
 207 #define NSTATES 2
 208
 209     using RealType = typename DataTypes::RealType;
 210     using IntType  = typename DataTypes::IntType;
 211
 212     /* FIXME: How should these be handled with SIMD? */
 213     constexpr real onetwelfth = 1.0 / 12.0;
 214     constexpr real onesixth   = 1.0 / 6.0;
 215     constexpr real zero       = 0.0;
 216     constexpr real half       = 0.5;
 217     constexpr real one        = 1.0;
 218     constexpr real two        = 2.0;
 219     constexpr real six        = 6.0;
 220
 221     /* Extract pointer to non-bonded interaction constants */
 222     const interaction_const_t* ic = fr->ic;
 223
 224     // Extract pair list data
 225     const int  nri    = nlist->nri;
 226     const int* iinr   = nlist->iinr;
 227     const int* jindex = nlist->jindex;
 228     const int* jjnr   = nlist->jjnr;
 229     const int* shift  = nlist->shift;
 230     const int* gid    = nlist->gid;
 231
 232     const real* shiftvec      = fr->shift_vec[0];
 233     const real* chargeA       = mdatoms->chargeA;
 234     const real* chargeB       = mdatoms->chargeB;
 235     real*       Vc            = kernel_data->energygrp_elec;
 236     const int*  typeA         = mdatoms->typeA;
 237     const int*  typeB         = mdatoms->typeB;
 238     const int   ntype         = fr->ntype;
 239     const real* nbfp          = fr->nbfp.data();
 240     const real* nbfp_grid     = fr->ljpme_c6grid;
 241     real*       Vv            = kernel_data->energygrp_vdw;
 242     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 243     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 244     real*       dvdl          = kernel_data->dvdl;
 245     const real  alpha_coul    = fr->sc_alphacoul;
 246     const real  alpha_vdw     = fr->sc_alphavdw;
 247     const real  lam_power     = fr->sc_power;
 248     const real  sigma6_def    = fr->sc_sigma6_def;
 249     const real  sigma6_min    = fr->sc_sigma6_min;
 250     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 251     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 252     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 253
 254     // Extract data from interaction_const_t
 255     const real facel           = ic->epsfac;
 256     const real rcoulomb        = ic->rcoulomb;
 257     const real krf             = ic->k_rf;
 258     const real crf             = ic->c_rf;
 259     const real sh_lj_ewald     = ic->sh_lj_ewald;
 260     const real rvdw            = ic->rvdw;
 261     const real dispersionShift = ic->dispersion_shift.cpot;
 262     const real repulsionShift  = ic->repulsion_shift.cpot;
 263
 264     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 265     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 266                "Potential switching is not supported for Coulomb with FEP");
 267
 268     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 269     if (vdwModifierIsPotSwitch)
 270     {
 271         const real d = ic->rvdw - ic->rvdw_switch;
 272         vdw_swV3     = -10.0 / (d * d * d);
 273         vdw_swV4     = 15.0 / (d * d * d * d);
 274         vdw_swV5     = -6.0 / (d * d * d * d * d);
 275         vdw_swF2     = -30.0 / (d * d * d);
 276         vdw_swF3     = 60.0 / (d * d * d * d);
 277         vdw_swF4     = -30.0 / (d * d * d * d * d);
 278     }
 279     else
 280     {
 281         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 282         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 283     }
 284
 285     int icoul;
 286     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 287     {
 288         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 289     }
 290     else
 291     {
 292         icoul = GMX_NBKERNEL_ELEC_NONE;
 293     }
 294
 295     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 296     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 297
 298     const real* tab_ewald_F_lj = nullptr;
 299     const real* tab_ewald_V_lj = nullptr;
 300     const real* ewtab          = nullptr;
 301     real        ewtabscale     = 0;
 302     real        ewtabhalfspace = 0;
 303     real        sh_ewald       = 0;
 304     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 305     {
 306         const auto& tables = *ic->coulombEwaldTables;
 307         sh_ewald           = ic->sh_ewald;
 308         ewtab              = tables.tableFDV0.data();
 309         ewtabscale         = tables.scale;
 310         ewtabhalfspace     = half / ewtabscale;
 311         tab_ewald_F_lj     = tables.tableF.data();
 312         tab_ewald_V_lj     = tables.tableV.data();
 313     }
 314
 315     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 316      * reciprocal space. When we use non-switched Ewald interactions, we
 317      * assume the soft-coring does not significantly affect the grid contribution
 318      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 319      *
 320      * However, we cannot use this approach for switch-modified since we would then
 321      * effectively end up evaluating a significantly different interaction here compared to the
 322      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 323      * position than what the user requested, or by switching different
 324      * things (1/r rather than short-range Ewald). For these settings, we just
 325      * use the traditional short-range Ewald interaction in that case.
 326      */
 327     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 328                        "Can not apply soft-core to switched Ewald potentials");
 329
 330     real dvdl_coul = 0;
 331     real dvdl_vdw  = 0;
 332
 333     /* Lambda factor for state A, 1-lambda*/
 334     real LFC[NSTATES], LFV[NSTATES];
 335     LFC[STATE_A] = one - lambda_coul;
 336     LFV[STATE_A] = one - lambda_vdw;
 337
 338     /* Lambda factor for state B, lambda*/
 339     LFC[STATE_B] = lambda_coul;
 340     LFV[STATE_B] = lambda_vdw;
 341
 342     /*derivative of the lambda factor for state A and B */
 343     real DLF[NSTATES];
 344     DLF[STATE_A] = -1;
 345     DLF[STATE_B] = 1;
 346
 347     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 348     constexpr real sc_r_power = 6.0_real;
 349     for (int i = 0; i < NSTATES; i++)
 350     {
 351         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 352         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 353         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 354         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 355     }
 356
 357     // TODO: We should get rid of using pointers to real
 358     const real* x             = xx[0];
 359     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 360     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 361
 362     for (int n = 0; n < nri; n++)
 363     {
 364         int npair_within_cutoff = 0;
 365
 366         const int  is3   = 3 * shift[n];
 367         const real shX   = shiftvec[is3];
 368         const real shY   = shiftvec[is3 + 1];
 369         const real shZ   = shiftvec[is3 + 2];
 370         const int  nj0   = jindex[n];
 371         const int  nj1   = jindex[n + 1];
 372         const int  ii    = iinr[n];
 373         const int  ii3   = 3 * ii;
 374         const real ix    = shX + x[ii3 + 0];
 375         const real iy    = shY + x[ii3 + 1];
 376         const real iz    = shZ + x[ii3 + 2];
 377         const real iqA   = facel * chargeA[ii];
 378         const real iqB   = facel * chargeB[ii];
 379         const int  ntiA  = 2 * ntype * typeA[ii];
 380         const int  ntiB  = 2 * ntype * typeB[ii];
 381         real       vctot = 0;
 382         real       vvtot = 0;
 383         real       fix   = 0;
 384         real       fiy   = 0;
 385         real       fiz   = 0;
 386
 387         for (int k = nj0; k < nj1; k++)
 388         {
 389             int            tj[NSTATES];
 390             const int      jnr = jjnr[k];
 391             const int      j3  = 3 * jnr;
 392             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 393             RealType       r, rinv, rp, rpm2;
 394             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
 395             const RealType dx  = ix - x[j3];
 396             const RealType dy  = iy - x[j3 + 1];
 397             const RealType dz  = iz - x[j3 + 2];
 398             const RealType rsq = dx * dx + dy * dy + dz * dz;
 399             RealType       FscalC[NSTATES], FscalV[NSTATES];
 400
 401             if (rsq >= rcutoff_max2)
 402             {
 403                 /* We save significant time by skipping all code below.
 404                  * Note that with soft-core interactions, the actual cut-off
 405                  * check might be different. But since the soft-core distance
 406                  * is always larger than r, checking on r here is safe.
 407                  */
 408                 continue;
 409             }
 410             npair_within_cutoff++;
 411
 412             if (rsq > 0)
 413             {
 414                 /* Note that unlike in the nbnxn kernels, we do not need
 415                  * to clamp the value of rsq before taking the invsqrt
 416                  * to avoid NaN in the LJ calculation, since here we do
 417                  * not calculate LJ interactions when C6 and C12 are zero.
 418                  */
 419
 420                 rinv = gmx::invsqrt(rsq);
 421                 r    = rsq * rinv;
 422             }
 423             else
 424             {
 425                 /* The force at r=0 is zero, because of symmetry.
 426                  * But note that the potential is in general non-zero,
 427                  * since the soft-cored r will be non-zero.
 428                  */
 429                 rinv = 0;
 430                 r    = 0;
 431             }
 432
 433             if (useSoftCore)
 434             {
 435                 rpm2 = rsq * rsq;  /* r4 */
 436                 rp   = rpm2 * rsq; /* r6 */
 437             }
 438             else
 439             {
 440                 /* The soft-core power p will not affect the results
 441                  * with not using soft-core, so we use power of 0 which gives
 442                  * the simplest math and cheapest code.
 443                  */
 444                 rpm2 = rinv * rinv;
 445                 rp   = 1;
 446             }
 447
 448             RealType Fscal = 0;
 449
 450             qq[STATE_A] = iqA * chargeA[jnr];
 451             qq[STATE_B] = iqB * chargeB[jnr];
 452
 453             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 454             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 455
 456             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 457             {
 458                 c6[STATE_A] = nbfp[tj[STATE_A]];
 459                 c6[STATE_B] = nbfp[tj[STATE_B]];
 460
 461                 for (int i = 0; i < NSTATES; i++)
 462                 {
 463                     c12[i] = nbfp[tj[i] + 1];
 464                     if (useSoftCore)
 465                     {
 466                         if ((c6[i] > 0) && (c12[i] > 0))
 467                         {
 468                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 469                             sigma6[i] = half * c12[i] / c6[i];
 470                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 471                             {
 472                                 sigma6[i] = sigma6_min;
 473                             }
 474                         }
 475                         else
 476                         {
 477                             sigma6[i] = sigma6_def;
 478                         }
 479                     }
 480                 }
 481
 482                 if (useSoftCore)
 483                 {
 484                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 485                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 486                     {
 487                         alpha_vdw_eff  = 0;
 488                         alpha_coul_eff = 0;
 489                     }
 490                     else
 491                     {
 492                         alpha_vdw_eff  = alpha_vdw;
 493                         alpha_coul_eff = alpha_coul;
 494                     }
 495                 }
 496
 497                 for (int i = 0; i < NSTATES; i++)
 498                 {
 499                     FscalC[i] = 0;
 500                     FscalV[i] = 0;
 501                     Vcoul[i]  = 0;
 502                     Vvdw[i]   = 0;
 503
 504                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
 505
 506                     /* Only spend time on A or B state if it is non-zero */
 507                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 508                     {
 509                         /* this section has to be inside the loop because of the dependence on sigma6 */
 510                         if (useSoftCore)
 511                         {
 512                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
 513                             pthRoot(rpinvC, &rinvC, &rC);
 514                             if (scLambdasOrAlphasDiffer)
 515                             {
 516                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
 517                                 pthRoot(rpinvV, &rinvV, &rV);
 518                             }
 519                             else
 520                             {
 521                                 /* We can avoid one expensive pow and one / operation */
 522                                 rpinvV = rpinvC;
 523                                 rinvV  = rinvC;
 524                                 rV     = rC;
 525                             }
 526                         }
 527                         else
 528                         {
 529                             rpinvC = 1;
 530                             rinvC  = rinv;
 531                             rC     = r;
 532
 533                             rpinvV = 1;
 534                             rinvV  = rinv;
 535                             rV     = r;
 536                         }
 537
 538                         /* Only process the coulomb interactions if we have charges,
 539                          * and if we either include all entries in the list (no cutoff
 540                          * used in the kernel), or if we are within the cutoff.
 541                          */
 542                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 543                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 544
 545                         if ((qq[i] != 0) && computeElecInteraction)
 546                         {
 547                             if (elecInteractionTypeIsEwald)
 548                             {
 549                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 550                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 551                             }
 552                             else
 553                             {
 554                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 555                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 556                             }
 557                         }
 558
 559                         /* Only process the VDW interactions if we have
 560                          * some non-zero parameters, and if we either
 561                          * include all entries in the list (no cutoff used
 562                          * in the kernel), or if we are within the cutoff.
 563                          */
 564                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 565                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 566                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 567                         {
 568                             RealType rinv6;
 569                             if (useSoftCore)
 570                             {
 571                                 rinv6 = rpinvV;
 572                             }
 573                             else
 574                             {
 575                                 rinv6 = calculateRinv6(rinvV);
 576                             }
 577                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
 578                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
 579
 580                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 581                                                             dispersionShift, onesixth, onetwelfth);
 582                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 583
 584                             if (vdwInteractionTypeIsEwald)
 585                             {
 586                                 /* Subtract the grid potential at the cut-off */
 587                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 588                                                                          sh_lj_ewald, onesixth);
 589                             }
 590
 591                             if (vdwModifierIsPotSwitch)
 592                             {
 593                                 RealType d        = rV - ic->rvdw_switch;
 594                                 d                 = (d > zero) ? d : zero;
 595                                 const RealType d2 = d * d;
 596                                 const RealType sw =
 597                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 598                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 599
 600                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 601                                                                     rvdw, dsw, zero);
 602                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 603                             }
 604                         }
 605
 606                         /* FscalC (and FscalV) now contain: dV/drC * rC
 607                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 608                          * Further down we first multiply by r^p-2 and then by
 609                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 610                          */
 611                         FscalC[i] *= rpinvC;
 612                         FscalV[i] *= rpinvV;
 613                     }
 614                 }
 615
 616                 /* Assemble A and B states */
 617                 for (int i = 0; i < NSTATES; i++)
 618                 {
 619                     vctot += LFC[i] * Vcoul[i];
 620                     vvtot += LFV[i] * Vvdw[i];
 621
 622                     Fscal += LFC[i] * FscalC[i] * rpm2;
 623                     Fscal += LFV[i] * FscalV[i] * rpm2;
 624
 625                     if (useSoftCore)
 626                     {
 627                         dvdl_coul += Vcoul[i] * DLF[i]
 628                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
 629                         dvdl_vdw += Vvdw[i] * DLF[i]
 630                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
 631                     }
 632                     else
 633                     {
 634                         dvdl_coul += Vcoul[i] * DLF[i];
 635                         dvdl_vdw += Vvdw[i] * DLF[i];
 636                     }
 637                 }
 638             }
 639             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 640             {
 641                 /* For excluded pairs, which are only in this pair list when
 642                  * using the Verlet scheme, we don't use soft-core.
 643                  * As there is no singularity, there is no need for soft-core.
 644                  */
 645                 const real FF = -two * krf;
 646                 RealType   VV = krf * rsq - crf;
 647
 648                 if (ii == jnr)
 649                 {
 650                     VV *= half;
 651                 }
 652
 653                 for (int i = 0; i < NSTATES; i++)
 654                 {
 655                     vctot += LFC[i] * qq[i] * VV;
 656                     Fscal += LFC[i] * qq[i] * FF;
 657                     dvdl_coul += DLF[i] * qq[i] * VV;
 658                 }
 659             }
 660
 661             if (elecInteractionTypeIsEwald && r < rcoulomb)
 662             {
 663                 /* See comment in the preamble. When using Ewald interactions
 664                  * (unless we use a switch modifier) we subtract the reciprocal-space
 665                  * Ewald component here which made it possible to apply the free
 666                  * energy interaction to 1/r (vanilla coulomb short-range part)
 667                  * above. This gets us closer to the ideal case of applying
 668                  * the softcore to the entire electrostatic interaction,
 669                  * including the reciprocal-space component.
 670                  */
 671                 real v_lr, f_lr;
 672
 673                 const RealType ewrt   = r * ewtabscale;
 674                 IntType        ewitab = static_cast<IntType>(ewrt);
 675                 const RealType eweps  = ewrt - ewitab;
 676                 ewitab                = 4 * ewitab;
 677                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 678                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
 679                 f_lr *= rinv;
 680
 681                 /* Note that any possible Ewald shift has already been applied in
 682                  * the normal interaction part above.
 683                  */
 684
 685                 if (ii == jnr)
 686                 {
 687                     /* If we get here, the i particle (ii) has itself (jnr)
 688                      * in its neighborlist. This can only happen with the Verlet
 689                      * scheme, and corresponds to a self-interaction that will
 690                      * occur twice. Scale it down by 50% to only include it once.
 691                      */
 692                     v_lr *= half;
 693                 }
 694
 695                 for (int i = 0; i < NSTATES; i++)
 696                 {
 697                     vctot -= LFC[i] * qq[i] * v_lr;
 698                     Fscal -= LFC[i] * qq[i] * f_lr;
 699                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 700                 }
 701             }
 702
 703             if (vdwInteractionTypeIsEwald && r < rvdw)
 704             {
 705                 /* See comment in the preamble. When using LJ-Ewald interactions
 706                  * (unless we use a switch modifier) we subtract the reciprocal-space
 707                  * Ewald component here which made it possible to apply the free
 708                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 709                  * above. This gets us closer to the ideal case of applying
 710                  * the softcore to the entire VdW interaction,
 711                  * including the reciprocal-space component.
 712                  */
 713                 /* We could also use the analytical form here
 714                  * iso a table, but that can cause issues for
 715                  * r close to 0 for non-interacting pairs.
 716                  */
 717
 718                 const RealType rs   = rsq * rinv * ewtabscale;
 719                 const IntType  ri   = static_cast<IntType>(rs);
 720                 const RealType frac = rs - ri;
 721                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 722                 /* TODO: Currently the Ewald LJ table does not contain
 723                  * the factor 1/6, we should add this.
 724                  */
 725                 const RealType FF = f_lr * rinv / six;
 726                 RealType       VV =
 727                         (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
 728
 729                 if (ii == jnr)
 730                 {
 731                     /* If we get here, the i particle (ii) has itself (jnr)
 732                      * in its neighborlist. This can only happen with the Verlet
 733                      * scheme, and corresponds to a self-interaction that will
 734                      * occur twice. Scale it down by 50% to only include it once.
 735                      */
 736                     VV *= half;
 737                 }
 738
 739                 for (int i = 0; i < NSTATES; i++)
 740                 {
 741                     const real c6grid = nbfp_grid[tj[i]];
 742                     vvtot += LFV[i] * c6grid * VV;
 743                     Fscal += LFV[i] * c6grid * FF;
 744                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 745                 }
 746             }
 747
 748             if (doForces)
 749             {
 750                 const real tx = Fscal * dx;
 751                 const real ty = Fscal * dy;
 752                 const real tz = Fscal * dz;
 753                 fix           = fix + tx;
 754                 fiy           = fiy + ty;
 755                 fiz           = fiz + tz;
 756                 /* OpenMP atomics are expensive, but this kernels is also
 757                  * expensive, so we can take this hit, instead of using
 758                  * thread-local output buffers and extra reduction.
 759                  *
 760                  * All the OpenMP regions in this file are trivial and should
 761                  * not throw, so no need for try/catch.
 762                  */
 763 #pragma omp atomic
 764                 f[j3] -= tx;
 765 #pragma omp atomic
 766                 f[j3 + 1] -= ty;
 767 #pragma omp atomic
 768                 f[j3 + 2] -= tz;
 769             }
 770         }
 771
 772         /* The atomics below are expensive with many OpenMP threads.
 773          * Here unperturbed i-particles will usually only have a few
 774          * (perturbed) j-particles in the list. Thus with a buffered list
 775          * we can skip a significant number of i-reductions with a check.
 776          */
 777         if (npair_within_cutoff > 0)
 778         {
 779             if (doForces)
 780             {
 781 #pragma omp atomic
 782                 f[ii3] += fix;
 783 #pragma omp atomic
 784                 f[ii3 + 1] += fiy;
 785 #pragma omp atomic
 786                 f[ii3 + 2] += fiz;
 787             }
 788             if (doShiftForces)
 789             {
 790 #pragma omp atomic
 791                 fshift[is3] += fix;
 792 #pragma omp atomic
 793                 fshift[is3 + 1] += fiy;
 794 #pragma omp atomic
 795                 fshift[is3 + 2] += fiz;
 796             }
 797             if (doPotential)
 798             {
 799                 int ggid = gid[n];
 800 #pragma omp atomic
 801                 Vc[ggid] += vctot;
 802 #pragma omp atomic
 803                 Vv[ggid] += vvtot;
 804             }
 805         }
 806     }
 807
 808 #pragma omp atomic
 809     dvdl[efptCOUL] += dvdl_coul;
 810 #pragma omp atomic
 811     dvdl[efptVDW] += dvdl_vdw;
 812
 813     /* Estimate flops, average for free energy stuff:
 814      * 12  flops per outer iteration
 815      * 150 flops per inner iteration
 816      */
 817 #pragma omp atomic
 818     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 819 }
 820
 821 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 822                                rvec* gmx_restrict         xx,
 823                                gmx::ForceWithShiftForces* forceWithShiftForces,
 824                                const t_forcerec* gmx_restrict fr,
 825                                const t_mdatoms* gmx_restrict mdatoms,
 826                                nb_kernel_data_t* gmx_restrict kernel_data,
 827                                t_nrnb* gmx_restrict nrnb);
 828
 829 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 830 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 831 {
 832     if (useSimd)
 833     {
 834 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
 835         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
 836         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 837                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 838 #else
 839         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 840                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 841 #endif
 842     }
 843     else
 844     {
 845         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 846                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 847     }
 848 }
 849
 850 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 851 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
 852 {
 853     if (vdwModifierIsPotSwitch)
 854     {
 855         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 856                                         elecInteractionTypeIsEwald, true>(useSimd));
 857     }
 858     else
 859     {
 860         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 861                                         elecInteractionTypeIsEwald, false>(useSimd));
 862     }
 863 }
 864
 865 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 866 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 867                                                           const bool vdwModifierIsPotSwitch,
 868                                                           const bool useSimd)
 869 {
 870     if (elecInteractionTypeIsEwald)
 871     {
 872         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 873                 vdwModifierIsPotSwitch, useSimd));
 874     }
 875     else
 876     {
 877         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 878                 vdwModifierIsPotSwitch, useSimd));
 879     }
 880 }
 881
 882 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 883 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 884                                                          const bool elecInteractionTypeIsEwald,
 885                                                          const bool vdwModifierIsPotSwitch,
 886                                                          const bool useSimd)
 887 {
 888     if (vdwInteractionTypeIsEwald)
 889     {
 890         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 891                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 892     }
 893     else
 894     {
 895         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 896                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 897     }
 898 }
 899
 900 template<bool useSoftCore>
 901 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 902                                                                   const bool vdwInteractionTypeIsEwald,
 903                                                                   const bool elecInteractionTypeIsEwald,
 904                                                                   const bool vdwModifierIsPotSwitch,
 905                                                                   const bool useSimd)
 906 {
 907     if (scLambdasOrAlphasDiffer)
 908     {
 909         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 910                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 911     }
 912     else
 913     {
 914         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 915                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 916     }
 917 }
 918
 919 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 920                                      const bool        vdwInteractionTypeIsEwald,
 921                                      const bool        elecInteractionTypeIsEwald,
 922                                      const bool        vdwModifierIsPotSwitch,
 923                                      const bool        useSimd,
 924                                      const t_forcerec* fr)
 925 {
 926     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 927     {
 928         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
 929                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 930                 vdwModifierIsPotSwitch, useSimd));
 931     }
 932     else
 933     {
 934         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
 935                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 936                 vdwModifierIsPotSwitch, useSimd));
 937     }
 938 }
 939
 940
 941 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 942                                rvec*                      xx,
 943                                gmx::ForceWithShiftForces* ff,
 944                                const t_forcerec*          fr,
 945                                const t_mdatoms*           mdatoms,
 946                                nb_kernel_data_t*          kernel_data,
 947                                t_nrnb*                    nrnb)
 948 {
 949     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 950                "Unsupported eeltype with free energy");
 951
 952     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 953     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 954     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 955     bool       scLambdasOrAlphasDiffer    = true;
 956     const bool useSimd                    = fr->use_simd_kernels;
 957
 958     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 959     {
 960         scLambdasOrAlphasDiffer = false;
 961     }
 962     else if (fr->sc_r_power == 6.0_real)
 963     {
 964         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 965         {
 966             scLambdasOrAlphasDiffer = false;
 967         }
 968     }
 969     else
 970     {
 971         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 972     }
 973
 974     KernelFunction kernelFunc;
 975     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 976                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, fr);
 977     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 978 }