src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include <cmath>
  43
  44 #include <algorithm>
  45
  46 #include "gromacs/gmxlib/nrnb.h"
  47 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  48 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  49 #include "gromacs/math/functions.h"
  50 #include "gromacs/math/vec.h"
  51 #include "gromacs/mdtypes/forceoutput.h"
  52 #include "gromacs/mdtypes/forcerec.h"
  53 #include "gromacs/mdtypes/interaction_const.h"
  54 #include "gromacs/mdtypes/md_enums.h"
  55 #include "gromacs/mdtypes/mdatom.h"
  56 #include "gromacs/simd/simd.h"
  57 #include "gromacs/utility/fatalerror.h"
  58
  59
  60 //! Scalar (non-SIMD) data types.
  61 struct ScalarDataTypes
  62 {
  63     using RealType                     = real; //!< The data type to use as real.
  64     using IntType                      = int;  //!< The data type to use as int.
  65     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
  66     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
  67 };
  68
  69 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
  70 //! SIMD data types.
  71 struct SimdDataTypes
  72 {
  73     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
  74     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
  75     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
  76     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
  77 };
  78 #endif
  79
  80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  81 template<class RealType>
  82 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
  83 {
  84     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  85     *pthRoot    = 1 / (*invPthRoot);
  86 }
  87
  88 template<class RealType>
  89 static inline RealType calculateRinv6(const RealType rinvV)
  90 {
  91     RealType rinv6 = rinvV * rinvV;
  92     return (rinv6 * rinv6 * rinv6);
  93 }
  94
  95 template<class RealType>
  96 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
  97 {
  98     return (c6 * rinv6);
  99 }
 100
 101 template<class RealType>
 102 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
 103 {
 104     return (c12 * rinv6 * rinv6);
 105 }
 106
 107 /* reaction-field electrostatics */
 108 template<class RealType>
 109 static inline RealType reactionFieldScalarForce(const RealType qq,
 110                                                 const RealType rinv,
 111                                                 const RealType r,
 112                                                 const real     krf,
 113                                                 const real     two)
 114 {
 115     return (qq * (rinv - two * krf * r * r));
 116 }
 117 template<class RealType>
 118 static inline RealType reactionFieldPotential(const RealType qq,
 119                                               const RealType rinv,
 120                                               const RealType r,
 121                                               const real     krf,
 122                                               const real     potentialShift)
 123 {
 124     return (qq * (rinv + krf * r * r - potentialShift));
 125 }
 126
 127 /* Ewald electrostatics */
 128 template<class RealType>
 129 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
 130 {
 131     return (coulomb * rinv);
 132 }
 133 template<class RealType>
 134 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
 135 {
 136     return (coulomb * (rinv - potentialShift));
 137 }
 138
 139 /* cutoff LJ */
 140 template<class RealType>
 141 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
 142 {
 143     return (v12 - v6);
 144 }
 145 template<class RealType>
 146 static inline RealType lennardJonesPotential(const RealType v6,
 147                                              const RealType v12,
 148                                              const RealType c6,
 149                                              const RealType c12,
 150                                              const real     repulsionShift,
 151                                              const real     dispersionShift,
 152                                              const real     onesixth,
 153                                              const real     onetwelfth)
 154 {
 155     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 156 }
 157
 158 /* Ewald LJ */
 159 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 160 {
 161     return (c6grid * potentialShift * onesixth);
 162 }
 163
 164 /* LJ Potential switch */
 165 template<class RealType>
 166 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
 167                                                const RealType potential,
 168                                                const RealType sw,
 169                                                const RealType r,
 170                                                const RealType rVdw,
 171                                                const RealType dsw,
 172                                                const real     zero)
 173 {
 174     if (r < rVdw)
 175     {
 176         real fScalar = fScalarInp * sw - r * potential * dsw;
 177         return (fScalar);
 178     }
 179     return (zero);
 180 }
 181 template<class RealType>
 182 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
 183                                              const RealType sw,
 184                                              const RealType r,
 185                                              const RealType rVdw,
 186                                              const real     zero)
 187 {
 188     if (r < rVdw)
 189     {
 190         real potential = potentialInp * sw;
 191         return (potential);
 192     }
 193     return (zero);
 194 }
 195
 196
 197 //! Templated free-energy non-bonded kernel
 198 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 199 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 200                                   rvec* gmx_restrict         xx,
 201                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 202                                   const t_forcerec* gmx_restrict fr,
 203                                   const t_mdatoms* gmx_restrict mdatoms,
 204                                   nb_kernel_data_t* gmx_restrict kernel_data,
 205                                   t_nrnb* gmx_restrict nrnb)
 206 {
 207 #define STATE_A 0
 208 #define STATE_B 1
 209 #define NSTATES 2
 210
 211     using RealType = typename DataTypes::RealType;
 212     using IntType  = typename DataTypes::IntType;
 213
 214     /* FIXME: How should these be handled with SIMD? */
 215     constexpr real onetwelfth = 1.0 / 12.0;
 216     constexpr real onesixth   = 1.0 / 6.0;
 217     constexpr real zero       = 0.0;
 218     constexpr real half       = 0.5;
 219     constexpr real one        = 1.0;
 220     constexpr real two        = 2.0;
 221     constexpr real six        = 6.0;
 222
 223     /* Extract pointer to non-bonded interaction constants */
 224     const interaction_const_t* ic = fr->ic;
 225
 226     // Extract pair list data
 227     const int  nri    = nlist->nri;
 228     const int* iinr   = nlist->iinr;
 229     const int* jindex = nlist->jindex;
 230     const int* jjnr   = nlist->jjnr;
 231     const int* shift  = nlist->shift;
 232     const int* gid    = nlist->gid;
 233
 234     const real* shiftvec      = fr->shift_vec[0];
 235     const real* chargeA       = mdatoms->chargeA;
 236     const real* chargeB       = mdatoms->chargeB;
 237     real*       Vc            = kernel_data->energygrp_elec;
 238     const int*  typeA         = mdatoms->typeA;
 239     const int*  typeB         = mdatoms->typeB;
 240     const int   ntype         = fr->ntype;
 241     const real* nbfp          = fr->nbfp.data();
 242     const real* nbfp_grid     = fr->ljpme_c6grid;
 243     real*       Vv            = kernel_data->energygrp_vdw;
 244     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 245     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 246     real*       dvdl          = kernel_data->dvdl;
 247     const real  alpha_coul    = fr->sc_alphacoul;
 248     const real  alpha_vdw     = fr->sc_alphavdw;
 249     const real  lam_power     = fr->sc_power;
 250     const real  sigma6_def    = fr->sc_sigma6_def;
 251     const real  sigma6_min    = fr->sc_sigma6_min;
 252     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 253     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 254     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 255
 256     // Extract data from interaction_const_t
 257     const real facel           = ic->epsfac;
 258     const real rcoulomb        = ic->rcoulomb;
 259     const real krf             = ic->k_rf;
 260     const real crf             = ic->c_rf;
 261     const real sh_lj_ewald     = ic->sh_lj_ewald;
 262     const real rvdw            = ic->rvdw;
 263     const real dispersionShift = ic->dispersion_shift.cpot;
 264     const real repulsionShift  = ic->repulsion_shift.cpot;
 265
 266     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 267     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 268                "Potential switching is not supported for Coulomb with FEP");
 269
 270     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 271     if (vdwModifierIsPotSwitch)
 272     {
 273         const real d = ic->rvdw - ic->rvdw_switch;
 274         vdw_swV3     = -10.0 / (d * d * d);
 275         vdw_swV4     = 15.0 / (d * d * d * d);
 276         vdw_swV5     = -6.0 / (d * d * d * d * d);
 277         vdw_swF2     = -30.0 / (d * d * d);
 278         vdw_swF3     = 60.0 / (d * d * d * d);
 279         vdw_swF4     = -30.0 / (d * d * d * d * d);
 280     }
 281     else
 282     {
 283         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 284         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 285     }
 286
 287     int icoul;
 288     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 289     {
 290         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 291     }
 292     else
 293     {
 294         icoul = GMX_NBKERNEL_ELEC_NONE;
 295     }
 296
 297     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 298     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 299
 300     const real* tab_ewald_F_lj = nullptr;
 301     const real* tab_ewald_V_lj = nullptr;
 302     const real* ewtab          = nullptr;
 303     real        ewtabscale     = 0;
 304     real        ewtabhalfspace = 0;
 305     real        sh_ewald       = 0;
 306     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 307     {
 308         const auto& tables = *ic->coulombEwaldTables;
 309         sh_ewald           = ic->sh_ewald;
 310         ewtab              = tables.tableFDV0.data();
 311         ewtabscale         = tables.scale;
 312         ewtabhalfspace     = half / ewtabscale;
 313         tab_ewald_F_lj     = tables.tableF.data();
 314         tab_ewald_V_lj     = tables.tableV.data();
 315     }
 316
 317     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 318      * reciprocal space. When we use non-switched Ewald interactions, we
 319      * assume the soft-coring does not significantly affect the grid contribution
 320      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 321      *
 322      * However, we cannot use this approach for switch-modified since we would then
 323      * effectively end up evaluating a significantly different interaction here compared to the
 324      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 325      * position than what the user requested, or by switching different
 326      * things (1/r rather than short-range Ewald). For these settings, we just
 327      * use the traditional short-range Ewald interaction in that case.
 328      */
 329     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 330                        "Can not apply soft-core to switched Ewald potentials");
 331
 332     real dvdl_coul = 0;
 333     real dvdl_vdw  = 0;
 334
 335     /* Lambda factor for state A, 1-lambda*/
 336     real LFC[NSTATES], LFV[NSTATES];
 337     LFC[STATE_A] = one - lambda_coul;
 338     LFV[STATE_A] = one - lambda_vdw;
 339
 340     /* Lambda factor for state B, lambda*/
 341     LFC[STATE_B] = lambda_coul;
 342     LFV[STATE_B] = lambda_vdw;
 343
 344     /*derivative of the lambda factor for state A and B */
 345     real DLF[NSTATES];
 346     DLF[STATE_A] = -1;
 347     DLF[STATE_B] = 1;
 348
 349     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 350     constexpr real sc_r_power = 6.0_real;
 351     for (int i = 0; i < NSTATES; i++)
 352     {
 353         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 354         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 355         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 356         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 357     }
 358
 359     // TODO: We should get rid of using pointers to real
 360     const real* x             = xx[0];
 361     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 362     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 363
 364     for (int n = 0; n < nri; n++)
 365     {
 366         int npair_within_cutoff = 0;
 367
 368         const int  is3   = 3 * shift[n];
 369         const real shX   = shiftvec[is3];
 370         const real shY   = shiftvec[is3 + 1];
 371         const real shZ   = shiftvec[is3 + 2];
 372         const int  nj0   = jindex[n];
 373         const int  nj1   = jindex[n + 1];
 374         const int  ii    = iinr[n];
 375         const int  ii3   = 3 * ii;
 376         const real ix    = shX + x[ii3 + 0];
 377         const real iy    = shY + x[ii3 + 1];
 378         const real iz    = shZ + x[ii3 + 2];
 379         const real iqA   = facel * chargeA[ii];
 380         const real iqB   = facel * chargeB[ii];
 381         const int  ntiA  = 2 * ntype * typeA[ii];
 382         const int  ntiB  = 2 * ntype * typeB[ii];
 383         real       vctot = 0;
 384         real       vvtot = 0;
 385         real       fix   = 0;
 386         real       fiy   = 0;
 387         real       fiz   = 0;
 388
 389         for (int k = nj0; k < nj1; k++)
 390         {
 391             int            tj[NSTATES];
 392             const int      jnr = jjnr[k];
 393             const int      j3  = 3 * jnr;
 394             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 395             RealType       r, rinv, rp, rpm2;
 396             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
 397             const RealType dx  = ix - x[j3];
 398             const RealType dy  = iy - x[j3 + 1];
 399             const RealType dz  = iz - x[j3 + 2];
 400             const RealType rsq = dx * dx + dy * dy + dz * dz;
 401             RealType       FscalC[NSTATES], FscalV[NSTATES];
 402
 403             if (rsq >= rcutoff_max2)
 404             {
 405                 /* We save significant time by skipping all code below.
 406                  * Note that with soft-core interactions, the actual cut-off
 407                  * check might be different. But since the soft-core distance
 408                  * is always larger than r, checking on r here is safe.
 409                  */
 410                 continue;
 411             }
 412             npair_within_cutoff++;
 413
 414             if (rsq > 0)
 415             {
 416                 /* Note that unlike in the nbnxn kernels, we do not need
 417                  * to clamp the value of rsq before taking the invsqrt
 418                  * to avoid NaN in the LJ calculation, since here we do
 419                  * not calculate LJ interactions when C6 and C12 are zero.
 420                  */
 421
 422                 rinv = gmx::invsqrt(rsq);
 423                 r    = rsq * rinv;
 424             }
 425             else
 426             {
 427                 /* The force at r=0 is zero, because of symmetry.
 428                  * But note that the potential is in general non-zero,
 429                  * since the soft-cored r will be non-zero.
 430                  */
 431                 rinv = 0;
 432                 r    = 0;
 433             }
 434
 435             if (useSoftCore)
 436             {
 437                 rpm2 = rsq * rsq;  /* r4 */
 438                 rp   = rpm2 * rsq; /* r6 */
 439             }
 440             else
 441             {
 442                 /* The soft-core power p will not affect the results
 443                  * with not using soft-core, so we use power of 0 which gives
 444                  * the simplest math and cheapest code.
 445                  */
 446                 rpm2 = rinv * rinv;
 447                 rp   = 1;
 448             }
 449
 450             RealType Fscal = 0;
 451
 452             qq[STATE_A] = iqA * chargeA[jnr];
 453             qq[STATE_B] = iqB * chargeB[jnr];
 454
 455             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 456             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 457
 458             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 459             {
 460                 c6[STATE_A] = nbfp[tj[STATE_A]];
 461                 c6[STATE_B] = nbfp[tj[STATE_B]];
 462
 463                 for (int i = 0; i < NSTATES; i++)
 464                 {
 465                     c12[i] = nbfp[tj[i] + 1];
 466                     if (useSoftCore)
 467                     {
 468                         if ((c6[i] > 0) && (c12[i] > 0))
 469                         {
 470                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 471                             sigma6[i] = half * c12[i] / c6[i];
 472                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 473                             {
 474                                 sigma6[i] = sigma6_min;
 475                             }
 476                         }
 477                         else
 478                         {
 479                             sigma6[i] = sigma6_def;
 480                         }
 481                     }
 482                 }
 483
 484                 if (useSoftCore)
 485                 {
 486                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 487                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 488                     {
 489                         alpha_vdw_eff  = 0;
 490                         alpha_coul_eff = 0;
 491                     }
 492                     else
 493                     {
 494                         alpha_vdw_eff  = alpha_vdw;
 495                         alpha_coul_eff = alpha_coul;
 496                     }
 497                 }
 498
 499                 for (int i = 0; i < NSTATES; i++)
 500                 {
 501                     FscalC[i] = 0;
 502                     FscalV[i] = 0;
 503                     Vcoul[i]  = 0;
 504                     Vvdw[i]   = 0;
 505
 506                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
 507
 508                     /* Only spend time on A or B state if it is non-zero */
 509                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 510                     {
 511                         /* this section has to be inside the loop because of the dependence on sigma6 */
 512                         if (useSoftCore)
 513                         {
 514                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
 515                             pthRoot(rpinvC, &rinvC, &rC);
 516                             if (scLambdasOrAlphasDiffer)
 517                             {
 518                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
 519                                 pthRoot(rpinvV, &rinvV, &rV);
 520                             }
 521                             else
 522                             {
 523                                 /* We can avoid one expensive pow and one / operation */
 524                                 rpinvV = rpinvC;
 525                                 rinvV  = rinvC;
 526                                 rV     = rC;
 527                             }
 528                         }
 529                         else
 530                         {
 531                             rpinvC = 1;
 532                             rinvC  = rinv;
 533                             rC     = r;
 534
 535                             rpinvV = 1;
 536                             rinvV  = rinv;
 537                             rV     = r;
 538                         }
 539
 540                         /* Only process the coulomb interactions if we have charges,
 541                          * and if we either include all entries in the list (no cutoff
 542                          * used in the kernel), or if we are within the cutoff.
 543                          */
 544                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 545                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 546
 547                         if ((qq[i] != 0) && computeElecInteraction)
 548                         {
 549                             if (elecInteractionTypeIsEwald)
 550                             {
 551                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 552                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 553                             }
 554                             else
 555                             {
 556                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 557                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 558                             }
 559                         }
 560
 561                         /* Only process the VDW interactions if we have
 562                          * some non-zero parameters, and if we either
 563                          * include all entries in the list (no cutoff used
 564                          * in the kernel), or if we are within the cutoff.
 565                          */
 566                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 567                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 568                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 569                         {
 570                             RealType rinv6;
 571                             if (useSoftCore)
 572                             {
 573                                 rinv6 = rpinvV;
 574                             }
 575                             else
 576                             {
 577                                 rinv6 = calculateRinv6(rinvV);
 578                             }
 579                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
 580                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
 581
 582                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 583                                                             dispersionShift, onesixth, onetwelfth);
 584                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 585
 586                             if (vdwInteractionTypeIsEwald)
 587                             {
 588                                 /* Subtract the grid potential at the cut-off */
 589                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 590                                                                          sh_lj_ewald, onesixth);
 591                             }
 592
 593                             if (vdwModifierIsPotSwitch)
 594                             {
 595                                 RealType d        = rV - ic->rvdw_switch;
 596                                 d                 = (d > zero) ? d : zero;
 597                                 const RealType d2 = d * d;
 598                                 const RealType sw =
 599                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 600                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 601
 602                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 603                                                                     rvdw, dsw, zero);
 604                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 605                             }
 606                         }
 607
 608                         /* FscalC (and FscalV) now contain: dV/drC * rC
 609                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 610                          * Further down we first multiply by r^p-2 and then by
 611                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 612                          */
 613                         FscalC[i] *= rpinvC;
 614                         FscalV[i] *= rpinvV;
 615                     }
 616                 }
 617
 618                 /* Assemble A and B states */
 619                 for (int i = 0; i < NSTATES; i++)
 620                 {
 621                     vctot += LFC[i] * Vcoul[i];
 622                     vvtot += LFV[i] * Vvdw[i];
 623
 624                     Fscal += LFC[i] * FscalC[i] * rpm2;
 625                     Fscal += LFV[i] * FscalV[i] * rpm2;
 626
 627                     if (useSoftCore)
 628                     {
 629                         dvdl_coul += Vcoul[i] * DLF[i]
 630                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
 631                         dvdl_vdw += Vvdw[i] * DLF[i]
 632                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
 633                     }
 634                     else
 635                     {
 636                         dvdl_coul += Vcoul[i] * DLF[i];
 637                         dvdl_vdw += Vvdw[i] * DLF[i];
 638                     }
 639                 }
 640             }
 641             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 642             {
 643                 /* For excluded pairs, which are only in this pair list when
 644                  * using the Verlet scheme, we don't use soft-core.
 645                  * As there is no singularity, there is no need for soft-core.
 646                  */
 647                 const real FF = -two * krf;
 648                 RealType   VV = krf * rsq - crf;
 649
 650                 if (ii == jnr)
 651                 {
 652                     VV *= half;
 653                 }
 654
 655                 for (int i = 0; i < NSTATES; i++)
 656                 {
 657                     vctot += LFC[i] * qq[i] * VV;
 658                     Fscal += LFC[i] * qq[i] * FF;
 659                     dvdl_coul += DLF[i] * qq[i] * VV;
 660                 }
 661             }
 662
 663             if (elecInteractionTypeIsEwald && r < rcoulomb)
 664             {
 665                 /* See comment in the preamble. When using Ewald interactions
 666                  * (unless we use a switch modifier) we subtract the reciprocal-space
 667                  * Ewald component here which made it possible to apply the free
 668                  * energy interaction to 1/r (vanilla coulomb short-range part)
 669                  * above. This gets us closer to the ideal case of applying
 670                  * the softcore to the entire electrostatic interaction,
 671                  * including the reciprocal-space component.
 672                  */
 673                 real v_lr, f_lr;
 674
 675                 const RealType ewrt   = r * ewtabscale;
 676                 IntType        ewitab = static_cast<IntType>(ewrt);
 677                 const RealType eweps  = ewrt - ewitab;
 678                 ewitab                = 4 * ewitab;
 679                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 680                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
 681                 f_lr *= rinv;
 682
 683                 /* Note that any possible Ewald shift has already been applied in
 684                  * the normal interaction part above.
 685                  */
 686
 687                 if (ii == jnr)
 688                 {
 689                     /* If we get here, the i particle (ii) has itself (jnr)
 690                      * in its neighborlist. This can only happen with the Verlet
 691                      * scheme, and corresponds to a self-interaction that will
 692                      * occur twice. Scale it down by 50% to only include it once.
 693                      */
 694                     v_lr *= half;
 695                 }
 696
 697                 for (int i = 0; i < NSTATES; i++)
 698                 {
 699                     vctot -= LFC[i] * qq[i] * v_lr;
 700                     Fscal -= LFC[i] * qq[i] * f_lr;
 701                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 702                 }
 703             }
 704
 705             if (vdwInteractionTypeIsEwald && r < rvdw)
 706             {
 707                 /* See comment in the preamble. When using LJ-Ewald interactions
 708                  * (unless we use a switch modifier) we subtract the reciprocal-space
 709                  * Ewald component here which made it possible to apply the free
 710                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 711                  * above. This gets us closer to the ideal case of applying
 712                  * the softcore to the entire VdW interaction,
 713                  * including the reciprocal-space component.
 714                  */
 715                 /* We could also use the analytical form here
 716                  * iso a table, but that can cause issues for
 717                  * r close to 0 for non-interacting pairs.
 718                  */
 719
 720                 const RealType rs   = rsq * rinv * ewtabscale;
 721                 const IntType  ri   = static_cast<IntType>(rs);
 722                 const RealType frac = rs - ri;
 723                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 724                 /* TODO: Currently the Ewald LJ table does not contain
 725                  * the factor 1/6, we should add this.
 726                  */
 727                 const RealType FF = f_lr * rinv / six;
 728                 RealType       VV =
 729                         (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
 730
 731                 if (ii == jnr)
 732                 {
 733                     /* If we get here, the i particle (ii) has itself (jnr)
 734                      * in its neighborlist. This can only happen with the Verlet
 735                      * scheme, and corresponds to a self-interaction that will
 736                      * occur twice. Scale it down by 50% to only include it once.
 737                      */
 738                     VV *= half;
 739                 }
 740
 741                 for (int i = 0; i < NSTATES; i++)
 742                 {
 743                     const real c6grid = nbfp_grid[tj[i]];
 744                     vvtot += LFV[i] * c6grid * VV;
 745                     Fscal += LFV[i] * c6grid * FF;
 746                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 747                 }
 748             }
 749
 750             if (doForces)
 751             {
 752                 const real tx = Fscal * dx;
 753                 const real ty = Fscal * dy;
 754                 const real tz = Fscal * dz;
 755                 fix           = fix + tx;
 756                 fiy           = fiy + ty;
 757                 fiz           = fiz + tz;
 758                 /* OpenMP atomics are expensive, but this kernels is also
 759                  * expensive, so we can take this hit, instead of using
 760                  * thread-local output buffers and extra reduction.
 761                  *
 762                  * All the OpenMP regions in this file are trivial and should
 763                  * not throw, so no need for try/catch.
 764                  */
 765 #pragma omp atomic
 766                 f[j3] -= tx;
 767 #pragma omp atomic
 768                 f[j3 + 1] -= ty;
 769 #pragma omp atomic
 770                 f[j3 + 2] -= tz;
 771             }
 772         }
 773
 774         /* The atomics below are expensive with many OpenMP threads.
 775          * Here unperturbed i-particles will usually only have a few
 776          * (perturbed) j-particles in the list. Thus with a buffered list
 777          * we can skip a significant number of i-reductions with a check.
 778          */
 779         if (npair_within_cutoff > 0)
 780         {
 781             if (doForces)
 782             {
 783 #pragma omp atomic
 784                 f[ii3] += fix;
 785 #pragma omp atomic
 786                 f[ii3 + 1] += fiy;
 787 #pragma omp atomic
 788                 f[ii3 + 2] += fiz;
 789             }
 790             if (doShiftForces)
 791             {
 792 #pragma omp atomic
 793                 fshift[is3] += fix;
 794 #pragma omp atomic
 795                 fshift[is3 + 1] += fiy;
 796 #pragma omp atomic
 797                 fshift[is3 + 2] += fiz;
 798             }
 799             if (doPotential)
 800             {
 801                 int ggid = gid[n];
 802 #pragma omp atomic
 803                 Vc[ggid] += vctot;
 804 #pragma omp atomic
 805                 Vv[ggid] += vvtot;
 806             }
 807         }
 808     }
 809
 810 #pragma omp atomic
 811     dvdl[efptCOUL] += dvdl_coul;
 812 #pragma omp atomic
 813     dvdl[efptVDW] += dvdl_vdw;
 814
 815     /* Estimate flops, average for free energy stuff:
 816      * 12  flops per outer iteration
 817      * 150 flops per inner iteration
 818      */
 819 #pragma omp atomic
 820     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 821 }
 822
 823 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 824                                rvec* gmx_restrict         xx,
 825                                gmx::ForceWithShiftForces* forceWithShiftForces,
 826                                const t_forcerec* gmx_restrict fr,
 827                                const t_mdatoms* gmx_restrict mdatoms,
 828                                nb_kernel_data_t* gmx_restrict kernel_data,
 829                                t_nrnb* gmx_restrict nrnb);
 830
 831 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 832 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 833 {
 834     if (useSimd)
 835     {
 836 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
 837         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
 838         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 839                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 840 #else
 841         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 842                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 843 #endif
 844     }
 845     else
 846     {
 847         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 848                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 849     }
 850 }
 851
 852 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 853 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
 854 {
 855     if (vdwModifierIsPotSwitch)
 856     {
 857         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 858                                         elecInteractionTypeIsEwald, true>(useSimd));
 859     }
 860     else
 861     {
 862         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 863                                         elecInteractionTypeIsEwald, false>(useSimd));
 864     }
 865 }
 866
 867 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 868 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 869                                                           const bool vdwModifierIsPotSwitch,
 870                                                           const bool useSimd)
 871 {
 872     if (elecInteractionTypeIsEwald)
 873     {
 874         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 875                 vdwModifierIsPotSwitch, useSimd));
 876     }
 877     else
 878     {
 879         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 880                 vdwModifierIsPotSwitch, useSimd));
 881     }
 882 }
 883
 884 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 885 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 886                                                          const bool elecInteractionTypeIsEwald,
 887                                                          const bool vdwModifierIsPotSwitch,
 888                                                          const bool useSimd)
 889 {
 890     if (vdwInteractionTypeIsEwald)
 891     {
 892         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 893                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 894     }
 895     else
 896     {
 897         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 898                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 899     }
 900 }
 901
 902 template<bool useSoftCore>
 903 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 904                                                                   const bool vdwInteractionTypeIsEwald,
 905                                                                   const bool elecInteractionTypeIsEwald,
 906                                                                   const bool vdwModifierIsPotSwitch,
 907                                                                   const bool useSimd)
 908 {
 909     if (scLambdasOrAlphasDiffer)
 910     {
 911         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 912                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 913     }
 914     else
 915     {
 916         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 917                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 918     }
 919 }
 920
 921 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 922                                      const bool        vdwInteractionTypeIsEwald,
 923                                      const bool        elecInteractionTypeIsEwald,
 924                                      const bool        vdwModifierIsPotSwitch,
 925                                      const bool        useSimd,
 926                                      const t_forcerec* fr)
 927 {
 928     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 929     {
 930         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
 931                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 932                 vdwModifierIsPotSwitch, useSimd));
 933     }
 934     else
 935     {
 936         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
 937                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 938                 vdwModifierIsPotSwitch, useSimd));
 939     }
 940 }
 941
 942
 943 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 944                                rvec*                      xx,
 945                                gmx::ForceWithShiftForces* ff,
 946                                const t_forcerec*          fr,
 947                                const t_mdatoms*           mdatoms,
 948                                nb_kernel_data_t*          kernel_data,
 949                                t_nrnb*                    nrnb)
 950 {
 951     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 952                "Unsupported eeltype with free energy");
 953
 954     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 955     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 956     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 957     bool       scLambdasOrAlphasDiffer    = true;
 958     const bool useSimd                    = fr->use_simd_kernels;
 959
 960     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 961     {
 962         scLambdasOrAlphasDiffer = false;
 963     }
 964     else if (fr->sc_r_power == 6.0_real)
 965     {
 966         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 967         {
 968             scLambdasOrAlphasDiffer = false;
 969         }
 970     }
 971     else
 972     {
 973         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 974     }
 975
 976     KernelFunction kernelFunc;
 977     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 978                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, fr);
 979     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 980 }