src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forceoutput.h"
  51 #include "gromacs/mdtypes/forcerec.h"
  52 #include "gromacs/mdtypes/md_enums.h"
  53 #include "gromacs/utility/fatalerror.h"
  54
  55
  56 //! Enum for templating the soft-core treatment in the kernel
  57 enum class SoftCoreTreatment
  58 {
  59     None,    //!< No soft-core
  60     RPower6, //!< Soft-core with r-power = 6
  61     RPower48 //!< Soft-core with r-power = 48
  62 };
  63
  64 //! Most treatments are fine with float in mixed-precision mode.
  65 template<SoftCoreTreatment softCoreTreatment>
  66 struct SoftCoreReal
  67 {
  68     //! Real type for soft-core calculations
  69     using Real = real;
  70 };
  71
  72 //! This treatment requires double precision for some computations.
  73 template<>
  74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  75 {
  76     //! Real type for soft-core calculations
  77     using Real = double;
  78 };
  79
  80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  81 template<SoftCoreTreatment softCoreTreatment>
  82 static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
  83 {
  84     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  85     *pthRoot    = 1 / (*invPthRoot);
  86 }
  87
  88 // We need a double version to make the specialization below work
  89 #if !GMX_DOUBLE
  90 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  91 template<SoftCoreTreatment softCoreTreatment>
  92 static inline void pthRoot(const double r, real* pthRoot, double* invPthRoot)
  93 {
  94     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  95     *pthRoot    = 1 / (*invPthRoot);
  96 }
  97 #endif
  98
  99 //! Computes r^(1/p) and 1/r^(1/p) for p=48
 100 template<>
 101 inline void pthRoot<SoftCoreTreatment::RPower48>(const double r, real* pthRoot, double* invPthRoot)
 102 {
 103     *pthRoot    = std::pow(r, 1.0 / 48.0);
 104     *invPthRoot = 1 / (*pthRoot);
 105 }
 106
 107 template<SoftCoreTreatment softCoreTreatment>
 108 static inline real calculateSigmaPow(const real sigma6)
 109 {
 110     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 111     {
 112         return sigma6;
 113     }
 114     else
 115     {
 116         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
 117         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
 118         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
 119         return (sigmaPow);
 120     }
 121 }
 122
 123 template<SoftCoreTreatment softCoreTreatment, class SCReal>
 124 static inline real calculateRinv6(const SCReal rinvV)
 125 {
 126     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 127     {
 128         return rinvV;
 129     }
 130     else
 131     {
 132         real rinv6 = rinvV * rinvV;
 133         return (rinv6 * rinv6 * rinv6);
 134     }
 135 }
 136
 137 static inline real calculateVdw6(const real c6, const real rinv6)
 138 {
 139     return (c6 * rinv6);
 140 }
 141
 142 static inline real calculateVdw12(const real c12, const real rinv6)
 143 {
 144     return (c12 * rinv6 * rinv6);
 145 }
 146
 147 /* reaction-field electrostatics */
 148 template<class SCReal>
 149 static inline SCReal
 150 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf, const real two)
 151 {
 152     return (qq * (rinv - two * krf * r * r));
 153 }
 154 template<class SCReal>
 155 static inline real
 156 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf, const real potentialShift)
 157 {
 158     return (qq * (rinv + krf * r * r - potentialShift));
 159 }
 160
 161 /* Ewald electrostatics */
 162 static inline real ewaldScalarForce(const real coulomb, const real rinv)
 163 {
 164     return (coulomb * rinv);
 165 }
 166 static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
 167 {
 168     return (coulomb * (rinv - potentialShift));
 169 }
 170
 171 /* cutoff LJ */
 172 static inline real lennardJonesScalarForce(const real v6, const real v12)
 173 {
 174     return (v12 - v6);
 175 }
 176 static inline real lennardJonesPotential(const real v6,
 177                                          const real v12,
 178                                          const real c6,
 179                                          const real c12,
 180                                          const real repulsionShift,
 181                                          const real dispersionShift,
 182                                          const real onesixth,
 183                                          const real onetwelfth)
 184 {
 185     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 186 }
 187
 188 /* Ewald LJ */
 189 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 190 {
 191     return (c6grid * potentialShift * onesixth);
 192 }
 193
 194 /* LJ Potential switch */
 195 template<class SCReal>
 196 static inline SCReal potSwitchScalarForceMod(const SCReal fScalarInp,
 197                                              const real   potential,
 198                                              const real   sw,
 199                                              const SCReal r,
 200                                              const real   rVdw,
 201                                              const real   dsw,
 202                                              const real   zero)
 203 {
 204     if (r < rVdw)
 205     {
 206         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
 207         return (fScalar);
 208     }
 209     return (zero);
 210 }
 211 template<class SCReal>
 212 static inline real
 213 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
 214 {
 215     if (r < rVdw)
 216     {
 217         real potential = potentialInp * sw;
 218         return (potential);
 219     }
 220     return (zero);
 221 }
 222
 223
 224 //! Templated free-energy non-bonded kernel
 225 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 226 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 227                                   rvec* gmx_restrict         xx,
 228                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 229                                   const t_forcerec* gmx_restrict fr,
 230                                   const t_mdatoms* gmx_restrict mdatoms,
 231                                   nb_kernel_data_t* gmx_restrict kernel_data,
 232                                   t_nrnb* gmx_restrict nrnb)
 233 {
 234     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
 235
 236     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
 237
 238 #define STATE_A 0
 239 #define STATE_B 1
 240 #define NSTATES 2
 241
 242     constexpr real onetwelfth = 1.0 / 12.0;
 243     constexpr real onesixth   = 1.0 / 6.0;
 244     constexpr real zero       = 0.0;
 245     constexpr real half       = 0.5;
 246     constexpr real one        = 1.0;
 247     constexpr real two        = 2.0;
 248     constexpr real six        = 6.0;
 249
 250     /* Extract pointer to non-bonded interaction constants */
 251     const interaction_const_t* ic = fr->ic;
 252
 253     // Extract pair list data
 254     const int  nri    = nlist->nri;
 255     const int* iinr   = nlist->iinr;
 256     const int* jindex = nlist->jindex;
 257     const int* jjnr   = nlist->jjnr;
 258     const int* shift  = nlist->shift;
 259     const int* gid    = nlist->gid;
 260
 261     const real* shiftvec      = fr->shift_vec[0];
 262     const real* chargeA       = mdatoms->chargeA;
 263     const real* chargeB       = mdatoms->chargeB;
 264     real*       Vc            = kernel_data->energygrp_elec;
 265     const int*  typeA         = mdatoms->typeA;
 266     const int*  typeB         = mdatoms->typeB;
 267     const int   ntype         = fr->ntype;
 268     const real* nbfp          = fr->nbfp;
 269     const real* nbfp_grid     = fr->ljpme_c6grid;
 270     real*       Vv            = kernel_data->energygrp_vdw;
 271     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 272     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 273     real*       dvdl          = kernel_data->dvdl;
 274     const real  alpha_coul    = fr->sc_alphacoul;
 275     const real  alpha_vdw     = fr->sc_alphavdw;
 276     const real  lam_power     = fr->sc_power;
 277     const real  sigma6_def    = fr->sc_sigma6_def;
 278     const real  sigma6_min    = fr->sc_sigma6_min;
 279     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 280     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 281     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 282
 283     // Extract data from interaction_const_t
 284     const real facel           = ic->epsfac;
 285     const real rcoulomb        = ic->rcoulomb;
 286     const real krf             = ic->k_rf;
 287     const real crf             = ic->c_rf;
 288     const real sh_lj_ewald     = ic->sh_lj_ewald;
 289     const real rvdw            = ic->rvdw;
 290     const real dispersionShift = ic->dispersion_shift.cpot;
 291     const real repulsionShift  = ic->repulsion_shift.cpot;
 292
 293     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 294     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 295                "Potential switching is not supported for Coulomb with FEP");
 296
 297     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 298     if (vdwModifierIsPotSwitch)
 299     {
 300         const real d = ic->rvdw - ic->rvdw_switch;
 301         vdw_swV3     = -10.0 / (d * d * d);
 302         vdw_swV4     = 15.0 / (d * d * d * d);
 303         vdw_swV5     = -6.0 / (d * d * d * d * d);
 304         vdw_swF2     = -30.0 / (d * d * d);
 305         vdw_swF3     = 60.0 / (d * d * d * d);
 306         vdw_swF4     = -30.0 / (d * d * d * d * d);
 307     }
 308     else
 309     {
 310         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 311         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 312     }
 313
 314     int icoul;
 315     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 316     {
 317         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 318     }
 319     else
 320     {
 321         icoul = GMX_NBKERNEL_ELEC_NONE;
 322     }
 323
 324     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 325     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 326
 327     const real* tab_ewald_F_lj           = nullptr;
 328     const real* tab_ewald_V_lj           = nullptr;
 329     const real* ewtab                    = nullptr;
 330     real        coulombTableScale        = 0;
 331     real        coulombTableScaleInvHalf = 0;
 332     real        vdwTableScale            = 0;
 333     real        vdwTableScaleInvHalf     = 0;
 334     real        sh_ewald                 = 0;
 335     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 336     {
 337         sh_ewald = ic->sh_ewald;
 338     }
 339     if (elecInteractionTypeIsEwald)
 340     {
 341         const auto& coulombTables = *ic->coulombEwaldTables;
 342         ewtab                     = coulombTables.tableFDV0.data();
 343         coulombTableScale         = coulombTables.scale;
 344         coulombTableScaleInvHalf  = half / coulombTableScale;
 345     }
 346     if (vdwInteractionTypeIsEwald)
 347     {
 348         const auto& vdwTables = *ic->vdwEwaldTables;
 349         tab_ewald_F_lj        = vdwTables.tableF.data();
 350         tab_ewald_V_lj        = vdwTables.tableV.data();
 351         vdwTableScale         = vdwTables.scale;
 352         vdwTableScaleInvHalf  = half / vdwTableScale;
 353     }
 354
 355     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 356      * reciprocal space. When we use non-switched Ewald interactions, we
 357      * assume the soft-coring does not significantly affect the grid contribution
 358      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 359      *
 360      * However, we cannot use this approach for switch-modified since we would then
 361      * effectively end up evaluating a significantly different interaction here compared to the
 362      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 363      * position than what the user requested, or by switching different
 364      * things (1/r rather than short-range Ewald). For these settings, we just
 365      * use the traditional short-range Ewald interaction in that case.
 366      */
 367     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 368                        "Can not apply soft-core to switched Ewald potentials");
 369
 370     SCReal dvdl_coul = 0; /* Needs double for sc_power==48 */
 371     SCReal dvdl_vdw  = 0; /* Needs double for sc_power==48 */
 372
 373     /* Lambda factor for state A, 1-lambda*/
 374     real LFC[NSTATES], LFV[NSTATES];
 375     LFC[STATE_A] = one - lambda_coul;
 376     LFV[STATE_A] = one - lambda_vdw;
 377
 378     /* Lambda factor for state B, lambda*/
 379     LFC[STATE_B] = lambda_coul;
 380     LFV[STATE_B] = lambda_vdw;
 381
 382     /*derivative of the lambda factor for state A and B */
 383     real DLF[NSTATES];
 384     DLF[STATE_A] = -1;
 385     DLF[STATE_B] = 1;
 386
 387     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 388     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 389     for (int i = 0; i < NSTATES; i++)
 390     {
 391         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 392         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 393         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 394         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 395     }
 396
 397     // TODO: We should get rid of using pointers to real
 398     const real* x             = xx[0];
 399     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 400     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 401
 402     for (int n = 0; n < nri; n++)
 403     {
 404         int npair_within_cutoff = 0;
 405
 406         const int  is3   = 3 * shift[n];
 407         const real shX   = shiftvec[is3];
 408         const real shY   = shiftvec[is3 + 1];
 409         const real shZ   = shiftvec[is3 + 2];
 410         const int  nj0   = jindex[n];
 411         const int  nj1   = jindex[n + 1];
 412         const int  ii    = iinr[n];
 413         const int  ii3   = 3 * ii;
 414         const real ix    = shX + x[ii3 + 0];
 415         const real iy    = shY + x[ii3 + 1];
 416         const real iz    = shZ + x[ii3 + 2];
 417         const real iqA   = facel * chargeA[ii];
 418         const real iqB   = facel * chargeB[ii];
 419         const int  ntiA  = 2 * ntype * typeA[ii];
 420         const int  ntiB  = 2 * ntype * typeB[ii];
 421         real       vctot = 0;
 422         real       vvtot = 0;
 423         real       fix   = 0;
 424         real       fiy   = 0;
 425         real       fiz   = 0;
 426
 427         for (int k = nj0; k < nj1; k++)
 428         {
 429             int        tj[NSTATES];
 430             const int  jnr = jjnr[k];
 431             const int  j3  = 3 * jnr;
 432             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 433             real       r, rinv, rp, rpm2;
 434             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
 435             const real dx  = ix - x[j3];
 436             const real dy  = iy - x[j3 + 1];
 437             const real dz  = iz - x[j3 + 2];
 438             const real rsq = dx * dx + dy * dy + dz * dz;
 439             SCReal     FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
 440
 441             if (rsq >= rcutoff_max2)
 442             {
 443                 /* We save significant time by skipping all code below.
 444                  * Note that with soft-core interactions, the actual cut-off
 445                  * check might be different. But since the soft-core distance
 446                  * is always larger than r, checking on r here is safe.
 447                  */
 448                 continue;
 449             }
 450             npair_within_cutoff++;
 451
 452             if (rsq > 0)
 453             {
 454                 /* Note that unlike in the nbnxn kernels, we do not need
 455                  * to clamp the value of rsq before taking the invsqrt
 456                  * to avoid NaN in the LJ calculation, since here we do
 457                  * not calculate LJ interactions when C6 and C12 are zero.
 458                  */
 459
 460                 rinv = gmx::invsqrt(rsq);
 461                 r    = rsq * rinv;
 462             }
 463             else
 464             {
 465                 /* The force at r=0 is zero, because of symmetry.
 466                  * But note that the potential is in general non-zero,
 467                  * since the soft-cored r will be non-zero.
 468                  */
 469                 rinv = 0;
 470                 r    = 0;
 471             }
 472
 473             if (softCoreTreatment == SoftCoreTreatment::None)
 474             {
 475                 /* The soft-core power p will not affect the results
 476                  * with not using soft-core, so we use power of 0 which gives
 477                  * the simplest math and cheapest code.
 478                  */
 479                 rpm2 = rinv * rinv;
 480                 rp   = 1;
 481             }
 482             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 483             {
 484                 rpm2 = rsq * rsq;  /* r4 */
 485                 rp   = rpm2 * rsq; /* r6 */
 486             }
 487             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 488             {
 489                 rp   = rsq * rsq * rsq; /* r6 */
 490                 rp   = rp * rp;         /* r12 */
 491                 rp   = rp * rp;         /* r24 */
 492                 rp   = rp * rp;         /* r48 */
 493                 rpm2 = rp / rsq;        /* r46 */
 494             }
 495
 496             real Fscal = 0;
 497
 498             qq[STATE_A] = iqA * chargeA[jnr];
 499             qq[STATE_B] = iqB * chargeB[jnr];
 500
 501             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 502             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 503
 504             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 505             {
 506                 c6[STATE_A] = nbfp[tj[STATE_A]];
 507                 c6[STATE_B] = nbfp[tj[STATE_B]];
 508
 509                 for (int i = 0; i < NSTATES; i++)
 510                 {
 511                     c12[i] = nbfp[tj[i] + 1];
 512                     if (useSoftCore)
 513                     {
 514                         real sigma6[NSTATES];
 515                         if ((c6[i] > 0) && (c12[i] > 0))
 516                         {
 517                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 518                             sigma6[i] = half * c12[i] / c6[i];
 519                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 520                             {
 521                                 sigma6[i] = sigma6_min;
 522                             }
 523                         }
 524                         else
 525                         {
 526                             sigma6[i] = sigma6_def;
 527                         }
 528                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
 529                     }
 530                 }
 531
 532                 if (useSoftCore)
 533                 {
 534                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 535                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 536                     {
 537                         alpha_vdw_eff  = 0;
 538                         alpha_coul_eff = 0;
 539                     }
 540                     else
 541                     {
 542                         alpha_vdw_eff  = alpha_vdw;
 543                         alpha_coul_eff = alpha_coul;
 544                     }
 545                 }
 546
 547                 for (int i = 0; i < NSTATES; i++)
 548                 {
 549                     FscalC[i] = 0;
 550                     FscalV[i] = 0;
 551                     Vcoul[i]  = 0;
 552                     Vvdw[i]   = 0;
 553
 554                     real   rinvC, rinvV;
 555                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
 556
 557                     /* Only spend time on A or B state if it is non-zero */
 558                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 559                     {
 560                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 561                         if (useSoftCore)
 562                         {
 563                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
 564                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
 565                             if (scLambdasOrAlphasDiffer)
 566                             {
 567                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
 568                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
 569                             }
 570                             else
 571                             {
 572                                 /* We can avoid one expensive pow and one / operation */
 573                                 rpinvV = rpinvC;
 574                                 rinvV  = rinvC;
 575                                 rV     = rC;
 576                             }
 577                         }
 578                         else
 579                         {
 580                             rpinvC = 1;
 581                             rinvC  = rinv;
 582                             rC     = r;
 583
 584                             rpinvV = 1;
 585                             rinvV  = rinv;
 586                             rV     = r;
 587                         }
 588
 589                         /* Only process the coulomb interactions if we have charges,
 590                          * and if we either include all entries in the list (no cutoff
 591                          * used in the kernel), or if we are within the cutoff.
 592                          */
 593                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 594                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 595
 596                         if ((qq[i] != 0) && computeElecInteraction)
 597                         {
 598                             if (elecInteractionTypeIsEwald)
 599                             {
 600                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 601                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 602                             }
 603                             else
 604                             {
 605                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 606                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 607                             }
 608                         }
 609
 610                         /* Only process the VDW interactions if we have
 611                          * some non-zero parameters, and if we either
 612                          * include all entries in the list (no cutoff used
 613                          * in the kernel), or if we are within the cutoff.
 614                          */
 615                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 616                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 617                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 618                         {
 619                             real rinv6;
 620                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 621                             {
 622                                 rinv6 = calculateRinv6<softCoreTreatment>(rpinvV);
 623                             }
 624                             else
 625                             {
 626                                 rinv6 = calculateRinv6<softCoreTreatment>(rinvV);
 627                             }
 628                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
 629                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
 630
 631                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 632                                                             dispersionShift, onesixth, onetwelfth);
 633                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 634
 635                             if (vdwInteractionTypeIsEwald)
 636                             {
 637                                 /* Subtract the grid potential at the cut-off */
 638                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 639                                                                          sh_lj_ewald, onesixth);
 640                             }
 641
 642                             if (vdwModifierIsPotSwitch)
 643                             {
 644                                 real d        = rV - ic->rvdw_switch;
 645                                 d             = (d > zero) ? d : zero;
 646                                 const real d2 = d * d;
 647                                 const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 648                                 const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 649
 650                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 651                                                                     rvdw, dsw, zero);
 652                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 653                             }
 654                         }
 655
 656                         /* FscalC (and FscalV) now contain: dV/drC * rC
 657                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 658                          * Further down we first multiply by r^p-2 and then by
 659                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 660                          */
 661                         FscalC[i] *= rpinvC;
 662                         FscalV[i] *= rpinvV;
 663                     }
 664                 }
 665
 666                 /* Assemble A and B states */
 667                 for (int i = 0; i < NSTATES; i++)
 668                 {
 669                     vctot += LFC[i] * Vcoul[i];
 670                     vvtot += LFV[i] * Vvdw[i];
 671
 672                     Fscal += LFC[i] * FscalC[i] * rpm2;
 673                     Fscal += LFV[i] * FscalV[i] * rpm2;
 674
 675                     if (useSoftCore)
 676                     {
 677                         dvdl_coul +=
 678                                 Vcoul[i] * DLF[i]
 679                                 + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma_pow[i];
 680                         dvdl_vdw += Vvdw[i] * DLF[i]
 681                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma_pow[i];
 682                     }
 683                     else
 684                     {
 685                         dvdl_coul += Vcoul[i] * DLF[i];
 686                         dvdl_vdw += Vvdw[i] * DLF[i];
 687                     }
 688                 }
 689             }
 690             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 691             {
 692                 /* For excluded pairs, which are only in this pair list when
 693                  * using the Verlet scheme, we don't use soft-core.
 694                  * As there is no singularity, there is no need for soft-core.
 695                  */
 696                 const real FF = -two * krf;
 697                 real       VV = krf * rsq - crf;
 698
 699                 if (ii == jnr)
 700                 {
 701                     VV *= half;
 702                 }
 703
 704                 for (int i = 0; i < NSTATES; i++)
 705                 {
 706                     vctot += LFC[i] * qq[i] * VV;
 707                     Fscal += LFC[i] * qq[i] * FF;
 708                     dvdl_coul += DLF[i] * qq[i] * VV;
 709                 }
 710             }
 711
 712             if (elecInteractionTypeIsEwald && r < rcoulomb)
 713             {
 714                 /* See comment in the preamble. When using Ewald interactions
 715                  * (unless we use a switch modifier) we subtract the reciprocal-space
 716                  * Ewald component here which made it possible to apply the free
 717                  * energy interaction to 1/r (vanilla coulomb short-range part)
 718                  * above. This gets us closer to the ideal case of applying
 719                  * the softcore to the entire electrostatic interaction,
 720                  * including the reciprocal-space component.
 721                  */
 722                 real v_lr, f_lr;
 723
 724                 const real ewrt   = r * coulombTableScale;
 725                 int        ewitab = static_cast<int>(ewrt);
 726                 const real eweps  = ewrt - ewitab;
 727                 ewitab            = 4 * ewitab;
 728                 f_lr              = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 729                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
 730                 f_lr *= rinv;
 731
 732                 /* Note that any possible Ewald shift has already been applied in
 733                  * the normal interaction part above.
 734                  */
 735
 736                 if (ii == jnr)
 737                 {
 738                     /* If we get here, the i particle (ii) has itself (jnr)
 739                      * in its neighborlist. This can only happen with the Verlet
 740                      * scheme, and corresponds to a self-interaction that will
 741                      * occur twice. Scale it down by 50% to only include it once.
 742                      */
 743                     v_lr *= half;
 744                 }
 745
 746                 for (int i = 0; i < NSTATES; i++)
 747                 {
 748                     vctot -= LFC[i] * qq[i] * v_lr;
 749                     Fscal -= LFC[i] * qq[i] * f_lr;
 750                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 751                 }
 752             }
 753
 754             if (vdwInteractionTypeIsEwald && r < rvdw)
 755             {
 756                 /* See comment in the preamble. When using LJ-Ewald interactions
 757                  * (unless we use a switch modifier) we subtract the reciprocal-space
 758                  * Ewald component here which made it possible to apply the free
 759                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 760                  * above. This gets us closer to the ideal case of applying
 761                  * the softcore to the entire VdW interaction,
 762                  * including the reciprocal-space component.
 763                  */
 764                 /* We could also use the analytical form here
 765                  * iso a table, but that can cause issues for
 766                  * r close to 0 for non-interacting pairs.
 767                  */
 768
 769                 const real rs   = rsq * rinv * vdwTableScale;
 770                 const int  ri   = static_cast<int>(rs);
 771                 const real frac = rs - ri;
 772                 const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 773                 /* TODO: Currently the Ewald LJ table does not contain
 774                  * the factor 1/6, we should add this.
 775                  */
 776                 const real FF = f_lr * rinv / six;
 777                 real VV = (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
 778                           / six;
 779
 780                 if (ii == jnr)
 781                 {
 782                     /* If we get here, the i particle (ii) has itself (jnr)
 783                      * in its neighborlist. This can only happen with the Verlet
 784                      * scheme, and corresponds to a self-interaction that will
 785                      * occur twice. Scale it down by 50% to only include it once.
 786                      */
 787                     VV *= half;
 788                 }
 789
 790                 for (int i = 0; i < NSTATES; i++)
 791                 {
 792                     const real c6grid = nbfp_grid[tj[i]];
 793                     vvtot += LFV[i] * c6grid * VV;
 794                     Fscal += LFV[i] * c6grid * FF;
 795                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 796                 }
 797             }
 798
 799             if (doForces)
 800             {
 801                 const real tx = Fscal * dx;
 802                 const real ty = Fscal * dy;
 803                 const real tz = Fscal * dz;
 804                 fix           = fix + tx;
 805                 fiy           = fiy + ty;
 806                 fiz           = fiz + tz;
 807                 /* OpenMP atomics are expensive, but this kernels is also
 808                  * expensive, so we can take this hit, instead of using
 809                  * thread-local output buffers and extra reduction.
 810                  *
 811                  * All the OpenMP regions in this file are trivial and should
 812                  * not throw, so no need for try/catch.
 813                  */
 814 #pragma omp atomic
 815                 f[j3] -= tx;
 816 #pragma omp atomic
 817                 f[j3 + 1] -= ty;
 818 #pragma omp atomic
 819                 f[j3 + 2] -= tz;
 820             }
 821         }
 822
 823         /* The atomics below are expensive with many OpenMP threads.
 824          * Here unperturbed i-particles will usually only have a few
 825          * (perturbed) j-particles in the list. Thus with a buffered list
 826          * we can skip a significant number of i-reductions with a check.
 827          */
 828         if (npair_within_cutoff > 0)
 829         {
 830             if (doForces)
 831             {
 832 #pragma omp atomic
 833                 f[ii3] += fix;
 834 #pragma omp atomic
 835                 f[ii3 + 1] += fiy;
 836 #pragma omp atomic
 837                 f[ii3 + 2] += fiz;
 838             }
 839             if (doShiftForces)
 840             {
 841 #pragma omp atomic
 842                 fshift[is3] += fix;
 843 #pragma omp atomic
 844                 fshift[is3 + 1] += fiy;
 845 #pragma omp atomic
 846                 fshift[is3 + 2] += fiz;
 847             }
 848             if (doPotential)
 849             {
 850                 int ggid = gid[n];
 851 #pragma omp atomic
 852                 Vc[ggid] += vctot;
 853 #pragma omp atomic
 854                 Vv[ggid] += vvtot;
 855             }
 856         }
 857     }
 858
 859 #pragma omp atomic
 860     dvdl[efptCOUL] += dvdl_coul;
 861 #pragma omp atomic
 862     dvdl[efptVDW] += dvdl_vdw;
 863
 864     /* Estimate flops, average for free energy stuff:
 865      * 12  flops per outer iteration
 866      * 150 flops per inner iteration
 867      */
 868 #pragma omp atomic
 869     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 870 }
 871
 872 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 873                                rvec* gmx_restrict         xx,
 874                                gmx::ForceWithShiftForces* forceWithShiftForces,
 875                                const t_forcerec* gmx_restrict fr,
 876                                const t_mdatoms* gmx_restrict mdatoms,
 877                                nb_kernel_data_t* gmx_restrict kernel_data,
 878                                t_nrnb* gmx_restrict nrnb);
 879
 880 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 881 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
 882 {
 883     if (vdwModifierIsPotSwitch)
 884     {
 885         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
 886                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
 887     }
 888     else
 889     {
 890         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
 891                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
 892     }
 893 }
 894
 895 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 896 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 897                                                           const bool vdwModifierIsPotSwitch)
 898 {
 899     if (elecInteractionTypeIsEwald)
 900     {
 901         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 902                 vdwModifierIsPotSwitch));
 903     }
 904     else
 905     {
 906         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 907                 vdwModifierIsPotSwitch));
 908     }
 909 }
 910
 911 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
 912 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 913                                                          const bool elecInteractionTypeIsEwald,
 914                                                          const bool vdwModifierIsPotSwitch)
 915 {
 916     if (vdwInteractionTypeIsEwald)
 917     {
 918         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, true>(
 919                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 920     }
 921     else
 922     {
 923         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, false>(
 924                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 925     }
 926 }
 927
 928 template<SoftCoreTreatment softCoreTreatment>
 929 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 930                                                                   const bool vdwInteractionTypeIsEwald,
 931                                                                   const bool elecInteractionTypeIsEwald,
 932                                                                   const bool vdwModifierIsPotSwitch)
 933 {
 934     if (scLambdasOrAlphasDiffer)
 935     {
 936         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(
 937                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 938     }
 939     else
 940     {
 941         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(
 942                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 943     }
 944 }
 945
 946 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 947                                      const bool        vdwInteractionTypeIsEwald,
 948                                      const bool        elecInteractionTypeIsEwald,
 949                                      const bool        vdwModifierIsPotSwitch,
 950                                      const t_forcerec* fr)
 951 {
 952     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 953     {
 954         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(
 955                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 956                 vdwModifierIsPotSwitch));
 957     }
 958     else if (fr->sc_r_power == 6.0_real)
 959     {
 960         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(
 961                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 962                 vdwModifierIsPotSwitch));
 963     }
 964     else
 965     {
 966         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(
 967                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 968                 vdwModifierIsPotSwitch));
 969     }
 970 }
 971
 972
 973 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 974                                rvec*                      xx,
 975                                gmx::ForceWithShiftForces* ff,
 976                                const t_forcerec*          fr,
 977                                const t_mdatoms*           mdatoms,
 978                                nb_kernel_data_t*          kernel_data,
 979                                t_nrnb*                    nrnb)
 980 {
 981     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 982                "Unsupported eeltype with free energy");
 983
 984     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 985     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 986     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 987     bool       scLambdasOrAlphasDiffer    = true;
 988
 989     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 990     {
 991         scLambdasOrAlphasDiffer = false;
 992     }
 993     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
 994     {
 995         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 996         {
 997             scLambdasOrAlphasDiffer = false;
 998         }
 999     }
1000     else
1001     {
1002         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1003     }
1004     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
1005                                                elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
1006     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1007 }