src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forceoutput.h"
  51 #include "gromacs/mdtypes/forcerec.h"
  52 #include "gromacs/mdtypes/md_enums.h"
  53 #include "gromacs/utility/fatalerror.h"
  54
  55
  56 //! Enum for templating the soft-core treatment in the kernel
  57 enum class SoftCoreTreatment
  58 {
  59     None,    //!< No soft-core
  60     RPower6, //!< Soft-core with r-power = 6
  61     RPower48 //!< Soft-core with r-power = 48
  62 };
  63
  64 //! Most treatments are fine with float in mixed-precision mode.
  65 template<SoftCoreTreatment softCoreTreatment>
  66 struct SoftCoreReal
  67 {
  68     //! Real type for soft-core calculations
  69     using Real = real;
  70 };
  71
  72 //! This treatment requires double precision for some computations.
  73 template<>
  74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  75 {
  76     //! Real type for soft-core calculations
  77     using Real = double;
  78 };
  79
  80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  81 template<SoftCoreTreatment softCoreTreatment>
  82 static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
  83 {
  84     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  85     *pthRoot    = 1 / (*invPthRoot);
  86 }
  87
  88 // We need a double version to make the specialization below work
  89 #if !GMX_DOUBLE
  90 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  91 template<SoftCoreTreatment softCoreTreatment>
  92 static inline void pthRoot(const double r, real* pthRoot, double* invPthRoot)
  93 {
  94     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  95     *pthRoot    = 1 / (*invPthRoot);
  96 }
  97 #endif
  98
  99 //! Computes r^(1/p) and 1/r^(1/p) for p=48
 100 template<>
 101 inline void pthRoot<SoftCoreTreatment::RPower48>(const double r, real* pthRoot, double* invPthRoot)
 102 {
 103     *pthRoot    = std::pow(r, 1.0 / 48.0);
 104     *invPthRoot = 1 / (*pthRoot);
 105 }
 106
 107 template<SoftCoreTreatment softCoreTreatment>
 108 static inline real calculateSigmaPow(const real sigma6)
 109 {
 110     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 111     {
 112         return sigma6;
 113     }
 114     else
 115     {
 116         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
 117         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
 118         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
 119         return (sigmaPow);
 120     }
 121 }
 122
 123 template<SoftCoreTreatment softCoreTreatment, class SCReal>
 124 static inline real calculateRinv6(const SCReal rinvV)
 125 {
 126     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 127     {
 128         return rinvV;
 129     }
 130     else
 131     {
 132         real rinv6 = rinvV * rinvV;
 133         return (rinv6 * rinv6 * rinv6);
 134     }
 135 }
 136
 137 static inline real calculateVdw6(const real c6, const real rinv6)
 138 {
 139     return (c6 * rinv6);
 140 }
 141
 142 static inline real calculateVdw12(const real c12, const real rinv6)
 143 {
 144     return (c12 * rinv6 * rinv6);
 145 }
 146
 147 /* reaction-field electrostatics */
 148 template<class SCReal>
 149 static inline SCReal
 150 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf, const real two)
 151 {
 152     return (qq * (rinv - two * krf * r * r));
 153 }
 154 template<class SCReal>
 155 static inline real
 156 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf, const real potentialShift)
 157 {
 158     return (qq * (rinv + krf * r * r - potentialShift));
 159 }
 160
 161 /* Ewald electrostatics */
 162 static inline real ewaldScalarForce(const real coulomb, const real rinv)
 163 {
 164     return (coulomb * rinv);
 165 }
 166 static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
 167 {
 168     return (coulomb * (rinv - potentialShift));
 169 }
 170
 171 /* cutoff LJ */
 172 static inline real lennardJonesScalarForce(const real v6, const real v12)
 173 {
 174     return (v12 - v6);
 175 }
 176 static inline real lennardJonesPotential(const real v6,
 177                                          const real v12,
 178                                          const real c6,
 179                                          const real c12,
 180                                          const real repulsionShift,
 181                                          const real dispersionShift,
 182                                          const real onesixth,
 183                                          const real onetwelfth)
 184 {
 185     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 186 }
 187
 188 /* Ewald LJ */
 189 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 190 {
 191     return (c6grid * potentialShift * onesixth);
 192 }
 193
 194 /* LJ Potential switch */
 195 template<class SCReal>
 196 static inline SCReal potSwitchScalarForceMod(const SCReal fScalarInp,
 197                                              const real   potential,
 198                                              const real   sw,
 199                                              const SCReal r,
 200                                              const real   rVdw,
 201                                              const real   dsw,
 202                                              const real   zero)
 203 {
 204     if (r < rVdw)
 205     {
 206         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
 207         return (fScalar);
 208     }
 209     return (zero);
 210 }
 211 template<class SCReal>
 212 static inline real
 213 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
 214 {
 215     if (r < rVdw)
 216     {
 217         real potential = potentialInp * sw;
 218         return (potential);
 219     }
 220     return (zero);
 221 }
 222
 223
 224 //! Templated free-energy non-bonded kernel
 225 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 226 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 227                                   rvec* gmx_restrict         xx,
 228                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 229                                   const t_forcerec* gmx_restrict fr,
 230                                   const t_mdatoms* gmx_restrict mdatoms,
 231                                   nb_kernel_data_t* gmx_restrict kernel_data,
 232                                   t_nrnb* gmx_restrict nrnb)
 233 {
 234     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
 235
 236     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
 237
 238 #define STATE_A 0
 239 #define STATE_B 1
 240 #define NSTATES 2
 241
 242     constexpr real onetwelfth = 1.0 / 12.0;
 243     constexpr real onesixth   = 1.0 / 6.0;
 244     constexpr real zero       = 0.0;
 245     constexpr real half       = 0.5;
 246     constexpr real one        = 1.0;
 247     constexpr real two        = 2.0;
 248     constexpr real six        = 6.0;
 249
 250     /* Extract pointer to non-bonded interaction constants */
 251     const interaction_const_t* ic = fr->ic;
 252
 253     // Extract pair list data
 254     const int  nri    = nlist->nri;
 255     const int* iinr   = nlist->iinr;
 256     const int* jindex = nlist->jindex;
 257     const int* jjnr   = nlist->jjnr;
 258     const int* shift  = nlist->shift;
 259     const int* gid    = nlist->gid;
 260
 261     const real* shiftvec      = fr->shift_vec[0];
 262     const real* chargeA       = mdatoms->chargeA;
 263     const real* chargeB       = mdatoms->chargeB;
 264     real*       Vc            = kernel_data->energygrp_elec;
 265     const int*  typeA         = mdatoms->typeA;
 266     const int*  typeB         = mdatoms->typeB;
 267     const int   ntype         = fr->ntype;
 268     const real* nbfp          = fr->nbfp.data();
 269     const real* nbfp_grid     = fr->ljpme_c6grid;
 270     real*       Vv            = kernel_data->energygrp_vdw;
 271     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 272     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 273     real*       dvdl          = kernel_data->dvdl;
 274     const real  alpha_coul    = fr->sc_alphacoul;
 275     const real  alpha_vdw     = fr->sc_alphavdw;
 276     const real  lam_power     = fr->sc_power;
 277     const real  sigma6_def    = fr->sc_sigma6_def;
 278     const real  sigma6_min    = fr->sc_sigma6_min;
 279     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 280     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 281     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 282
 283     // Extract data from interaction_const_t
 284     const real facel           = ic->epsfac;
 285     const real rcoulomb        = ic->rcoulomb;
 286     const real krf             = ic->k_rf;
 287     const real crf             = ic->c_rf;
 288     const real sh_lj_ewald     = ic->sh_lj_ewald;
 289     const real rvdw            = ic->rvdw;
 290     const real dispersionShift = ic->dispersion_shift.cpot;
 291     const real repulsionShift  = ic->repulsion_shift.cpot;
 292
 293     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 294     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 295                "Potential switching is not supported for Coulomb with FEP");
 296
 297     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 298     if (vdwModifierIsPotSwitch)
 299     {
 300         const real d = ic->rvdw - ic->rvdw_switch;
 301         vdw_swV3     = -10.0 / (d * d * d);
 302         vdw_swV4     = 15.0 / (d * d * d * d);
 303         vdw_swV5     = -6.0 / (d * d * d * d * d);
 304         vdw_swF2     = -30.0 / (d * d * d);
 305         vdw_swF3     = 60.0 / (d * d * d * d);
 306         vdw_swF4     = -30.0 / (d * d * d * d * d);
 307     }
 308     else
 309     {
 310         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 311         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 312     }
 313
 314     int icoul;
 315     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 316     {
 317         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 318     }
 319     else
 320     {
 321         icoul = GMX_NBKERNEL_ELEC_NONE;
 322     }
 323
 324     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 325     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 326
 327     const real* tab_ewald_F_lj = nullptr;
 328     const real* tab_ewald_V_lj = nullptr;
 329     const real* ewtab          = nullptr;
 330     real        ewtabscale     = 0;
 331     real        ewtabhalfspace = 0;
 332     real        sh_ewald       = 0;
 333     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 334     {
 335         const auto& tables = *ic->coulombEwaldTables;
 336         sh_ewald           = ic->sh_ewald;
 337         ewtab              = tables.tableFDV0.data();
 338         ewtabscale         = tables.scale;
 339         ewtabhalfspace     = half / ewtabscale;
 340         tab_ewald_F_lj     = tables.tableF.data();
 341         tab_ewald_V_lj     = tables.tableV.data();
 342     }
 343
 344     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 345      * reciprocal space. When we use non-switched Ewald interactions, we
 346      * assume the soft-coring does not significantly affect the grid contribution
 347      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 348      *
 349      * However, we cannot use this approach for switch-modified since we would then
 350      * effectively end up evaluating a significantly different interaction here compared to the
 351      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 352      * position than what the user requested, or by switching different
 353      * things (1/r rather than short-range Ewald). For these settings, we just
 354      * use the traditional short-range Ewald interaction in that case.
 355      */
 356     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 357                        "Can not apply soft-core to switched Ewald potentials");
 358
 359     SCReal dvdl_coul = 0; /* Needs double for sc_power==48 */
 360     SCReal dvdl_vdw  = 0; /* Needs double for sc_power==48 */
 361
 362     /* Lambda factor for state A, 1-lambda*/
 363     real LFC[NSTATES], LFV[NSTATES];
 364     LFC[STATE_A] = one - lambda_coul;
 365     LFV[STATE_A] = one - lambda_vdw;
 366
 367     /* Lambda factor for state B, lambda*/
 368     LFC[STATE_B] = lambda_coul;
 369     LFV[STATE_B] = lambda_vdw;
 370
 371     /*derivative of the lambda factor for state A and B */
 372     real DLF[NSTATES];
 373     DLF[STATE_A] = -1;
 374     DLF[STATE_B] = 1;
 375
 376     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 377     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 378     for (int i = 0; i < NSTATES; i++)
 379     {
 380         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 381         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 382         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 383         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 384     }
 385
 386     // TODO: We should get rid of using pointers to real
 387     const real* x             = xx[0];
 388     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 389     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 390
 391     for (int n = 0; n < nri; n++)
 392     {
 393         int npair_within_cutoff = 0;
 394
 395         const int  is3   = 3 * shift[n];
 396         const real shX   = shiftvec[is3];
 397         const real shY   = shiftvec[is3 + 1];
 398         const real shZ   = shiftvec[is3 + 2];
 399         const int  nj0   = jindex[n];
 400         const int  nj1   = jindex[n + 1];
 401         const int  ii    = iinr[n];
 402         const int  ii3   = 3 * ii;
 403         const real ix    = shX + x[ii3 + 0];
 404         const real iy    = shY + x[ii3 + 1];
 405         const real iz    = shZ + x[ii3 + 2];
 406         const real iqA   = facel * chargeA[ii];
 407         const real iqB   = facel * chargeB[ii];
 408         const int  ntiA  = 2 * ntype * typeA[ii];
 409         const int  ntiB  = 2 * ntype * typeB[ii];
 410         real       vctot = 0;
 411         real       vvtot = 0;
 412         real       fix   = 0;
 413         real       fiy   = 0;
 414         real       fiz   = 0;
 415
 416         for (int k = nj0; k < nj1; k++)
 417         {
 418             int        tj[NSTATES];
 419             const int  jnr = jjnr[k];
 420             const int  j3  = 3 * jnr;
 421             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 422             real       r, rinv, rp, rpm2;
 423             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
 424             const real dx  = ix - x[j3];
 425             const real dy  = iy - x[j3 + 1];
 426             const real dz  = iz - x[j3 + 2];
 427             const real rsq = dx * dx + dy * dy + dz * dz;
 428             SCReal     FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
 429
 430             if (rsq >= rcutoff_max2)
 431             {
 432                 /* We save significant time by skipping all code below.
 433                  * Note that with soft-core interactions, the actual cut-off
 434                  * check might be different. But since the soft-core distance
 435                  * is always larger than r, checking on r here is safe.
 436                  */
 437                 continue;
 438             }
 439             npair_within_cutoff++;
 440
 441             if (rsq > 0)
 442             {
 443                 /* Note that unlike in the nbnxn kernels, we do not need
 444                  * to clamp the value of rsq before taking the invsqrt
 445                  * to avoid NaN in the LJ calculation, since here we do
 446                  * not calculate LJ interactions when C6 and C12 are zero.
 447                  */
 448
 449                 rinv = gmx::invsqrt(rsq);
 450                 r    = rsq * rinv;
 451             }
 452             else
 453             {
 454                 /* The force at r=0 is zero, because of symmetry.
 455                  * But note that the potential is in general non-zero,
 456                  * since the soft-cored r will be non-zero.
 457                  */
 458                 rinv = 0;
 459                 r    = 0;
 460             }
 461
 462             if (softCoreTreatment == SoftCoreTreatment::None)
 463             {
 464                 /* The soft-core power p will not affect the results
 465                  * with not using soft-core, so we use power of 0 which gives
 466                  * the simplest math and cheapest code.
 467                  */
 468                 rpm2 = rinv * rinv;
 469                 rp   = 1;
 470             }
 471             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 472             {
 473                 rpm2 = rsq * rsq;  /* r4 */
 474                 rp   = rpm2 * rsq; /* r6 */
 475             }
 476             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 477             {
 478                 rp   = rsq * rsq * rsq; /* r6 */
 479                 rp   = rp * rp;         /* r12 */
 480                 rp   = rp * rp;         /* r24 */
 481                 rp   = rp * rp;         /* r48 */
 482                 rpm2 = rp / rsq;        /* r46 */
 483             }
 484
 485             real Fscal = 0;
 486
 487             qq[STATE_A] = iqA * chargeA[jnr];
 488             qq[STATE_B] = iqB * chargeB[jnr];
 489
 490             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 491             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 492
 493             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 494             {
 495                 c6[STATE_A] = nbfp[tj[STATE_A]];
 496                 c6[STATE_B] = nbfp[tj[STATE_B]];
 497
 498                 for (int i = 0; i < NSTATES; i++)
 499                 {
 500                     c12[i] = nbfp[tj[i] + 1];
 501                     if (useSoftCore)
 502                     {
 503                         real sigma6[NSTATES];
 504                         if ((c6[i] > 0) && (c12[i] > 0))
 505                         {
 506                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 507                             sigma6[i] = half * c12[i] / c6[i];
 508                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 509                             {
 510                                 sigma6[i] = sigma6_min;
 511                             }
 512                         }
 513                         else
 514                         {
 515                             sigma6[i] = sigma6_def;
 516                         }
 517                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
 518                     }
 519                 }
 520
 521                 if (useSoftCore)
 522                 {
 523                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 524                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 525                     {
 526                         alpha_vdw_eff  = 0;
 527                         alpha_coul_eff = 0;
 528                     }
 529                     else
 530                     {
 531                         alpha_vdw_eff  = alpha_vdw;
 532                         alpha_coul_eff = alpha_coul;
 533                     }
 534                 }
 535
 536                 for (int i = 0; i < NSTATES; i++)
 537                 {
 538                     FscalC[i] = 0;
 539                     FscalV[i] = 0;
 540                     Vcoul[i]  = 0;
 541                     Vvdw[i]   = 0;
 542
 543                     real   rinvC, rinvV;
 544                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
 545
 546                     /* Only spend time on A or B state if it is non-zero */
 547                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 548                     {
 549                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 550                         if (useSoftCore)
 551                         {
 552                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
 553                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
 554                             if (scLambdasOrAlphasDiffer)
 555                             {
 556                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
 557                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
 558                             }
 559                             else
 560                             {
 561                                 /* We can avoid one expensive pow and one / operation */
 562                                 rpinvV = rpinvC;
 563                                 rinvV  = rinvC;
 564                                 rV     = rC;
 565                             }
 566                         }
 567                         else
 568                         {
 569                             rpinvC = 1;
 570                             rinvC  = rinv;
 571                             rC     = r;
 572
 573                             rpinvV = 1;
 574                             rinvV  = rinv;
 575                             rV     = r;
 576                         }
 577
 578                         /* Only process the coulomb interactions if we have charges,
 579                          * and if we either include all entries in the list (no cutoff
 580                          * used in the kernel), or if we are within the cutoff.
 581                          */
 582                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 583                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 584
 585                         if ((qq[i] != 0) && computeElecInteraction)
 586                         {
 587                             if (elecInteractionTypeIsEwald)
 588                             {
 589                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 590                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 591                             }
 592                             else
 593                             {
 594                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 595                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 596                             }
 597                         }
 598
 599                         /* Only process the VDW interactions if we have
 600                          * some non-zero parameters, and if we either
 601                          * include all entries in the list (no cutoff used
 602                          * in the kernel), or if we are within the cutoff.
 603                          */
 604                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 605                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 606                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 607                         {
 608                             real rinv6;
 609                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 610                             {
 611                                 rinv6 = calculateRinv6<softCoreTreatment>(rpinvV);
 612                             }
 613                             else
 614                             {
 615                                 rinv6 = calculateRinv6<softCoreTreatment>(rinvV);
 616                             }
 617                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
 618                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
 619
 620                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 621                                                             dispersionShift, onesixth, onetwelfth);
 622                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 623
 624                             if (vdwInteractionTypeIsEwald)
 625                             {
 626                                 /* Subtract the grid potential at the cut-off */
 627                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 628                                                                          sh_lj_ewald, onesixth);
 629                             }
 630
 631                             if (vdwModifierIsPotSwitch)
 632                             {
 633                                 real d        = rV - ic->rvdw_switch;
 634                                 d             = (d > zero) ? d : zero;
 635                                 const real d2 = d * d;
 636                                 const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 637                                 const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 638
 639                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 640                                                                     rvdw, dsw, zero);
 641                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 642                             }
 643                         }
 644
 645                         /* FscalC (and FscalV) now contain: dV/drC * rC
 646                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 647                          * Further down we first multiply by r^p-2 and then by
 648                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 649                          */
 650                         FscalC[i] *= rpinvC;
 651                         FscalV[i] *= rpinvV;
 652                     }
 653                 }
 654
 655                 /* Assemble A and B states */
 656                 for (int i = 0; i < NSTATES; i++)
 657                 {
 658                     vctot += LFC[i] * Vcoul[i];
 659                     vvtot += LFV[i] * Vvdw[i];
 660
 661                     Fscal += LFC[i] * FscalC[i] * rpm2;
 662                     Fscal += LFV[i] * FscalV[i] * rpm2;
 663
 664                     if (useSoftCore)
 665                     {
 666                         dvdl_coul +=
 667                                 Vcoul[i] * DLF[i]
 668                                 + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma_pow[i];
 669                         dvdl_vdw += Vvdw[i] * DLF[i]
 670                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma_pow[i];
 671                     }
 672                     else
 673                     {
 674                         dvdl_coul += Vcoul[i] * DLF[i];
 675                         dvdl_vdw += Vvdw[i] * DLF[i];
 676                     }
 677                 }
 678             }
 679             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 680             {
 681                 /* For excluded pairs, which are only in this pair list when
 682                  * using the Verlet scheme, we don't use soft-core.
 683                  * As there is no singularity, there is no need for soft-core.
 684                  */
 685                 const real FF = -two * krf;
 686                 real       VV = krf * rsq - crf;
 687
 688                 if (ii == jnr)
 689                 {
 690                     VV *= half;
 691                 }
 692
 693                 for (int i = 0; i < NSTATES; i++)
 694                 {
 695                     vctot += LFC[i] * qq[i] * VV;
 696                     Fscal += LFC[i] * qq[i] * FF;
 697                     dvdl_coul += DLF[i] * qq[i] * VV;
 698                 }
 699             }
 700
 701             if (elecInteractionTypeIsEwald && r < rcoulomb)
 702             {
 703                 /* See comment in the preamble. When using Ewald interactions
 704                  * (unless we use a switch modifier) we subtract the reciprocal-space
 705                  * Ewald component here which made it possible to apply the free
 706                  * energy interaction to 1/r (vanilla coulomb short-range part)
 707                  * above. This gets us closer to the ideal case of applying
 708                  * the softcore to the entire electrostatic interaction,
 709                  * including the reciprocal-space component.
 710                  */
 711                 real v_lr, f_lr;
 712
 713                 const real ewrt   = r * ewtabscale;
 714                 int        ewitab = static_cast<int>(ewrt);
 715                 const real eweps  = ewrt - ewitab;
 716                 ewitab            = 4 * ewitab;
 717                 f_lr              = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 718                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
 719                 f_lr *= rinv;
 720
 721                 /* Note that any possible Ewald shift has already been applied in
 722                  * the normal interaction part above.
 723                  */
 724
 725                 if (ii == jnr)
 726                 {
 727                     /* If we get here, the i particle (ii) has itself (jnr)
 728                      * in its neighborlist. This can only happen with the Verlet
 729                      * scheme, and corresponds to a self-interaction that will
 730                      * occur twice. Scale it down by 50% to only include it once.
 731                      */
 732                     v_lr *= half;
 733                 }
 734
 735                 for (int i = 0; i < NSTATES; i++)
 736                 {
 737                     vctot -= LFC[i] * qq[i] * v_lr;
 738                     Fscal -= LFC[i] * qq[i] * f_lr;
 739                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 740                 }
 741             }
 742
 743             if (vdwInteractionTypeIsEwald && r < rvdw)
 744             {
 745                 /* See comment in the preamble. When using LJ-Ewald interactions
 746                  * (unless we use a switch modifier) we subtract the reciprocal-space
 747                  * Ewald component here which made it possible to apply the free
 748                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 749                  * above. This gets us closer to the ideal case of applying
 750                  * the softcore to the entire VdW interaction,
 751                  * including the reciprocal-space component.
 752                  */
 753                 /* We could also use the analytical form here
 754                  * iso a table, but that can cause issues for
 755                  * r close to 0 for non-interacting pairs.
 756                  */
 757
 758                 const real rs   = rsq * rinv * ewtabscale;
 759                 const int  ri   = static_cast<int>(rs);
 760                 const real frac = rs - ri;
 761                 const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 762                 /* TODO: Currently the Ewald LJ table does not contain
 763                  * the factor 1/6, we should add this.
 764                  */
 765                 const real FF = f_lr * rinv / six;
 766                 real VV = (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
 767
 768                 if (ii == jnr)
 769                 {
 770                     /* If we get here, the i particle (ii) has itself (jnr)
 771                      * in its neighborlist. This can only happen with the Verlet
 772                      * scheme, and corresponds to a self-interaction that will
 773                      * occur twice. Scale it down by 50% to only include it once.
 774                      */
 775                     VV *= half;
 776                 }
 777
 778                 for (int i = 0; i < NSTATES; i++)
 779                 {
 780                     const real c6grid = nbfp_grid[tj[i]];
 781                     vvtot += LFV[i] * c6grid * VV;
 782                     Fscal += LFV[i] * c6grid * FF;
 783                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 784                 }
 785             }
 786
 787             if (doForces)
 788             {
 789                 const real tx = Fscal * dx;
 790                 const real ty = Fscal * dy;
 791                 const real tz = Fscal * dz;
 792                 fix           = fix + tx;
 793                 fiy           = fiy + ty;
 794                 fiz           = fiz + tz;
 795                 /* OpenMP atomics are expensive, but this kernels is also
 796                  * expensive, so we can take this hit, instead of using
 797                  * thread-local output buffers and extra reduction.
 798                  *
 799                  * All the OpenMP regions in this file are trivial and should
 800                  * not throw, so no need for try/catch.
 801                  */
 802 #pragma omp atomic
 803                 f[j3] -= tx;
 804 #pragma omp atomic
 805                 f[j3 + 1] -= ty;
 806 #pragma omp atomic
 807                 f[j3 + 2] -= tz;
 808             }
 809         }
 810
 811         /* The atomics below are expensive with many OpenMP threads.
 812          * Here unperturbed i-particles will usually only have a few
 813          * (perturbed) j-particles in the list. Thus with a buffered list
 814          * we can skip a significant number of i-reductions with a check.
 815          */
 816         if (npair_within_cutoff > 0)
 817         {
 818             if (doForces)
 819             {
 820 #pragma omp atomic
 821                 f[ii3] += fix;
 822 #pragma omp atomic
 823                 f[ii3 + 1] += fiy;
 824 #pragma omp atomic
 825                 f[ii3 + 2] += fiz;
 826             }
 827             if (doShiftForces)
 828             {
 829 #pragma omp atomic
 830                 fshift[is3] += fix;
 831 #pragma omp atomic
 832                 fshift[is3 + 1] += fiy;
 833 #pragma omp atomic
 834                 fshift[is3 + 2] += fiz;
 835             }
 836             if (doPotential)
 837             {
 838                 int ggid = gid[n];
 839 #pragma omp atomic
 840                 Vc[ggid] += vctot;
 841 #pragma omp atomic
 842                 Vv[ggid] += vvtot;
 843             }
 844         }
 845     }
 846
 847 #pragma omp atomic
 848     dvdl[efptCOUL] += dvdl_coul;
 849 #pragma omp atomic
 850     dvdl[efptVDW] += dvdl_vdw;
 851
 852     /* Estimate flops, average for free energy stuff:
 853      * 12  flops per outer iteration
 854      * 150 flops per inner iteration
 855      */
 856 #pragma omp atomic
 857     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 858 }
 859
 860 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 861                                rvec* gmx_restrict         xx,
 862                                gmx::ForceWithShiftForces* forceWithShiftForces,
 863                                const t_forcerec* gmx_restrict fr,
 864                                const t_mdatoms* gmx_restrict mdatoms,
 865                                nb_kernel_data_t* gmx_restrict kernel_data,
 866                                t_nrnb* gmx_restrict nrnb);
 867
 868 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 869 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
 870 {
 871     if (vdwModifierIsPotSwitch)
 872     {
 873         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
 874                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
 875     }
 876     else
 877     {
 878         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
 879                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
 880     }
 881 }
 882
 883 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 884 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 885                                                           const bool vdwModifierIsPotSwitch)
 886 {
 887     if (elecInteractionTypeIsEwald)
 888     {
 889         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 890                 vdwModifierIsPotSwitch));
 891     }
 892     else
 893     {
 894         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 895                 vdwModifierIsPotSwitch));
 896     }
 897 }
 898
 899 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
 900 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 901                                                          const bool elecInteractionTypeIsEwald,
 902                                                          const bool vdwModifierIsPotSwitch)
 903 {
 904     if (vdwInteractionTypeIsEwald)
 905     {
 906         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, true>(
 907                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 908     }
 909     else
 910     {
 911         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, false>(
 912                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 913     }
 914 }
 915
 916 template<SoftCoreTreatment softCoreTreatment>
 917 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 918                                                                   const bool vdwInteractionTypeIsEwald,
 919                                                                   const bool elecInteractionTypeIsEwald,
 920                                                                   const bool vdwModifierIsPotSwitch)
 921 {
 922     if (scLambdasOrAlphasDiffer)
 923     {
 924         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(
 925                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 926     }
 927     else
 928     {
 929         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(
 930                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 931     }
 932 }
 933
 934 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 935                                      const bool        vdwInteractionTypeIsEwald,
 936                                      const bool        elecInteractionTypeIsEwald,
 937                                      const bool        vdwModifierIsPotSwitch,
 938                                      const t_forcerec* fr)
 939 {
 940     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 941     {
 942         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(
 943                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 944                 vdwModifierIsPotSwitch));
 945     }
 946     else if (fr->sc_r_power == 6.0_real)
 947     {
 948         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(
 949                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 950                 vdwModifierIsPotSwitch));
 951     }
 952     else
 953     {
 954         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(
 955                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 956                 vdwModifierIsPotSwitch));
 957     }
 958 }
 959
 960
 961 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 962                                rvec*                      xx,
 963                                gmx::ForceWithShiftForces* ff,
 964                                const t_forcerec*          fr,
 965                                const t_mdatoms*           mdatoms,
 966                                nb_kernel_data_t*          kernel_data,
 967                                t_nrnb*                    nrnb)
 968 {
 969     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 970                "Unsupported eeltype with free energy");
 971
 972     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 973     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 974     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 975     bool       scLambdasOrAlphasDiffer    = true;
 976
 977     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 978     {
 979         scLambdasOrAlphasDiffer = false;
 980     }
 981     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
 982     {
 983         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 984         {
 985             scLambdasOrAlphasDiffer = false;
 986         }
 987     }
 988     else
 989     {
 990         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 991     }
 992     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 993                                                elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
 994     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 995 }