src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include <cmath>
  43
  44 #include <algorithm>
  45
  46 #include "gromacs/gmxlib/nrnb.h"
  47 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  48 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  49 #include "gromacs/math/functions.h"
  50 #include "gromacs/math/vec.h"
  51 #include "gromacs/mdtypes/forceoutput.h"
  52 #include "gromacs/mdtypes/forcerec.h"
  53 #include "gromacs/mdtypes/md_enums.h"
  54 #include "gromacs/utility/fatalerror.h"
  55
  56
  57 //! Enum for templating the soft-core treatment in the kernel
  58 enum class SoftCoreTreatment
  59 {
  60     None,    //!< No soft-core
  61     RPower6, //!< Soft-core with r-power = 6
  62     RPower48 //!< Soft-core with r-power = 48
  63 };
  64
  65 //! Most treatments are fine with float in mixed-precision mode.
  66 template<SoftCoreTreatment softCoreTreatment>
  67 struct SoftCoreReal
  68 {
  69     //! Real type for soft-core calculations
  70     using Real = real;
  71 };
  72
  73 //! This treatment requires double precision for some computations.
  74 template<>
  75 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  76 {
  77     //! Real type for soft-core calculations
  78     using Real = double;
  79 };
  80
  81 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  82 template<SoftCoreTreatment softCoreTreatment>
  83 static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
  84 {
  85     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  86     *pthRoot    = 1 / (*invPthRoot);
  87 }
  88
  89 // We need a double version to make the specialization below work
  90 #if !GMX_DOUBLE
  91 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  92 template<SoftCoreTreatment softCoreTreatment>
  93 static inline void pthRoot(const double r, real* pthRoot, double* invPthRoot)
  94 {
  95     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  96     *pthRoot    = 1 / (*invPthRoot);
  97 }
  98 #endif
  99
 100 //! Computes r^(1/p) and 1/r^(1/p) for p=48
 101 template<>
 102 inline void pthRoot<SoftCoreTreatment::RPower48>(const double r, real* pthRoot, double* invPthRoot)
 103 {
 104     *pthRoot    = std::pow(r, 1.0 / 48.0);
 105     *invPthRoot = 1 / (*pthRoot);
 106 }
 107
 108 template<SoftCoreTreatment softCoreTreatment>
 109 static inline real calculateSigmaPow(const real sigma6)
 110 {
 111     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 112     {
 113         return sigma6;
 114     }
 115     else
 116     {
 117         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
 118         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
 119         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
 120         return (sigmaPow);
 121     }
 122 }
 123
 124 template<SoftCoreTreatment softCoreTreatment, class SCReal>
 125 static inline real calculateRinv6(const SCReal rinvV)
 126 {
 127     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 128     {
 129         return rinvV;
 130     }
 131     else
 132     {
 133         real rinv6 = rinvV * rinvV;
 134         return (rinv6 * rinv6 * rinv6);
 135     }
 136 }
 137
 138 static inline real calculateVdw6(const real c6, const real rinv6)
 139 {
 140     return (c6 * rinv6);
 141 }
 142
 143 static inline real calculateVdw12(const real c12, const real rinv6)
 144 {
 145     return (c12 * rinv6 * rinv6);
 146 }
 147
 148 /* reaction-field electrostatics */
 149 template<class SCReal>
 150 static inline SCReal
 151 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf, const real two)
 152 {
 153     return (qq * (rinv - two * krf * r * r));
 154 }
 155 template<class SCReal>
 156 static inline real
 157 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf, const real potentialShift)
 158 {
 159     return (qq * (rinv + krf * r * r - potentialShift));
 160 }
 161
 162 /* Ewald electrostatics */
 163 static inline real ewaldScalarForce(const real coulomb, const real rinv)
 164 {
 165     return (coulomb * rinv);
 166 }
 167 static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
 168 {
 169     return (coulomb * (rinv - potentialShift));
 170 }
 171
 172 /* cutoff LJ */
 173 static inline real lennardJonesScalarForce(const real v6, const real v12)
 174 {
 175     return (v12 - v6);
 176 }
 177 static inline real lennardJonesPotential(const real v6,
 178                                          const real v12,
 179                                          const real c6,
 180                                          const real c12,
 181                                          const real repulsionShift,
 182                                          const real dispersionShift,
 183                                          const real onesixth,
 184                                          const real onetwelfth)
 185 {
 186     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 187 }
 188
 189 /* Ewald LJ */
 190 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 191 {
 192     return (c6grid * potentialShift * onesixth);
 193 }
 194
 195 /* LJ Potential switch */
 196 template<class SCReal>
 197 static inline SCReal potSwitchScalarForceMod(const SCReal fScalarInp,
 198                                              const real   potential,
 199                                              const real   sw,
 200                                              const SCReal r,
 201                                              const real   rVdw,
 202                                              const real   dsw,
 203                                              const real   zero)
 204 {
 205     if (r < rVdw)
 206     {
 207         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
 208         return (fScalar);
 209     }
 210     return (zero);
 211 }
 212 template<class SCReal>
 213 static inline real
 214 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
 215 {
 216     if (r < rVdw)
 217     {
 218         real potential = potentialInp * sw;
 219         return (potential);
 220     }
 221     return (zero);
 222 }
 223
 224
 225 //! Templated free-energy non-bonded kernel
 226 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 227 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 228                                   rvec* gmx_restrict         xx,
 229                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 230                                   const t_forcerec* gmx_restrict fr,
 231                                   const t_mdatoms* gmx_restrict mdatoms,
 232                                   nb_kernel_data_t* gmx_restrict kernel_data,
 233                                   t_nrnb* gmx_restrict nrnb)
 234 {
 235     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
 236
 237     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
 238
 239 #define STATE_A 0
 240 #define STATE_B 1
 241 #define NSTATES 2
 242
 243     constexpr real onetwelfth = 1.0 / 12.0;
 244     constexpr real onesixth   = 1.0 / 6.0;
 245     constexpr real zero       = 0.0;
 246     constexpr real half       = 0.5;
 247     constexpr real one        = 1.0;
 248     constexpr real two        = 2.0;
 249     constexpr real six        = 6.0;
 250
 251     /* Extract pointer to non-bonded interaction constants */
 252     const interaction_const_t* ic = fr->ic;
 253
 254     // Extract pair list data
 255     const int  nri    = nlist->nri;
 256     const int* iinr   = nlist->iinr;
 257     const int* jindex = nlist->jindex;
 258     const int* jjnr   = nlist->jjnr;
 259     const int* shift  = nlist->shift;
 260     const int* gid    = nlist->gid;
 261
 262     const real* shiftvec      = fr->shift_vec[0];
 263     const real* chargeA       = mdatoms->chargeA;
 264     const real* chargeB       = mdatoms->chargeB;
 265     real*       Vc            = kernel_data->energygrp_elec;
 266     const int*  typeA         = mdatoms->typeA;
 267     const int*  typeB         = mdatoms->typeB;
 268     const int   ntype         = fr->ntype;
 269     const real* nbfp          = fr->nbfp.data();
 270     const real* nbfp_grid     = fr->ljpme_c6grid;
 271     real*       Vv            = kernel_data->energygrp_vdw;
 272     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 273     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 274     real*       dvdl          = kernel_data->dvdl;
 275     const real  alpha_coul    = fr->sc_alphacoul;
 276     const real  alpha_vdw     = fr->sc_alphavdw;
 277     const real  lam_power     = fr->sc_power;
 278     const real  sigma6_def    = fr->sc_sigma6_def;
 279     const real  sigma6_min    = fr->sc_sigma6_min;
 280     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 281     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 282     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 283
 284     // Extract data from interaction_const_t
 285     const real facel           = ic->epsfac;
 286     const real rcoulomb        = ic->rcoulomb;
 287     const real krf             = ic->k_rf;
 288     const real crf             = ic->c_rf;
 289     const real sh_lj_ewald     = ic->sh_lj_ewald;
 290     const real rvdw            = ic->rvdw;
 291     const real dispersionShift = ic->dispersion_shift.cpot;
 292     const real repulsionShift  = ic->repulsion_shift.cpot;
 293
 294     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 295     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 296                "Potential switching is not supported for Coulomb with FEP");
 297
 298     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 299     if (vdwModifierIsPotSwitch)
 300     {
 301         const real d = ic->rvdw - ic->rvdw_switch;
 302         vdw_swV3     = -10.0 / (d * d * d);
 303         vdw_swV4     = 15.0 / (d * d * d * d);
 304         vdw_swV5     = -6.0 / (d * d * d * d * d);
 305         vdw_swF2     = -30.0 / (d * d * d);
 306         vdw_swF3     = 60.0 / (d * d * d * d);
 307         vdw_swF4     = -30.0 / (d * d * d * d * d);
 308     }
 309     else
 310     {
 311         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 312         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 313     }
 314
 315     int icoul;
 316     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 317     {
 318         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 319     }
 320     else
 321     {
 322         icoul = GMX_NBKERNEL_ELEC_NONE;
 323     }
 324
 325     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 326     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 327
 328     const real* tab_ewald_F_lj = nullptr;
 329     const real* tab_ewald_V_lj = nullptr;
 330     const real* ewtab          = nullptr;
 331     real        ewtabscale     = 0;
 332     real        ewtabhalfspace = 0;
 333     real        sh_ewald       = 0;
 334     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 335     {
 336         const auto& tables = *ic->coulombEwaldTables;
 337         sh_ewald           = ic->sh_ewald;
 338         ewtab              = tables.tableFDV0.data();
 339         ewtabscale         = tables.scale;
 340         ewtabhalfspace     = half / ewtabscale;
 341         tab_ewald_F_lj     = tables.tableF.data();
 342         tab_ewald_V_lj     = tables.tableV.data();
 343     }
 344
 345     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 346      * reciprocal space. When we use non-switched Ewald interactions, we
 347      * assume the soft-coring does not significantly affect the grid contribution
 348      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 349      *
 350      * However, we cannot use this approach for switch-modified since we would then
 351      * effectively end up evaluating a significantly different interaction here compared to the
 352      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 353      * position than what the user requested, or by switching different
 354      * things (1/r rather than short-range Ewald). For these settings, we just
 355      * use the traditional short-range Ewald interaction in that case.
 356      */
 357     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 358                        "Can not apply soft-core to switched Ewald potentials");
 359
 360     SCReal dvdl_coul = 0; /* Needs double for sc_power==48 */
 361     SCReal dvdl_vdw  = 0; /* Needs double for sc_power==48 */
 362
 363     /* Lambda factor for state A, 1-lambda*/
 364     real LFC[NSTATES], LFV[NSTATES];
 365     LFC[STATE_A] = one - lambda_coul;
 366     LFV[STATE_A] = one - lambda_vdw;
 367
 368     /* Lambda factor for state B, lambda*/
 369     LFC[STATE_B] = lambda_coul;
 370     LFV[STATE_B] = lambda_vdw;
 371
 372     /*derivative of the lambda factor for state A and B */
 373     real DLF[NSTATES];
 374     DLF[STATE_A] = -1;
 375     DLF[STATE_B] = 1;
 376
 377     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 378     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 379     for (int i = 0; i < NSTATES; i++)
 380     {
 381         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 382         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 383         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 384         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 385     }
 386
 387     // TODO: We should get rid of using pointers to real
 388     const real* x             = xx[0];
 389     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 390     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 391
 392     for (int n = 0; n < nri; n++)
 393     {
 394         int npair_within_cutoff = 0;
 395
 396         const int  is3   = 3 * shift[n];
 397         const real shX   = shiftvec[is3];
 398         const real shY   = shiftvec[is3 + 1];
 399         const real shZ   = shiftvec[is3 + 2];
 400         const int  nj0   = jindex[n];
 401         const int  nj1   = jindex[n + 1];
 402         const int  ii    = iinr[n];
 403         const int  ii3   = 3 * ii;
 404         const real ix    = shX + x[ii3 + 0];
 405         const real iy    = shY + x[ii3 + 1];
 406         const real iz    = shZ + x[ii3 + 2];
 407         const real iqA   = facel * chargeA[ii];
 408         const real iqB   = facel * chargeB[ii];
 409         const int  ntiA  = 2 * ntype * typeA[ii];
 410         const int  ntiB  = 2 * ntype * typeB[ii];
 411         real       vctot = 0;
 412         real       vvtot = 0;
 413         real       fix   = 0;
 414         real       fiy   = 0;
 415         real       fiz   = 0;
 416
 417         for (int k = nj0; k < nj1; k++)
 418         {
 419             int        tj[NSTATES];
 420             const int  jnr = jjnr[k];
 421             const int  j3  = 3 * jnr;
 422             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 423             real       r, rinv, rp, rpm2;
 424             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
 425             const real dx  = ix - x[j3];
 426             const real dy  = iy - x[j3 + 1];
 427             const real dz  = iz - x[j3 + 2];
 428             const real rsq = dx * dx + dy * dy + dz * dz;
 429             SCReal     FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
 430
 431             if (rsq >= rcutoff_max2)
 432             {
 433                 /* We save significant time by skipping all code below.
 434                  * Note that with soft-core interactions, the actual cut-off
 435                  * check might be different. But since the soft-core distance
 436                  * is always larger than r, checking on r here is safe.
 437                  */
 438                 continue;
 439             }
 440             npair_within_cutoff++;
 441
 442             if (rsq > 0)
 443             {
 444                 /* Note that unlike in the nbnxn kernels, we do not need
 445                  * to clamp the value of rsq before taking the invsqrt
 446                  * to avoid NaN in the LJ calculation, since here we do
 447                  * not calculate LJ interactions when C6 and C12 are zero.
 448                  */
 449
 450                 rinv = gmx::invsqrt(rsq);
 451                 r    = rsq * rinv;
 452             }
 453             else
 454             {
 455                 /* The force at r=0 is zero, because of symmetry.
 456                  * But note that the potential is in general non-zero,
 457                  * since the soft-cored r will be non-zero.
 458                  */
 459                 rinv = 0;
 460                 r    = 0;
 461             }
 462
 463             if (softCoreTreatment == SoftCoreTreatment::None)
 464             {
 465                 /* The soft-core power p will not affect the results
 466                  * with not using soft-core, so we use power of 0 which gives
 467                  * the simplest math and cheapest code.
 468                  */
 469                 rpm2 = rinv * rinv;
 470                 rp   = 1;
 471             }
 472             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 473             {
 474                 rpm2 = rsq * rsq;  /* r4 */
 475                 rp   = rpm2 * rsq; /* r6 */
 476             }
 477             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 478             {
 479                 rp   = rsq * rsq * rsq; /* r6 */
 480                 rp   = rp * rp;         /* r12 */
 481                 rp   = rp * rp;         /* r24 */
 482                 rp   = rp * rp;         /* r48 */
 483                 rpm2 = rp / rsq;        /* r46 */
 484             }
 485
 486             real Fscal = 0;
 487
 488             qq[STATE_A] = iqA * chargeA[jnr];
 489             qq[STATE_B] = iqB * chargeB[jnr];
 490
 491             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 492             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 493
 494             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 495             {
 496                 c6[STATE_A] = nbfp[tj[STATE_A]];
 497                 c6[STATE_B] = nbfp[tj[STATE_B]];
 498
 499                 for (int i = 0; i < NSTATES; i++)
 500                 {
 501                     c12[i] = nbfp[tj[i] + 1];
 502                     if (useSoftCore)
 503                     {
 504                         real sigma6[NSTATES];
 505                         if ((c6[i] > 0) && (c12[i] > 0))
 506                         {
 507                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 508                             sigma6[i] = half * c12[i] / c6[i];
 509                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 510                             {
 511                                 sigma6[i] = sigma6_min;
 512                             }
 513                         }
 514                         else
 515                         {
 516                             sigma6[i] = sigma6_def;
 517                         }
 518                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
 519                     }
 520                 }
 521
 522                 if (useSoftCore)
 523                 {
 524                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 525                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 526                     {
 527                         alpha_vdw_eff  = 0;
 528                         alpha_coul_eff = 0;
 529                     }
 530                     else
 531                     {
 532                         alpha_vdw_eff  = alpha_vdw;
 533                         alpha_coul_eff = alpha_coul;
 534                     }
 535                 }
 536
 537                 for (int i = 0; i < NSTATES; i++)
 538                 {
 539                     FscalC[i] = 0;
 540                     FscalV[i] = 0;
 541                     Vcoul[i]  = 0;
 542                     Vvdw[i]   = 0;
 543
 544                     real   rinvC, rinvV;
 545                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
 546
 547                     /* Only spend time on A or B state if it is non-zero */
 548                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 549                     {
 550                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 551                         if (useSoftCore)
 552                         {
 553                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
 554                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
 555                             if (scLambdasOrAlphasDiffer)
 556                             {
 557                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
 558                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
 559                             }
 560                             else
 561                             {
 562                                 /* We can avoid one expensive pow and one / operation */
 563                                 rpinvV = rpinvC;
 564                                 rinvV  = rinvC;
 565                                 rV     = rC;
 566                             }
 567                         }
 568                         else
 569                         {
 570                             rpinvC = 1;
 571                             rinvC  = rinv;
 572                             rC     = r;
 573
 574                             rpinvV = 1;
 575                             rinvV  = rinv;
 576                             rV     = r;
 577                         }
 578
 579                         /* Only process the coulomb interactions if we have charges,
 580                          * and if we either include all entries in the list (no cutoff
 581                          * used in the kernel), or if we are within the cutoff.
 582                          */
 583                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 584                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 585
 586                         if ((qq[i] != 0) && computeElecInteraction)
 587                         {
 588                             if (elecInteractionTypeIsEwald)
 589                             {
 590                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 591                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 592                             }
 593                             else
 594                             {
 595                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 596                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 597                             }
 598                         }
 599
 600                         /* Only process the VDW interactions if we have
 601                          * some non-zero parameters, and if we either
 602                          * include all entries in the list (no cutoff used
 603                          * in the kernel), or if we are within the cutoff.
 604                          */
 605                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 606                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 607                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 608                         {
 609                             real rinv6;
 610                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 611                             {
 612                                 rinv6 = calculateRinv6<softCoreTreatment>(rpinvV);
 613                             }
 614                             else
 615                             {
 616                                 rinv6 = calculateRinv6<softCoreTreatment>(rinvV);
 617                             }
 618                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
 619                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
 620
 621                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 622                                                             dispersionShift, onesixth, onetwelfth);
 623                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 624
 625                             if (vdwInteractionTypeIsEwald)
 626                             {
 627                                 /* Subtract the grid potential at the cut-off */
 628                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 629                                                                          sh_lj_ewald, onesixth);
 630                             }
 631
 632                             if (vdwModifierIsPotSwitch)
 633                             {
 634                                 real d        = rV - ic->rvdw_switch;
 635                                 d             = (d > zero) ? d : zero;
 636                                 const real d2 = d * d;
 637                                 const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 638                                 const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 639
 640                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 641                                                                     rvdw, dsw, zero);
 642                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 643                             }
 644                         }
 645
 646                         /* FscalC (and FscalV) now contain: dV/drC * rC
 647                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 648                          * Further down we first multiply by r^p-2 and then by
 649                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 650                          */
 651                         FscalC[i] *= rpinvC;
 652                         FscalV[i] *= rpinvV;
 653                     }
 654                 }
 655
 656                 /* Assemble A and B states */
 657                 for (int i = 0; i < NSTATES; i++)
 658                 {
 659                     vctot += LFC[i] * Vcoul[i];
 660                     vvtot += LFV[i] * Vvdw[i];
 661
 662                     Fscal += LFC[i] * FscalC[i] * rpm2;
 663                     Fscal += LFV[i] * FscalV[i] * rpm2;
 664
 665                     if (useSoftCore)
 666                     {
 667                         dvdl_coul +=
 668                                 Vcoul[i] * DLF[i]
 669                                 + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma_pow[i];
 670                         dvdl_vdw += Vvdw[i] * DLF[i]
 671                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma_pow[i];
 672                     }
 673                     else
 674                     {
 675                         dvdl_coul += Vcoul[i] * DLF[i];
 676                         dvdl_vdw += Vvdw[i] * DLF[i];
 677                     }
 678                 }
 679             }
 680             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 681             {
 682                 /* For excluded pairs, which are only in this pair list when
 683                  * using the Verlet scheme, we don't use soft-core.
 684                  * As there is no singularity, there is no need for soft-core.
 685                  */
 686                 const real FF = -two * krf;
 687                 real       VV = krf * rsq - crf;
 688
 689                 if (ii == jnr)
 690                 {
 691                     VV *= half;
 692                 }
 693
 694                 for (int i = 0; i < NSTATES; i++)
 695                 {
 696                     vctot += LFC[i] * qq[i] * VV;
 697                     Fscal += LFC[i] * qq[i] * FF;
 698                     dvdl_coul += DLF[i] * qq[i] * VV;
 699                 }
 700             }
 701
 702             if (elecInteractionTypeIsEwald && r < rcoulomb)
 703             {
 704                 /* See comment in the preamble. When using Ewald interactions
 705                  * (unless we use a switch modifier) we subtract the reciprocal-space
 706                  * Ewald component here which made it possible to apply the free
 707                  * energy interaction to 1/r (vanilla coulomb short-range part)
 708                  * above. This gets us closer to the ideal case of applying
 709                  * the softcore to the entire electrostatic interaction,
 710                  * including the reciprocal-space component.
 711                  */
 712                 real v_lr, f_lr;
 713
 714                 const real ewrt   = r * ewtabscale;
 715                 int        ewitab = static_cast<int>(ewrt);
 716                 const real eweps  = ewrt - ewitab;
 717                 ewitab            = 4 * ewitab;
 718                 f_lr              = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 719                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
 720                 f_lr *= rinv;
 721
 722                 /* Note that any possible Ewald shift has already been applied in
 723                  * the normal interaction part above.
 724                  */
 725
 726                 if (ii == jnr)
 727                 {
 728                     /* If we get here, the i particle (ii) has itself (jnr)
 729                      * in its neighborlist. This can only happen with the Verlet
 730                      * scheme, and corresponds to a self-interaction that will
 731                      * occur twice. Scale it down by 50% to only include it once.
 732                      */
 733                     v_lr *= half;
 734                 }
 735
 736                 for (int i = 0; i < NSTATES; i++)
 737                 {
 738                     vctot -= LFC[i] * qq[i] * v_lr;
 739                     Fscal -= LFC[i] * qq[i] * f_lr;
 740                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 741                 }
 742             }
 743
 744             if (vdwInteractionTypeIsEwald && r < rvdw)
 745             {
 746                 /* See comment in the preamble. When using LJ-Ewald interactions
 747                  * (unless we use a switch modifier) we subtract the reciprocal-space
 748                  * Ewald component here which made it possible to apply the free
 749                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 750                  * above. This gets us closer to the ideal case of applying
 751                  * the softcore to the entire VdW interaction,
 752                  * including the reciprocal-space component.
 753                  */
 754                 /* We could also use the analytical form here
 755                  * iso a table, but that can cause issues for
 756                  * r close to 0 for non-interacting pairs.
 757                  */
 758
 759                 const real rs   = rsq * rinv * ewtabscale;
 760                 const int  ri   = static_cast<int>(rs);
 761                 const real frac = rs - ri;
 762                 const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 763                 /* TODO: Currently the Ewald LJ table does not contain
 764                  * the factor 1/6, we should add this.
 765                  */
 766                 const real FF = f_lr * rinv / six;
 767                 real VV = (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
 768
 769                 if (ii == jnr)
 770                 {
 771                     /* If we get here, the i particle (ii) has itself (jnr)
 772                      * in its neighborlist. This can only happen with the Verlet
 773                      * scheme, and corresponds to a self-interaction that will
 774                      * occur twice. Scale it down by 50% to only include it once.
 775                      */
 776                     VV *= half;
 777                 }
 778
 779                 for (int i = 0; i < NSTATES; i++)
 780                 {
 781                     const real c6grid = nbfp_grid[tj[i]];
 782                     vvtot += LFV[i] * c6grid * VV;
 783                     Fscal += LFV[i] * c6grid * FF;
 784                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 785                 }
 786             }
 787
 788             if (doForces)
 789             {
 790                 const real tx = Fscal * dx;
 791                 const real ty = Fscal * dy;
 792                 const real tz = Fscal * dz;
 793                 fix           = fix + tx;
 794                 fiy           = fiy + ty;
 795                 fiz           = fiz + tz;
 796                 /* OpenMP atomics are expensive, but this kernels is also
 797                  * expensive, so we can take this hit, instead of using
 798                  * thread-local output buffers and extra reduction.
 799                  *
 800                  * All the OpenMP regions in this file are trivial and should
 801                  * not throw, so no need for try/catch.
 802                  */
 803 #pragma omp atomic
 804                 f[j3] -= tx;
 805 #pragma omp atomic
 806                 f[j3 + 1] -= ty;
 807 #pragma omp atomic
 808                 f[j3 + 2] -= tz;
 809             }
 810         }
 811
 812         /* The atomics below are expensive with many OpenMP threads.
 813          * Here unperturbed i-particles will usually only have a few
 814          * (perturbed) j-particles in the list. Thus with a buffered list
 815          * we can skip a significant number of i-reductions with a check.
 816          */
 817         if (npair_within_cutoff > 0)
 818         {
 819             if (doForces)
 820             {
 821 #pragma omp atomic
 822                 f[ii3] += fix;
 823 #pragma omp atomic
 824                 f[ii3 + 1] += fiy;
 825 #pragma omp atomic
 826                 f[ii3 + 2] += fiz;
 827             }
 828             if (doShiftForces)
 829             {
 830 #pragma omp atomic
 831                 fshift[is3] += fix;
 832 #pragma omp atomic
 833                 fshift[is3 + 1] += fiy;
 834 #pragma omp atomic
 835                 fshift[is3 + 2] += fiz;
 836             }
 837             if (doPotential)
 838             {
 839                 int ggid = gid[n];
 840 #pragma omp atomic
 841                 Vc[ggid] += vctot;
 842 #pragma omp atomic
 843                 Vv[ggid] += vvtot;
 844             }
 845         }
 846     }
 847
 848 #pragma omp atomic
 849     dvdl[efptCOUL] += dvdl_coul;
 850 #pragma omp atomic
 851     dvdl[efptVDW] += dvdl_vdw;
 852
 853     /* Estimate flops, average for free energy stuff:
 854      * 12  flops per outer iteration
 855      * 150 flops per inner iteration
 856      */
 857 #pragma omp atomic
 858     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 859 }
 860
 861 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 862                                rvec* gmx_restrict         xx,
 863                                gmx::ForceWithShiftForces* forceWithShiftForces,
 864                                const t_forcerec* gmx_restrict fr,
 865                                const t_mdatoms* gmx_restrict mdatoms,
 866                                nb_kernel_data_t* gmx_restrict kernel_data,
 867                                t_nrnb* gmx_restrict nrnb);
 868
 869 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 870 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
 871 {
 872     if (vdwModifierIsPotSwitch)
 873     {
 874         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
 875                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
 876     }
 877     else
 878     {
 879         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
 880                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
 881     }
 882 }
 883
 884 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 885 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 886                                                           const bool vdwModifierIsPotSwitch)
 887 {
 888     if (elecInteractionTypeIsEwald)
 889     {
 890         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 891                 vdwModifierIsPotSwitch));
 892     }
 893     else
 894     {
 895         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 896                 vdwModifierIsPotSwitch));
 897     }
 898 }
 899
 900 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
 901 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 902                                                          const bool elecInteractionTypeIsEwald,
 903                                                          const bool vdwModifierIsPotSwitch)
 904 {
 905     if (vdwInteractionTypeIsEwald)
 906     {
 907         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, true>(
 908                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 909     }
 910     else
 911     {
 912         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, false>(
 913                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 914     }
 915 }
 916
 917 template<SoftCoreTreatment softCoreTreatment>
 918 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 919                                                                   const bool vdwInteractionTypeIsEwald,
 920                                                                   const bool elecInteractionTypeIsEwald,
 921                                                                   const bool vdwModifierIsPotSwitch)
 922 {
 923     if (scLambdasOrAlphasDiffer)
 924     {
 925         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(
 926                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 927     }
 928     else
 929     {
 930         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(
 931                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 932     }
 933 }
 934
 935 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 936                                      const bool        vdwInteractionTypeIsEwald,
 937                                      const bool        elecInteractionTypeIsEwald,
 938                                      const bool        vdwModifierIsPotSwitch,
 939                                      const t_forcerec* fr)
 940 {
 941     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 942     {
 943         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(
 944                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 945                 vdwModifierIsPotSwitch));
 946     }
 947     else if (fr->sc_r_power == 6.0_real)
 948     {
 949         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(
 950                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 951                 vdwModifierIsPotSwitch));
 952     }
 953     else
 954     {
 955         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(
 956                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 957                 vdwModifierIsPotSwitch));
 958     }
 959 }
 960
 961
 962 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 963                                rvec*                      xx,
 964                                gmx::ForceWithShiftForces* ff,
 965                                const t_forcerec*          fr,
 966                                const t_mdatoms*           mdatoms,
 967                                nb_kernel_data_t*          kernel_data,
 968                                t_nrnb*                    nrnb)
 969 {
 970     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 971                "Unsupported eeltype with free energy");
 972
 973     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 974     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 975     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 976     bool       scLambdasOrAlphasDiffer    = true;
 977
 978     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 979     {
 980         scLambdasOrAlphasDiffer = false;
 981     }
 982     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
 983     {
 984         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 985         {
 986             scLambdasOrAlphasDiffer = false;
 987         }
 988     }
 989     else
 990     {
 991         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 992     }
 993     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 994                                                elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
 995     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 996 }