src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forceoutput.h"
  51 #include "gromacs/mdtypes/forcerec.h"
  52 #include "gromacs/mdtypes/md_enums.h"
  53 #include "gromacs/utility/fatalerror.h"
  54
  55
  56 //! Enum for templating the soft-core treatment in the kernel
  57 enum class SoftCoreTreatment
  58 {
  59     None,    //!< No soft-core
  60     RPower6, //!< Soft-core with r-power = 6
  61     RPower48 //!< Soft-core with r-power = 48
  62 };
  63
  64 //! Most treatments are fine with float in mixed-precision mode.
  65 template <SoftCoreTreatment softCoreTreatment>
  66 struct SoftCoreReal
  67 {
  68     //! Real type for soft-core calculations
  69     using Real = real;
  70 };
  71
  72 //! This treatment requires double precision for some computations.
  73 template <>
  74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  75 {
  76     //! Real type for soft-core calculations
  77     using Real = double;
  78 };
  79
  80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  81 template <SoftCoreTreatment softCoreTreatment>
  82 static inline void pthRoot(const real  r,
  83                            real       *pthRoot,
  84                            real       *invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1/(*invPthRoot);
  88 }
  89
  90 // We need a double version to make the specialization below work
  91 #if !GMX_DOUBLE
  92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  93 template <SoftCoreTreatment softCoreTreatment>
  94 static inline void pthRoot(const double  r,
  95                            real         *pthRoot,
  96                            double       *invPthRoot)
  97 {
  98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  99     *pthRoot    = 1/(*invPthRoot);
 100 }
 101 #endif
 102
 103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
 104 template <>
 105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
 106                                                  real         *pthRoot,
 107                                                  double       *invPthRoot)
 108 {
 109     *pthRoot    = std::pow(r, 1.0/48.0);
 110     *invPthRoot = 1/(*pthRoot);
 111 }
 112
 113 template<SoftCoreTreatment softCoreTreatment>
 114 static inline real calculateSigmaPow(const real sigma6)
 115 {
 116     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 117     {
 118         return sigma6;
 119     }
 120     else
 121     {
 122         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
 123         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
 124         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
 125         return (sigmaPow);
 126     }
 127 }
 128
 129 template<SoftCoreTreatment softCoreTreatment, class SCReal>
 130 static inline real calculateRinv6(const SCReal rinvV)
 131 {
 132     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 133     {
 134         return rinvV;
 135     }
 136     else
 137     {
 138         real rinv6 = rinvV * rinvV;
 139         return (rinv6 * rinv6 * rinv6);
 140     }
 141 }
 142
 143 static inline real calculateVdw6(const real c6, const real rinv6)
 144 {
 145     return (c6 * rinv6);
 146 }
 147
 148 static inline real calculateVdw12(const real c12, const real rinv6)
 149 {
 150     return (c12 * rinv6 * rinv6);
 151 }
 152
 153 /* reaction-field electrostatics */
 154 template<class SCReal>
 155 static inline SCReal
 156 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
 157                          const real two)
 158 {
 159     return (qq*(rinv - two*krf*r*r));
 160 }
 161 template<class SCReal>
 162 static inline real
 163 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
 164                        const real potentialShift)
 165 {
 166     return (qq*(rinv + krf*r*r-potentialShift));
 167 }
 168
 169 /* Ewald electrostatics */
 170 static inline real
 171 ewaldScalarForce(const real coulomb, const real rinv)
 172 {
 173     return (coulomb*rinv);
 174 }
 175 static inline real
 176 ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
 177 {
 178     return (coulomb*(rinv-potentialShift));
 179 }
 180
 181 /* cutoff LJ */
 182 static inline real
 183 lennardJonesScalarForce(const real v6, const real v12)
 184 {
 185     return (v12 - v6);
 186 }
 187 static inline real
 188 lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
 189                       const real dispersionShift, const real onesixth, const real onetwelfth)
 190 {
 191     return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
 192 }
 193
 194 /* Ewald LJ */
 195 static inline real
 196 ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 197 {
 198     return (c6grid * potentialShift * onesixth);
 199 }
 200
 201 /* LJ Potential switch */
 202 template<class SCReal>
 203 static inline SCReal
 204 potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
 205 {
 206     if (r < rVdw)
 207     {
 208         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
 209         return (fScalar);
 210     }
 211     return (zero);
 212 }
 213 template<class SCReal>
 214 static inline real
 215 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
 216 {
 217     if (r < rVdw)
 218     {
 219         real potential = potentialInp * sw;
 220         return (potential);
 221     }
 222     return (zero);
 223 }
 224
 225
 226 //! Templated free-energy non-bonded kernel
 227 template<SoftCoreTreatment softCoreTreatment,
 228          bool              scLambdasOrAlphasDiffer,
 229          bool              vdwInteractionTypeIsEwald,
 230          bool              elecInteractionTypeIsEwald,
 231          bool              vdwModifierIsPotSwitch>
 232 static void
 233 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
 234                       rvec * gmx_restrict              xx,
 235                       gmx::ForceWithShiftForces *      forceWithShiftForces,
 236                       const t_forcerec * gmx_restrict  fr,
 237                       const t_mdatoms * gmx_restrict   mdatoms,
 238                       nb_kernel_data_t * gmx_restrict  kernel_data,
 239                       t_nrnb * gmx_restrict            nrnb)
 240 {
 241     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
 242
 243     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
 244
 245 #define  STATE_A  0
 246 #define  STATE_B  1
 247 #define  NSTATES  2
 248
 249     constexpr real  onetwelfth  = 1.0/12.0;
 250     constexpr real  onesixth    = 1.0/6.0;
 251     constexpr real  zero        = 0.0;
 252     constexpr real  half        = 0.5;
 253     constexpr real  one         = 1.0;
 254     constexpr real  two         = 2.0;
 255     constexpr real  six         = 6.0;
 256
 257     /* Extract pointer to non-bonded interaction constants */
 258     const interaction_const_t *ic = fr->ic;
 259
 260     // Extract pair list data
 261     const int   nri             = nlist->nri;
 262     const int  *iinr            = nlist->iinr;
 263     const int  *jindex          = nlist->jindex;
 264     const int  *jjnr            = nlist->jjnr;
 265     const int  *shift           = nlist->shift;
 266     const int  *gid             = nlist->gid;
 267
 268     const real *shiftvec        = fr->shift_vec[0];
 269     const real *chargeA         = mdatoms->chargeA;
 270     const real *chargeB         = mdatoms->chargeB;
 271     real       *Vc              = kernel_data->energygrp_elec;
 272     const int  *typeA           = mdatoms->typeA;
 273     const int  *typeB           = mdatoms->typeB;
 274     const int   ntype           = fr->ntype;
 275     const real *nbfp            = fr->nbfp;
 276     const real *nbfp_grid       = fr->ljpme_c6grid;
 277     real       *Vv              = kernel_data->energygrp_vdw;
 278     const real  lambda_coul     = kernel_data->lambda[efptCOUL];
 279     const real  lambda_vdw      = kernel_data->lambda[efptVDW];
 280     real       *dvdl            = kernel_data->dvdl;
 281     const real  alpha_coul      = fr->sc_alphacoul;
 282     const real  alpha_vdw       = fr->sc_alphavdw;
 283     const real  lam_power       = fr->sc_power;
 284     const real  sigma6_def      = fr->sc_sigma6_def;
 285     const real  sigma6_min      = fr->sc_sigma6_min;
 286     const bool  doForces        = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 287     const bool  doShiftForces   = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 288     const bool  doPotential     = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 289
 290     // Extract data from interaction_const_t
 291     const real  facel           = ic->epsfac;
 292     const real  rcoulomb        = ic->rcoulomb;
 293     const real  krf             = ic->k_rf;
 294     const real  crf             = ic->c_rf;
 295     const real  sh_lj_ewald     = ic->sh_lj_ewald;
 296     const real  rvdw            = ic->rvdw;
 297     const real  dispersionShift = ic->dispersion_shift.cpot;
 298     const real  repulsionShift  = ic->repulsion_shift.cpot;
 299
 300     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 301     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 302                "Potential switching is not supported for Coulomb with FEP");
 303
 304     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 305     if (vdwModifierIsPotSwitch)
 306     {
 307         const real d = ic->rvdw - ic->rvdw_switch;
 308         vdw_swV3     = -10.0/(d*d*d);
 309         vdw_swV4     =  15.0/(d*d*d*d);
 310         vdw_swV5     =  -6.0/(d*d*d*d*d);
 311         vdw_swF2     = -30.0/(d*d*d);
 312         vdw_swF3     =  60.0/(d*d*d*d);
 313         vdw_swF4     = -30.0/(d*d*d*d*d);
 314     }
 315     else
 316     {
 317         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 318         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 319     }
 320
 321     int icoul;
 322     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 323     {
 324         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 325     }
 326     else
 327     {
 328         icoul = GMX_NBKERNEL_ELEC_NONE;
 329     }
 330
 331     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 332     rcutoff_max2      = rcutoff_max2*rcutoff_max2;
 333
 334     const real   *tab_ewald_F_lj = nullptr;
 335     const real   *tab_ewald_V_lj = nullptr;
 336     const real   *ewtab          = nullptr;
 337     real          ewtabscale     = 0;
 338     real          ewtabhalfspace = 0;
 339     real          sh_ewald       = 0;
 340     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 341     {
 342         const auto &tables = *ic->coulombEwaldTables;
 343         sh_ewald           = ic->sh_ewald;
 344         ewtab              = tables.tableFDV0.data();
 345         ewtabscale         = tables.scale;
 346         ewtabhalfspace     = half/ewtabscale;
 347         tab_ewald_F_lj     = tables.tableF.data();
 348         tab_ewald_V_lj     = tables.tableV.data();
 349     }
 350
 351     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 352      * reciprocal space. When we use non-switched Ewald interactions, we
 353      * assume the soft-coring does not significantly affect the grid contribution
 354      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 355      *
 356      * However, we cannot use this approach for switch-modified since we would then
 357      * effectively end up evaluating a significantly different interaction here compared to the
 358      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 359      * position than what the user requested, or by switching different
 360      * things (1/r rather than short-range Ewald). For these settings, we just
 361      * use the traditional short-range Ewald interaction in that case.
 362      */
 363     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald  && vdwModifierIsPotSwitch),
 364                        "Can not apply soft-core to switched Ewald potentials");
 365
 366     SCReal dvdl_coul  = 0; /* Needs double for sc_power==48 */
 367     SCReal dvdl_vdw   = 0; /* Needs double for sc_power==48 */
 368
 369     /* Lambda factor for state A, 1-lambda*/
 370     real LFC[NSTATES], LFV[NSTATES];
 371     LFC[STATE_A] = one - lambda_coul;
 372     LFV[STATE_A] = one - lambda_vdw;
 373
 374     /* Lambda factor for state B, lambda*/
 375     LFC[STATE_B] = lambda_coul;
 376     LFV[STATE_B] = lambda_vdw;
 377
 378     /*derivative of the lambda factor for state A and B */
 379     real DLF[NSTATES];
 380     DLF[STATE_A] = -1;
 381     DLF[STATE_B] = 1;
 382
 383     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 384     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 385     for (int i = 0; i < NSTATES; i++)
 386     {
 387         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 388         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 389         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 390         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 391     }
 392
 393     // TODO: We should get rid of using pointers to real
 394     const real          *x      = xx[0];
 395     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
 396     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 397
 398     for (int n = 0; n < nri; n++)
 399     {
 400         int        npair_within_cutoff = 0;
 401
 402         const int  is3   = 3*shift[n];
 403         const real shX   = shiftvec[is3];
 404         const real shY   = shiftvec[is3+1];
 405         const real shZ   = shiftvec[is3+2];
 406         const int  nj0   = jindex[n];
 407         const int  nj1   = jindex[n+1];
 408         const int  ii    = iinr[n];
 409         const int  ii3   = 3*ii;
 410         const real ix    = shX + x[ii3+0];
 411         const real iy    = shY + x[ii3+1];
 412         const real iz    = shZ + x[ii3+2];
 413         const real iqA   = facel*chargeA[ii];
 414         const real iqB   = facel*chargeB[ii];
 415         const int  ntiA  = 2*ntype*typeA[ii];
 416         const int  ntiB  = 2*ntype*typeB[ii];
 417         real       vctot = 0;
 418         real       vvtot = 0;
 419         real       fix   = 0;
 420         real       fiy   = 0;
 421         real       fiz   = 0;
 422
 423         for (int k = nj0; k < nj1; k++)
 424         {
 425             int        tj[NSTATES];
 426             const int  jnr             = jjnr[k];
 427             const int  j3              = 3*jnr;
 428             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 429             real       r, rinv, rp, rpm2;
 430             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
 431             const real dx             = ix - x[j3];
 432             const real dy             = iy - x[j3+1];
 433             const real dz             = iz - x[j3+2];
 434             const real rsq            = dx*dx + dy*dy + dz*dz;
 435             SCReal     FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
 436
 437             if (rsq >= rcutoff_max2)
 438             {
 439                 /* We save significant time by skipping all code below.
 440                  * Note that with soft-core interactions, the actual cut-off
 441                  * check might be different. But since the soft-core distance
 442                  * is always larger than r, checking on r here is safe.
 443                  */
 444                 continue;
 445             }
 446             npair_within_cutoff++;
 447
 448             if (rsq > 0)
 449             {
 450                 /* Note that unlike in the nbnxn kernels, we do not need
 451                  * to clamp the value of rsq before taking the invsqrt
 452                  * to avoid NaN in the LJ calculation, since here we do
 453                  * not calculate LJ interactions when C6 and C12 are zero.
 454                  */
 455
 456                 rinv         = gmx::invsqrt(rsq);
 457                 r            = rsq*rinv;
 458             }
 459             else
 460             {
 461                 /* The force at r=0 is zero, because of symmetry.
 462                  * But note that the potential is in general non-zero,
 463                  * since the soft-cored r will be non-zero.
 464                  */
 465                 rinv         = 0;
 466                 r            = 0;
 467             }
 468
 469             if (softCoreTreatment == SoftCoreTreatment::None)
 470             {
 471                 /* The soft-core power p will not affect the results
 472                  * with not using soft-core, so we use power of 0 which gives
 473                  * the simplest math and cheapest code.
 474                  */
 475                 rpm2         = rinv*rinv;
 476                 rp           = 1;
 477             }
 478             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 479             {
 480                 rpm2         = rsq*rsq;  /* r4 */
 481                 rp           = rpm2*rsq; /* r6 */
 482             }
 483             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 484             {
 485                 rp           = rsq*rsq*rsq; /* r6 */
 486                 rp           = rp*rp;       /* r12 */
 487                 rp           = rp*rp;       /* r24 */
 488                 rp           = rp*rp;       /* r48 */
 489                 rpm2         = rp/rsq;      /* r46 */
 490             }
 491
 492             real Fscal = 0;
 493
 494             qq[STATE_A]      = iqA*chargeA[jnr];
 495             qq[STATE_B]      = iqB*chargeB[jnr];
 496
 497             tj[STATE_A]      = ntiA+2*typeA[jnr];
 498             tj[STATE_B]      = ntiB+2*typeB[jnr];
 499
 500             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 501             {
 502                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 503                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 504
 505                 for (int i = 0; i < NSTATES; i++)
 506                 {
 507                     c12[i]             = nbfp[tj[i]+1];
 508                     if (useSoftCore)
 509                     {
 510                         real sigma6[NSTATES];
 511                         if ((c6[i] > 0) && (c12[i] > 0))
 512                         {
 513                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 514                             sigma6[i]       = half*c12[i]/c6[i];
 515                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 516                             {
 517                                 sigma6[i] = sigma6_min;
 518                             }
 519                         }
 520                         else
 521                         {
 522                             sigma6[i]       = sigma6_def;
 523                         }
 524                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
 525                     }
 526                 }
 527
 528                 if (useSoftCore)
 529                 {
 530                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 531                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 532                     {
 533                         alpha_vdw_eff    = 0;
 534                         alpha_coul_eff   = 0;
 535                     }
 536                     else
 537                     {
 538                         alpha_vdw_eff    = alpha_vdw;
 539                         alpha_coul_eff   = alpha_coul;
 540                     }
 541                 }
 542
 543                 for (int i = 0; i < NSTATES; i++)
 544                 {
 545                     FscalC[i] = 0;
 546                     FscalV[i] = 0;
 547                     Vcoul[i]  = 0;
 548                     Vvdw[i]   = 0;
 549
 550                     real   rinvC, rinvV;
 551                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
 552
 553                     /* Only spend time on A or B state if it is non-zero */
 554                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 555                     {
 556                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 557                         if (useSoftCore)
 558                         {
 559                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 560                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
 561                             if (scLambdasOrAlphasDiffer)
 562                             {
 563                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 564                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
 565                             }
 566                             else
 567                             {
 568                                 /* We can avoid one expensive pow and one / operation */
 569                                 rpinvV = rpinvC;
 570                                 rinvV  = rinvC;
 571                                 rV     = rC;
 572                             }
 573                         }
 574                         else
 575                         {
 576                             rpinvC     = 1;
 577                             rinvC      = rinv;
 578                             rC         = r;
 579
 580                             rpinvV     = 1;
 581                             rinvV      = rinv;
 582                             rV         = r;
 583                         }
 584
 585                         /* Only process the coulomb interactions if we have charges,
 586                          * and if we either include all entries in the list (no cutoff
 587                          * used in the kernel), or if we are within the cutoff.
 588                          */
 589                         bool computeElecInteraction =
 590                             ( elecInteractionTypeIsEwald && r < rcoulomb) ||
 591                             (!elecInteractionTypeIsEwald && rC < rcoulomb);
 592
 593                         if ( (qq[i] != 0) && computeElecInteraction)
 594                         {
 595                             if (elecInteractionTypeIsEwald)
 596                             {
 597                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 598                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 599                             }
 600                             else
 601                             {
 602                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 603                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 604                             }
 605                         }
 606
 607                         /* Only process the VDW interactions if we have
 608                          * some non-zero parameters, and if we either
 609                          * include all entries in the list (no cutoff used
 610                          * in the kernel), or if we are within the cutoff.
 611                          */
 612                         bool computeVdwInteraction =
 613                             ( vdwInteractionTypeIsEwald && r < rvdw) ||
 614                             (!vdwInteractionTypeIsEwald && rV < rvdw);
 615                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 616                         {
 617                             real rinv6;
 618                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 619                             {
 620                                 rinv6    = calculateRinv6<softCoreTreatment>(rpinvV);
 621                             }
 622                             else
 623                             {
 624                                 rinv6    = calculateRinv6<softCoreTreatment>(rinvV);
 625                             }
 626                             real Vvdw6   = calculateVdw6(c6[i], rinv6);
 627                             real Vvdw12  = calculateVdw12(c12[i], rinv6);
 628
 629                             Vvdw[i]      = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
 630                             FscalV[i]    = lennardJonesScalarForce(Vvdw6, Vvdw12);
 631
 632                             if (vdwInteractionTypeIsEwald)
 633                             {
 634                                 /* Subtract the grid potential at the cut-off */
 635                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
 636                             }
 637
 638                             if (vdwModifierIsPotSwitch)
 639                             {
 640                                 real       d   = rV - ic->rvdw_switch;
 641                                 d              = (d > zero) ? d : zero;
 642                                 const real d2  = d*d;
 643                                 const real sw  = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
 644                                 const real dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
 645
 646                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
 647                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 648                             }
 649                         }
 650
 651                         /* FscalC (and FscalV) now contain: dV/drC * rC
 652                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 653                          * Further down we first multiply by r^p-2 and then by
 654                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 655                          */
 656                         FscalC[i] *= rpinvC;
 657                         FscalV[i] *= rpinvV;
 658                     }
 659                 }
 660
 661                 /* Assemble A and B states */
 662                 for (int i = 0; i < NSTATES; i++)
 663                 {
 664                     vctot += LFC[i]*Vcoul[i];
 665                     vvtot += LFV[i]*Vvdw[i];
 666
 667                     Fscal += LFC[i]*FscalC[i]*rpm2;
 668                     Fscal += LFV[i]*FscalV[i]*rpm2;
 669
 670                     if (useSoftCore)
 671                     {
 672                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 673                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 674                     }
 675                     else
 676                     {
 677                         dvdl_coul += Vcoul[i]*DLF[i];
 678                         dvdl_vdw  += Vvdw[i]*DLF[i];
 679                     }
 680                 }
 681             }
 682             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 683             {
 684                 /* For excluded pairs, which are only in this pair list when
 685                  * using the Verlet scheme, we don't use soft-core.
 686                  * As there is no singularity, there is no need for soft-core.
 687                  */
 688                 const real FF = -two*krf;
 689                 real       VV = krf*rsq - crf;
 690
 691                 if (ii == jnr)
 692                 {
 693                     VV *= half;
 694                 }
 695
 696                 for (int i = 0; i < NSTATES; i++)
 697                 {
 698                     vctot      += LFC[i]*qq[i]*VV;
 699                     Fscal      += LFC[i]*qq[i]*FF;
 700                     dvdl_coul  += DLF[i]*qq[i]*VV;
 701                 }
 702             }
 703
 704             if (elecInteractionTypeIsEwald && r < rcoulomb)
 705             {
 706                 /* See comment in the preamble. When using Ewald interactions
 707                  * (unless we use a switch modifier) we subtract the reciprocal-space
 708                  * Ewald component here which made it possible to apply the free
 709                  * energy interaction to 1/r (vanilla coulomb short-range part)
 710                  * above. This gets us closer to the ideal case of applying
 711                  * the softcore to the entire electrostatic interaction,
 712                  * including the reciprocal-space component.
 713                  */
 714                 real       v_lr, f_lr;
 715
 716                 const real ewrt   = r*ewtabscale;
 717                 int        ewitab = static_cast<int>(ewrt);
 718                 const real eweps  = ewrt - ewitab;
 719                 ewitab            = 4*ewitab;
 720                 f_lr              = ewtab[ewitab] + eweps*ewtab[ewitab+1];
 721                 v_lr              = (ewtab[ewitab + 2] - ewtabhalfspace*eweps*(ewtab[ewitab] + f_lr));
 722                 f_lr             *= rinv;
 723
 724                 /* Note that any possible Ewald shift has already been applied in
 725                  * the normal interaction part above.
 726                  */
 727
 728                 if (ii == jnr)
 729                 {
 730                     /* If we get here, the i particle (ii) has itself (jnr)
 731                      * in its neighborlist. This can only happen with the Verlet
 732                      * scheme, and corresponds to a self-interaction that will
 733                      * occur twice. Scale it down by 50% to only include it once.
 734                      */
 735                     v_lr *= half;
 736                 }
 737
 738                 for (int i = 0; i < NSTATES; i++)
 739                 {
 740                     vctot     -= LFC[i]*qq[i]*v_lr;
 741                     Fscal     -= LFC[i]*qq[i]*f_lr;
 742                     dvdl_coul -= (DLF[i]*qq[i])*v_lr;
 743                 }
 744             }
 745
 746             if (vdwInteractionTypeIsEwald && r < rvdw)
 747             {
 748                 /* See comment in the preamble. When using LJ-Ewald interactions
 749                  * (unless we use a switch modifier) we subtract the reciprocal-space
 750                  * Ewald component here which made it possible to apply the free
 751                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 752                  * above. This gets us closer to the ideal case of applying
 753                  * the softcore to the entire VdW interaction,
 754                  * including the reciprocal-space component.
 755                  */
 756                 /* We could also use the analytical form here
 757                  * iso a table, but that can cause issues for
 758                  * r close to 0 for non-interacting pairs.
 759                  */
 760
 761                 const real rs   = rsq*rinv*ewtabscale;
 762                 const int  ri   = static_cast<int>(rs);
 763                 const real frac = rs - ri;
 764                 const real f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 765                 /* TODO: Currently the Ewald LJ table does not contain
 766                  * the factor 1/6, we should add this.
 767                  */
 768                 const real FF = f_lr*rinv/six;
 769                 real       VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
 770
 771                 if (ii == jnr)
 772                 {
 773                     /* If we get here, the i particle (ii) has itself (jnr)
 774                      * in its neighborlist. This can only happen with the Verlet
 775                      * scheme, and corresponds to a self-interaction that will
 776                      * occur twice. Scale it down by 50% to only include it once.
 777                      */
 778                     VV *= half;
 779                 }
 780
 781                 for (int i = 0; i < NSTATES; i++)
 782                 {
 783                     const real c6grid = nbfp_grid[tj[i]];
 784                     vvtot            += LFV[i]*c6grid*VV;
 785                     Fscal            += LFV[i]*c6grid*FF;
 786                     dvdl_vdw         += (DLF[i]*c6grid)*VV;
 787                 }
 788             }
 789
 790             if (doForces)
 791             {
 792                 const real tx  = Fscal*dx;
 793                 const real ty  = Fscal*dy;
 794                 const real tz  = Fscal*dz;
 795                 fix            = fix + tx;
 796                 fiy            = fiy + ty;
 797                 fiz            = fiz + tz;
 798                 /* OpenMP atomics are expensive, but this kernels is also
 799                  * expensive, so we can take this hit, instead of using
 800                  * thread-local output buffers and extra reduction.
 801                  *
 802                  * All the OpenMP regions in this file are trivial and should
 803                  * not throw, so no need for try/catch.
 804                  */
 805 #pragma omp atomic
 806                 f[j3]     -= tx;
 807 #pragma omp atomic
 808                 f[j3+1]   -= ty;
 809 #pragma omp atomic
 810                 f[j3+2]   -= tz;
 811             }
 812         }
 813
 814         /* The atomics below are expensive with many OpenMP threads.
 815          * Here unperturbed i-particles will usually only have a few
 816          * (perturbed) j-particles in the list. Thus with a buffered list
 817          * we can skip a significant number of i-reductions with a check.
 818          */
 819         if (npair_within_cutoff > 0)
 820         {
 821             if (doForces)
 822             {
 823 #pragma omp atomic
 824                 f[ii3]        += fix;
 825 #pragma omp atomic
 826                 f[ii3+1]      += fiy;
 827 #pragma omp atomic
 828                 f[ii3+2]      += fiz;
 829             }
 830             if (doShiftForces)
 831             {
 832 #pragma omp atomic
 833                 fshift[is3]   += fix;
 834 #pragma omp atomic
 835                 fshift[is3+1] += fiy;
 836 #pragma omp atomic
 837                 fshift[is3+2] += fiz;
 838             }
 839             if (doPotential)
 840             {
 841                 int ggid  = gid[n];
 842 #pragma omp atomic
 843                 Vc[ggid] += vctot;
 844 #pragma omp atomic
 845                 Vv[ggid] += vvtot;
 846             }
 847         }
 848     }
 849
 850 #pragma omp atomic
 851     dvdl[efptCOUL]     += dvdl_coul;
 852  #pragma omp atomic
 853     dvdl[efptVDW]      += dvdl_vdw;
 854
 855     /* Estimate flops, average for free energy stuff:
 856      * 12  flops per outer iteration
 857      * 150 flops per inner iteration
 858      */
 859 #pragma omp atomic
 860     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[nri]*150);
 861 }
 862
 863 typedef void (* KernelFunction)(const t_nblist * gmx_restrict    nlist,
 864                                 rvec * gmx_restrict              xx,
 865                                 gmx::ForceWithShiftForces *      forceWithShiftForces,
 866                                 const t_forcerec * gmx_restrict  fr,
 867                                 const t_mdatoms * gmx_restrict   mdatoms,
 868                                 nb_kernel_data_t * gmx_restrict  kernel_data,
 869                                 t_nrnb * gmx_restrict            nrnb);
 870
 871 template<SoftCoreTreatment softCoreTreatment,
 872          bool scLambdasOrAlphasDiffer,
 873          bool vdwInteractionTypeIsEwald,
 874          bool elecInteractionTypeIsEwald>
 875 static KernelFunction
 876 dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
 877 {
 878     if (vdwModifierIsPotSwitch)
 879     {
 880         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 881                                      elecInteractionTypeIsEwald, true>);
 882     }
 883     else
 884     {
 885         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 886                                      elecInteractionTypeIsEwald, false>);
 887     }
 888 }
 889
 890 template<SoftCoreTreatment softCoreTreatment,
 891          bool scLambdasOrAlphasDiffer,
 892          bool vdwInteractionTypeIsEwald>
 893 static KernelFunction
 894 dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 895                                     const bool vdwModifierIsPotSwitch)
 896 {
 897     if (elecInteractionTypeIsEwald)
 898     {
 899         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 900                                            true>(vdwModifierIsPotSwitch));
 901     }
 902     else
 903     {
 904         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 905                                            false>(vdwModifierIsPotSwitch));
 906     }
 907 }
 908
 909 template<SoftCoreTreatment softCoreTreatment,
 910          bool scLambdasOrAlphasDiffer>
 911 static KernelFunction
 912 dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 913                                    const bool elecInteractionTypeIsEwald,
 914                                    const bool vdwModifierIsPotSwitch)
 915 {
 916     if (vdwInteractionTypeIsEwald)
 917     {
 918         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
 919                                                    true>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 920     }
 921     else
 922     {
 923         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
 924                                                    false>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 925     }
 926 }
 927
 928 template<SoftCoreTreatment softCoreTreatment>
 929 static KernelFunction
 930 dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 931                                             const bool vdwInteractionTypeIsEwald,
 932                                             const bool elecInteractionTypeIsEwald,
 933                                             const bool vdwModifierIsPotSwitch)
 934 {
 935     if (scLambdasOrAlphasDiffer)
 936     {
 937         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 938                                                                            vdwModifierIsPotSwitch));
 939     }
 940     else
 941     {
 942         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 943                                                                             vdwModifierIsPotSwitch));
 944     }
 945 }
 946
 947 static KernelFunction
 948 dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
 949                const bool                 vdwInteractionTypeIsEwald,
 950                const bool                 elecInteractionTypeIsEwald,
 951                const bool                 vdwModifierIsPotSwitch,
 952                const t_forcerec          *fr)
 953 {
 954     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 955     {
 956         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(scLambdasOrAlphasDiffer,
 957                                                                                     vdwInteractionTypeIsEwald,
 958                                                                                     elecInteractionTypeIsEwald,
 959                                                                                     vdwModifierIsPotSwitch));
 960     }
 961     else if (fr->sc_r_power == 6.0_real)
 962     {
 963         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(scLambdasOrAlphasDiffer,
 964                                                                                        vdwInteractionTypeIsEwald,
 965                                                                                        elecInteractionTypeIsEwald,
 966                                                                                        vdwModifierIsPotSwitch));
 967     }
 968     else
 969     {
 970         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(scLambdasOrAlphasDiffer,
 971                                                                                         vdwInteractionTypeIsEwald,
 972                                                                                         elecInteractionTypeIsEwald,
 973                                                                                         vdwModifierIsPotSwitch));
 974     }
 975 }
 976
 977
 978 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
 979                                rvec                      *xx,
 980                                gmx::ForceWithShiftForces *ff,
 981                                const t_forcerec          *fr,
 982                                const t_mdatoms           *mdatoms,
 983                                nb_kernel_data_t          *kernel_data,
 984                                t_nrnb                    *nrnb)
 985 {
 986     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype)  || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 987                "Unsupported eeltype with free energy");
 988
 989     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 990     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 991     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 992     bool       scLambdasOrAlphasDiffer    = true;
 993
 994     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 995     {
 996         scLambdasOrAlphasDiffer = false;
 997     }
 998     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
 999     {
1000         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
1001             fr->sc_alphacoul == fr->sc_alphavdw)
1002         {
1003             scLambdasOrAlphasDiffer = false;
1004         }
1005     }
1006     else
1007     {
1008         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1009     }
1010     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
1011                                                vdwModifierIsPotSwitch, fr);
1012     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1013 }