src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forceoutput.h"
  51 #include "gromacs/mdtypes/forcerec.h"
  52 #include "gromacs/mdtypes/md_enums.h"
  53 #include "gromacs/utility/fatalerror.h"
  54
  55
  56 //! Enum for templating the soft-core treatment in the kernel
  57 enum class SoftCoreTreatment
  58 {
  59     None,    //!< No soft-core
  60     RPower6, //!< Soft-core with r-power = 6
  61     RPower48 //!< Soft-core with r-power = 48
  62 };
  63
  64 //! Most treatments are fine with float in mixed-precision mode.
  65 template <SoftCoreTreatment softCoreTreatment>
  66 struct SoftCoreReal
  67 {
  68     //! Real type for soft-core calculations
  69     using Real = real;
  70 };
  71
  72 //! This treatment requires double precision for some computations.
  73 template <>
  74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  75 {
  76     //! Real type for soft-core calculations
  77     using Real = double;
  78 };
  79
  80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  81 template <SoftCoreTreatment softCoreTreatment>
  82 static inline void pthRoot(const real  r,
  83                            real       *pthRoot,
  84                            real       *invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1/(*invPthRoot);
  88 }
  89
  90 // We need a double version to make the specialization below work
  91 #if !GMX_DOUBLE
  92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  93 template <SoftCoreTreatment softCoreTreatment>
  94 static inline void pthRoot(const double  r,
  95                            real         *pthRoot,
  96                            double       *invPthRoot)
  97 {
  98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  99     *pthRoot    = 1/(*invPthRoot);
 100 }
 101 #endif
 102
 103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
 104 template <>
 105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
 106                                                  real         *pthRoot,
 107                                                  double       *invPthRoot)
 108 {
 109     *pthRoot    = std::pow(r, 1.0/48.0);
 110     *invPthRoot = 1/(*pthRoot);
 111 }
 112
 113 template<SoftCoreTreatment softCoreTreatment>
 114 static inline real calculateSigmaPow(const real sigma6)
 115 {
 116     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 117     {
 118         return sigma6;
 119     }
 120     else
 121     {
 122         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
 123         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
 124         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
 125         return (sigmaPow);
 126     }
 127 }
 128
 129 template<SoftCoreTreatment softCoreTreatment, class SCReal>
 130 static inline real calculateRinv6(const SCReal rinvV)
 131 {
 132     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 133     {
 134         return rinvV;
 135     }
 136     else
 137     {
 138         real rinv6 = rinvV * rinvV;
 139         return (rinv6 * rinv6 * rinv6);
 140     }
 141 }
 142
 143 static inline real calculateVdw6(const real c6, const real rinv6)
 144 {
 145     return (c6 * rinv6);
 146 }
 147
 148 static inline real calculateVdw12(const real c12, const real rinv6)
 149 {
 150     return (c12 * rinv6 * rinv6);
 151 }
 152
 153 /* reaction-field electrostatics */
 154 template<class SCReal>
 155 static inline SCReal
 156 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
 157                          const real two)
 158 {
 159     return (qq*(rinv - two*krf*r*r));
 160 }
 161 template<class SCReal>
 162 static inline real
 163 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
 164                        const real potentialShift)
 165 {
 166     return (qq*(rinv + krf*r*r-potentialShift));
 167 }
 168
 169 /* Ewald electrostatics */
 170 static inline real
 171 ewaldScalarForce(const real coulomb, const real rinv)
 172 {
 173     return (coulomb*rinv);
 174 }
 175 static inline real
 176 ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
 177 {
 178     return (coulomb*(rinv-potentialShift));
 179 }
 180
 181 /* cutoff LJ */
 182 static inline real
 183 lennardJonesScalarForce(const real v6, const real v12)
 184 {
 185     return (v12 - v6);
 186 }
 187 static inline real
 188 lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
 189                       const real dispersionShift, const real onesixth, const real onetwelfth)
 190 {
 191     return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
 192 }
 193
 194 /* Ewald LJ */
 195 static inline real
 196 ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 197 {
 198     return (c6grid * potentialShift * onesixth);
 199 }
 200
 201 /* LJ Potential switch */
 202 template<class SCReal>
 203 static inline SCReal
 204 potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
 205 {
 206     if (r < rVdw)
 207     {
 208         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
 209         return (fScalar);
 210     }
 211     return (zero);
 212 }
 213 template<class SCReal>
 214 static inline real
 215 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
 216 {
 217     if (r < rVdw)
 218     {
 219         real potential = potentialInp * sw;
 220         return (potential);
 221     }
 222     return (zero);
 223 }
 224
 225
 226 //! Templated free-energy non-bonded kernel
 227 template<SoftCoreTreatment softCoreTreatment,
 228          bool              scLambdasOrAlphasDiffer,
 229          bool              vdwInteractionTypeIsEwald,
 230          bool              elecInteractionTypeIsEwald,
 231          bool              vdwModifierIsPotSwitch>
 232 static void
 233 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
 234                       rvec * gmx_restrict              xx,
 235                       gmx::ForceWithShiftForces *      forceWithShiftForces,
 236                       const t_forcerec * gmx_restrict  fr,
 237                       const t_mdatoms * gmx_restrict   mdatoms,
 238                       nb_kernel_data_t * gmx_restrict  kernel_data,
 239                       t_nrnb * gmx_restrict            nrnb)
 240 {
 241     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
 242
 243     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
 244
 245 #define  STATE_A  0
 246 #define  STATE_B  1
 247 #define  NSTATES  2
 248
 249     constexpr real  onetwelfth  = 1.0/12.0;
 250     constexpr real  onesixth    = 1.0/6.0;
 251     constexpr real  zero        = 0.0;
 252     constexpr real  half        = 0.5;
 253     constexpr real  one         = 1.0;
 254     constexpr real  two         = 2.0;
 255     constexpr real  six         = 6.0;
 256
 257     /* Extract pointer to non-bonded interaction constants */
 258     const interaction_const_t *ic = fr->ic;
 259
 260     // Extract pair list data
 261     const int   nri             = nlist->nri;
 262     const int  *iinr            = nlist->iinr;
 263     const int  *jindex          = nlist->jindex;
 264     const int  *jjnr            = nlist->jjnr;
 265     const int  *shift           = nlist->shift;
 266     const int  *gid             = nlist->gid;
 267
 268     const real *shiftvec        = fr->shift_vec[0];
 269     const real *chargeA         = mdatoms->chargeA;
 270     const real *chargeB         = mdatoms->chargeB;
 271     real       *Vc              = kernel_data->energygrp_elec;
 272     const int  *typeA           = mdatoms->typeA;
 273     const int  *typeB           = mdatoms->typeB;
 274     const int   ntype           = fr->ntype;
 275     const real *nbfp            = fr->nbfp;
 276     const real *nbfp_grid       = fr->ljpme_c6grid;
 277     real       *Vv              = kernel_data->energygrp_vdw;
 278     const real  lambda_coul     = kernel_data->lambda[efptCOUL];
 279     const real  lambda_vdw      = kernel_data->lambda[efptVDW];
 280     real       *dvdl            = kernel_data->dvdl;
 281     const real  alpha_coul      = fr->sc_alphacoul;
 282     const real  alpha_vdw       = fr->sc_alphavdw;
 283     const real  lam_power       = fr->sc_power;
 284     const real  sigma6_def      = fr->sc_sigma6_def;
 285     const real  sigma6_min      = fr->sc_sigma6_min;
 286     const bool  doForces        = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 287     const bool  doShiftForces   = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 288     const bool  doPotential     = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 289
 290     // Extract data from interaction_const_t
 291     const real  facel           = ic->epsfac;
 292     const real  rcoulomb        = ic->rcoulomb;
 293     const real  krf             = ic->k_rf;
 294     const real  crf             = ic->c_rf;
 295     const real  sh_lj_ewald     = ic->sh_lj_ewald;
 296     const real  rvdw            = ic->rvdw;
 297     const real  dispersionShift = ic->dispersion_shift.cpot;
 298     const real  repulsionShift  = ic->repulsion_shift.cpot;
 299
 300     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 301     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 302                "Potential switching is not supported for Coulomb with FEP");
 303
 304     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 305     if (vdwModifierIsPotSwitch)
 306     {
 307         const real d = ic->rvdw - ic->rvdw_switch;
 308         vdw_swV3     = -10.0/(d*d*d);
 309         vdw_swV4     =  15.0/(d*d*d*d);
 310         vdw_swV5     =  -6.0/(d*d*d*d*d);
 311         vdw_swF2     = -30.0/(d*d*d);
 312         vdw_swF3     =  60.0/(d*d*d*d);
 313         vdw_swF4     = -30.0/(d*d*d*d*d);
 314     }
 315     else
 316     {
 317         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 318         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 319     }
 320
 321     int icoul;
 322     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 323     {
 324         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 325     }
 326     else
 327     {
 328         icoul = GMX_NBKERNEL_ELEC_NONE;
 329     }
 330
 331     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 332     rcutoff_max2      = rcutoff_max2*rcutoff_max2;
 333
 334     const real   *tab_ewald_F_lj = nullptr;
 335     const real   *tab_ewald_V_lj = nullptr;
 336     const real   *ewtab          = nullptr;
 337     real          ewtabscale     = 0;
 338     real          ewtabhalfspace = 0;
 339     real          sh_ewald       = 0;
 340     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 341     {
 342         const auto &tables = *ic->coulombEwaldTables;
 343         sh_ewald           = ic->sh_ewald;
 344         ewtab              = tables.tableFDV0.data();
 345         ewtabscale         = tables.scale;
 346         ewtabhalfspace     = half/ewtabscale;
 347         tab_ewald_F_lj     = tables.tableF.data();
 348         tab_ewald_V_lj     = tables.tableV.data();
 349     }
 350
 351     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 352      * reciprocal space. When we use non-switched Ewald interactions, we
 353      * assume the soft-coring does not significantly affect the grid contribution
 354      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 355      *
 356      * However, we cannot use this approach for switch-modified since we would then
 357      * effectively end up evaluating a significantly different interaction here compared to the
 358      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 359      * position than what the user requested, or by switching different
 360      * things (1/r rather than short-range Ewald). For these settings, we just
 361      * use the traditional short-range Ewald interaction in that case.
 362      */
 363     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald  && vdwModifierIsPotSwitch),
 364                        "Can not apply soft-core to switched Ewald potentials");
 365
 366     SCReal dvdl_coul  = 0; /* Needs double for sc_power==48 */
 367     SCReal dvdl_vdw   = 0; /* Needs double for sc_power==48 */
 368
 369     /* Lambda factor for state A, 1-lambda*/
 370     real LFC[NSTATES], LFV[NSTATES];
 371     LFC[STATE_A] = one - lambda_coul;
 372     LFV[STATE_A] = one - lambda_vdw;
 373
 374     /* Lambda factor for state B, lambda*/
 375     LFC[STATE_B] = lambda_coul;
 376     LFV[STATE_B] = lambda_vdw;
 377
 378     /*derivative of the lambda factor for state A and B */
 379     real DLF[NSTATES];
 380     DLF[STATE_A] = -1;
 381     DLF[STATE_B] = 1;
 382
 383     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 384     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 385     for (int i = 0; i < NSTATES; i++)
 386     {
 387         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 388         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 389         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 390         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 391     }
 392
 393     // TODO: We should get rid of using pointers to real
 394     const real          *x      = xx[0];
 395     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
 396     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 397
 398     for (int n = 0; n < nri; n++)
 399     {
 400         int        npair_within_cutoff = 0;
 401
 402         const int  is3   = 3*shift[n];
 403         const real shX   = shiftvec[is3];
 404         const real shY   = shiftvec[is3+1];
 405         const real shZ   = shiftvec[is3+2];
 406         const int  nj0   = jindex[n];
 407         const int  nj1   = jindex[n+1];
 408         const int  ii    = iinr[n];
 409         const int  ii3   = 3*ii;
 410         const real ix    = shX + x[ii3+0];
 411         const real iy    = shY + x[ii3+1];
 412         const real iz    = shZ + x[ii3+2];
 413         const real iqA   = facel*chargeA[ii];
 414         const real iqB   = facel*chargeB[ii];
 415         const int  ntiA  = 2*ntype*typeA[ii];
 416         const int  ntiB  = 2*ntype*typeB[ii];
 417         real       vctot = 0;
 418         real       vvtot = 0;
 419         real       fix   = 0;
 420         real       fiy   = 0;
 421         real       fiz   = 0;
 422
 423         for (int k = nj0; k < nj1; k++)
 424         {
 425             int        tj[NSTATES];
 426             const int  jnr             = jjnr[k];
 427             const int  j3              = 3*jnr;
 428             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 429             real       r, rinv, rp, rpm2;
 430             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
 431             const real dx             = ix - x[j3];
 432             const real dy             = iy - x[j3+1];
 433             const real dz             = iz - x[j3+2];
 434             const real rsq            = dx*dx + dy*dy + dz*dz;
 435             SCReal     FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
 436
 437             if (rsq >= rcutoff_max2)
 438             {
 439                 /* We save significant time by skipping all code below.
 440                  * Note that with soft-core interactions, the actual cut-off
 441                  * check might be different. But since the soft-core distance
 442                  * is always larger than r, checking on r here is safe.
 443                  */
 444                 continue;
 445             }
 446             npair_within_cutoff++;
 447
 448             if (rsq > 0)
 449             {
 450                 /* Note that unlike in the nbnxn kernels, we do not need
 451                  * to clamp the value of rsq before taking the invsqrt
 452                  * to avoid NaN in the LJ calculation, since here we do
 453                  * not calculate LJ interactions when C6 and C12 are zero.
 454                  */
 455
 456                 rinv         = gmx::invsqrt(rsq);
 457                 r            = rsq*rinv;
 458             }
 459             else
 460             {
 461                 /* The force at r=0 is zero, because of symmetry.
 462                  * But note that the potential is in general non-zero,
 463                  * since the soft-cored r will be non-zero.
 464                  */
 465                 rinv         = 0;
 466                 r            = 0;
 467             }
 468
 469             if (softCoreTreatment == SoftCoreTreatment::None)
 470             {
 471                 /* The soft-core power p will not affect the results
 472                  * with not using soft-core, so we use power of 0 which gives
 473                  * the simplest math and cheapest code.
 474                  */
 475                 rpm2         = rinv*rinv;
 476                 rp           = 1;
 477             }
 478             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 479             {
 480                 rpm2         = rsq*rsq;  /* r4 */
 481                 rp           = rpm2*rsq; /* r6 */
 482             }
 483             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 484             {
 485                 rp           = rsq*rsq*rsq; /* r6 */
 486                 rp           = rp*rp;       /* r12 */
 487                 rp           = rp*rp;       /* r24 */
 488                 rp           = rp*rp;       /* r48 */
 489                 rpm2         = rp/rsq;      /* r46 */
 490             }
 491
 492             real Fscal = 0;
 493
 494             qq[STATE_A]      = iqA*chargeA[jnr];
 495             qq[STATE_B]      = iqB*chargeB[jnr];
 496
 497             tj[STATE_A]      = ntiA+2*typeA[jnr];
 498             tj[STATE_B]      = ntiB+2*typeB[jnr];
 499
 500             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 501             {
 502                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 503                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 504
 505                 for (int i = 0; i < NSTATES; i++)
 506                 {
 507                     c12[i]             = nbfp[tj[i]+1];
 508                     if (useSoftCore)
 509                     {
 510                         real sigma6[NSTATES];
 511                         if ((c6[i] > 0) && (c12[i] > 0))
 512                         {
 513                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 514                             sigma6[i]       = half*c12[i]/c6[i];
 515                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 516                             {
 517                                 sigma6[i] = sigma6_min;
 518                             }
 519                         }
 520                         else
 521                         {
 522                             sigma6[i]       = sigma6_def;
 523                         }
 524                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
 525                     }
 526                 }
 527
 528                 if (useSoftCore)
 529                 {
 530                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 531                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 532                     {
 533                         alpha_vdw_eff    = 0;
 534                         alpha_coul_eff   = 0;
 535                     }
 536                     else
 537                     {
 538                         alpha_vdw_eff    = alpha_vdw;
 539                         alpha_coul_eff   = alpha_coul;
 540                     }
 541                 }
 542
 543                 for (int i = 0; i < NSTATES; i++)
 544                 {
 545                     FscalC[i] = 0;
 546                     FscalV[i] = 0;
 547                     Vcoul[i]  = 0;
 548                     Vvdw[i]   = 0;
 549
 550                     real   rinvC, rinvV;
 551                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
 552
 553                     /* Only spend time on A or B state if it is non-zero */
 554                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 555                     {
 556                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 557                         if (useSoftCore)
 558                         {
 559                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 560                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
 561                             if (scLambdasOrAlphasDiffer)
 562                             {
 563                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 564                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
 565                             }
 566                             else
 567                             {
 568                                 /* We can avoid one expensive pow and one / operation */
 569                                 rpinvV = rpinvC;
 570                                 rinvV  = rinvC;
 571                                 rV     = rC;
 572                             }
 573                         }
 574                         else
 575                         {
 576                             rpinvC     = 1;
 577                             rinvC      = rinv;
 578                             rC         = r;
 579
 580                             rpinvV     = 1;
 581                             rinvV      = rinv;
 582                             rV         = r;
 583                         }
 584
 585                         /* Only process the coulomb interactions if we have charges,
 586                          * and if we either include all entries in the list (no cutoff
 587                          * used in the kernel), or if we are within the cutoff.
 588                          */
 589                         bool computeElecInteraction =
 590                             ( elecInteractionTypeIsEwald && r < rcoulomb) ||
 591                             (!elecInteractionTypeIsEwald && rC < rcoulomb);
 592
 593                         if ( (qq[i] != 0) && computeElecInteraction)
 594                         {
 595                             if (elecInteractionTypeIsEwald)
 596                             {
 597                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 598                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 599                             }
 600                             else
 601                             {
 602                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 603                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 604                             }
 605                         }
 606
 607                         /* Only process the VDW interactions if we have
 608                          * some non-zero parameters, and if we either
 609                          * include all entries in the list (no cutoff used
 610                          * in the kernel), or if we are within the cutoff.
 611                          */
 612                         bool computeVdwInteraction =
 613                             ( vdwInteractionTypeIsEwald && r < rvdw) ||
 614                             (!vdwInteractionTypeIsEwald && rV < rvdw);
 615                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 616                         {
 617                             real rinv6;
 618                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 619                             {
 620                                 rinv6    = calculateRinv6<softCoreTreatment>(rpinvV);
 621                             }
 622                             else
 623                             {
 624                                 rinv6    = calculateRinv6<softCoreTreatment>(rinvV);
 625                             }
 626                             real Vvdw6   = calculateVdw6(c6[i], rinv6);
 627                             real Vvdw12  = calculateVdw12(c12[i], rinv6);
 628
 629                             Vvdw[i]      = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
 630                             FscalV[i]    = lennardJonesScalarForce(Vvdw6, Vvdw12);
 631
 632                             if (vdwInteractionTypeIsEwald)
 633                             {
 634                                 /* Subtract the grid potential at the cut-off */
 635                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
 636                             }
 637
 638                             if (vdwModifierIsPotSwitch)
 639                             {
 640                                 real       d   = rV - ic->rvdw_switch;
 641                                 d              = (d > zero) ? d : zero;
 642                                 const real d2  = d*d;
 643                                 const real sw  = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
 644                                 const real dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
 645
 646                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
 647                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 648                             }
 649                         }
 650
 651                         /* FscalC (and FscalV) now contain: dV/drC * rC
 652                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 653                          * Further down we first multiply by r^p-2 and then by
 654                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 655                          */
 656                         FscalC[i] *= rpinvC;
 657                         FscalV[i] *= rpinvV;
 658                     }
 659                 }
 660
 661                 /* Assemble A and B states */
 662                 for (int i = 0; i < NSTATES; i++)
 663                 {
 664                     vctot += LFC[i]*Vcoul[i];
 665                     vvtot += LFV[i]*Vvdw[i];
 666
 667                     Fscal += LFC[i]*FscalC[i]*rpm2;
 668                     Fscal += LFV[i]*FscalV[i]*rpm2;
 669
 670                     if (useSoftCore)
 671                     {
 672                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 673                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 674                     }
 675                     else
 676                     {
 677                         dvdl_coul += Vcoul[i]*DLF[i];
 678                         dvdl_vdw  += Vvdw[i]*DLF[i];
 679                     }
 680                 }
 681             }
 682             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 683             {
 684                 /* For excluded pairs, which are only in this pair list when
 685                  * using the Verlet scheme, we don't use soft-core.
 686                  * The group scheme also doesn't soft-core for these.
 687                  * As there is no singularity, there is no need for soft-core.
 688                  */
 689                 const real FF = -two*krf;
 690                 real       VV = krf*rsq - crf;
 691
 692                 if (ii == jnr)
 693                 {
 694                     VV *= half;
 695                 }
 696
 697                 for (int i = 0; i < NSTATES; i++)
 698                 {
 699                     vctot      += LFC[i]*qq[i]*VV;
 700                     Fscal      += LFC[i]*qq[i]*FF;
 701                     dvdl_coul  += DLF[i]*qq[i]*VV;
 702                 }
 703             }
 704
 705             if (elecInteractionTypeIsEwald && r < rcoulomb)
 706             {
 707                 /* See comment in the preamble. When using Ewald interactions
 708                  * (unless we use a switch modifier) we subtract the reciprocal-space
 709                  * Ewald component here which made it possible to apply the free
 710                  * energy interaction to 1/r (vanilla coulomb short-range part)
 711                  * above. This gets us closer to the ideal case of applying
 712                  * the softcore to the entire electrostatic interaction,
 713                  * including the reciprocal-space component.
 714                  */
 715                 real       v_lr, f_lr;
 716
 717                 const real ewrt   = r*ewtabscale;
 718                 int        ewitab = static_cast<int>(ewrt);
 719                 const real eweps  = ewrt - ewitab;
 720                 ewitab            = 4*ewitab;
 721                 f_lr              = ewtab[ewitab] + eweps*ewtab[ewitab+1];
 722                 v_lr              = (ewtab[ewitab + 2] - ewtabhalfspace*eweps*(ewtab[ewitab] + f_lr));
 723                 f_lr             *= rinv;
 724
 725                 /* Note that any possible Ewald shift has already been applied in
 726                  * the normal interaction part above.
 727                  */
 728
 729                 if (ii == jnr)
 730                 {
 731                     /* If we get here, the i particle (ii) has itself (jnr)
 732                      * in its neighborlist. This can only happen with the Verlet
 733                      * scheme, and corresponds to a self-interaction that will
 734                      * occur twice. Scale it down by 50% to only include it once.
 735                      */
 736                     v_lr *= half;
 737                 }
 738
 739                 for (int i = 0; i < NSTATES; i++)
 740                 {
 741                     vctot     -= LFC[i]*qq[i]*v_lr;
 742                     Fscal     -= LFC[i]*qq[i]*f_lr;
 743                     dvdl_coul -= (DLF[i]*qq[i])*v_lr;
 744                 }
 745             }
 746
 747             if (vdwInteractionTypeIsEwald && r < rvdw)
 748             {
 749                 /* See comment in the preamble. When using LJ-Ewald interactions
 750                  * (unless we use a switch modifier) we subtract the reciprocal-space
 751                  * Ewald component here which made it possible to apply the free
 752                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 753                  * above. This gets us closer to the ideal case of applying
 754                  * the softcore to the entire VdW interaction,
 755                  * including the reciprocal-space component.
 756                  */
 757                 /* We could also use the analytical form here
 758                  * iso a table, but that can cause issues for
 759                  * r close to 0 for non-interacting pairs.
 760                  */
 761
 762                 const real rs   = rsq*rinv*ewtabscale;
 763                 const int  ri   = static_cast<int>(rs);
 764                 const real frac = rs - ri;
 765                 const real f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 766                 /* TODO: Currently the Ewald LJ table does not contain
 767                  * the factor 1/6, we should add this.
 768                  */
 769                 const real FF = f_lr*rinv/six;
 770                 real       VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
 771
 772                 if (ii == jnr)
 773                 {
 774                     /* If we get here, the i particle (ii) has itself (jnr)
 775                      * in its neighborlist. This can only happen with the Verlet
 776                      * scheme, and corresponds to a self-interaction that will
 777                      * occur twice. Scale it down by 50% to only include it once.
 778                      */
 779                     VV *= half;
 780                 }
 781
 782                 for (int i = 0; i < NSTATES; i++)
 783                 {
 784                     const real c6grid = nbfp_grid[tj[i]];
 785                     vvtot            += LFV[i]*c6grid*VV;
 786                     Fscal            += LFV[i]*c6grid*FF;
 787                     dvdl_vdw         += (DLF[i]*c6grid)*VV;
 788                 }
 789             }
 790
 791             if (doForces)
 792             {
 793                 const real tx  = Fscal*dx;
 794                 const real ty  = Fscal*dy;
 795                 const real tz  = Fscal*dz;
 796                 fix            = fix + tx;
 797                 fiy            = fiy + ty;
 798                 fiz            = fiz + tz;
 799                 /* OpenMP atomics are expensive, but this kernels is also
 800                  * expensive, so we can take this hit, instead of using
 801                  * thread-local output buffers and extra reduction.
 802                  *
 803                  * All the OpenMP regions in this file are trivial and should
 804                  * not throw, so no need for try/catch.
 805                  */
 806 #pragma omp atomic
 807                 f[j3]     -= tx;
 808 #pragma omp atomic
 809                 f[j3+1]   -= ty;
 810 #pragma omp atomic
 811                 f[j3+2]   -= tz;
 812             }
 813         }
 814
 815         /* The atomics below are expensive with many OpenMP threads.
 816          * Here unperturbed i-particles will usually only have a few
 817          * (perturbed) j-particles in the list. Thus with a buffered list
 818          * we can skip a significant number of i-reductions with a check.
 819          */
 820         if (npair_within_cutoff > 0)
 821         {
 822             if (doForces)
 823             {
 824 #pragma omp atomic
 825                 f[ii3]        += fix;
 826 #pragma omp atomic
 827                 f[ii3+1]      += fiy;
 828 #pragma omp atomic
 829                 f[ii3+2]      += fiz;
 830             }
 831             if (doShiftForces)
 832             {
 833 #pragma omp atomic
 834                 fshift[is3]   += fix;
 835 #pragma omp atomic
 836                 fshift[is3+1] += fiy;
 837 #pragma omp atomic
 838                 fshift[is3+2] += fiz;
 839             }
 840             if (doPotential)
 841             {
 842                 int ggid  = gid[n];
 843 #pragma omp atomic
 844                 Vc[ggid] += vctot;
 845 #pragma omp atomic
 846                 Vv[ggid] += vvtot;
 847             }
 848         }
 849     }
 850
 851 #pragma omp atomic
 852     dvdl[efptCOUL]     += dvdl_coul;
 853  #pragma omp atomic
 854     dvdl[efptVDW]      += dvdl_vdw;
 855
 856     /* Estimate flops, average for free energy stuff:
 857      * 12  flops per outer iteration
 858      * 150 flops per inner iteration
 859      */
 860 #pragma omp atomic
 861     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[nri]*150);
 862 }
 863
 864 typedef void (* KernelFunction)(const t_nblist * gmx_restrict    nlist,
 865                                 rvec * gmx_restrict              xx,
 866                                 gmx::ForceWithShiftForces *      forceWithShiftForces,
 867                                 const t_forcerec * gmx_restrict  fr,
 868                                 const t_mdatoms * gmx_restrict   mdatoms,
 869                                 nb_kernel_data_t * gmx_restrict  kernel_data,
 870                                 t_nrnb * gmx_restrict            nrnb);
 871
 872 template<SoftCoreTreatment softCoreTreatment,
 873          bool scLambdasOrAlphasDiffer,
 874          bool vdwInteractionTypeIsEwald,
 875          bool elecInteractionTypeIsEwald>
 876 static KernelFunction
 877 dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
 878 {
 879     if (vdwModifierIsPotSwitch)
 880     {
 881         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 882                                      elecInteractionTypeIsEwald, true>);
 883     }
 884     else
 885     {
 886         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 887                                      elecInteractionTypeIsEwald, false>);
 888     }
 889 }
 890
 891 template<SoftCoreTreatment softCoreTreatment,
 892          bool scLambdasOrAlphasDiffer,
 893          bool vdwInteractionTypeIsEwald>
 894 static KernelFunction
 895 dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 896                                     const bool vdwModifierIsPotSwitch)
 897 {
 898     if (elecInteractionTypeIsEwald)
 899     {
 900         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 901                                            true>(vdwModifierIsPotSwitch));
 902     }
 903     else
 904     {
 905         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 906                                            false>(vdwModifierIsPotSwitch));
 907     }
 908 }
 909
 910 template<SoftCoreTreatment softCoreTreatment,
 911          bool scLambdasOrAlphasDiffer>
 912 static KernelFunction
 913 dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 914                                    const bool elecInteractionTypeIsEwald,
 915                                    const bool vdwModifierIsPotSwitch)
 916 {
 917     if (vdwInteractionTypeIsEwald)
 918     {
 919         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
 920                                                    true>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 921     }
 922     else
 923     {
 924         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
 925                                                    false>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 926     }
 927 }
 928
 929 template<SoftCoreTreatment softCoreTreatment>
 930 static KernelFunction
 931 dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 932                                             const bool vdwInteractionTypeIsEwald,
 933                                             const bool elecInteractionTypeIsEwald,
 934                                             const bool vdwModifierIsPotSwitch)
 935 {
 936     if (scLambdasOrAlphasDiffer)
 937     {
 938         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 939                                                                            vdwModifierIsPotSwitch));
 940     }
 941     else
 942     {
 943         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 944                                                                             vdwModifierIsPotSwitch));
 945     }
 946 }
 947
 948 static KernelFunction
 949 dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
 950                const bool                 vdwInteractionTypeIsEwald,
 951                const bool                 elecInteractionTypeIsEwald,
 952                const bool                 vdwModifierIsPotSwitch,
 953                const t_forcerec          *fr)
 954 {
 955     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 956     {
 957         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(scLambdasOrAlphasDiffer,
 958                                                                                     vdwInteractionTypeIsEwald,
 959                                                                                     elecInteractionTypeIsEwald,
 960                                                                                     vdwModifierIsPotSwitch));
 961     }
 962     else if (fr->sc_r_power == 6.0_real)
 963     {
 964         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(scLambdasOrAlphasDiffer,
 965                                                                                        vdwInteractionTypeIsEwald,
 966                                                                                        elecInteractionTypeIsEwald,
 967                                                                                        vdwModifierIsPotSwitch));
 968     }
 969     else
 970     {
 971         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(scLambdasOrAlphasDiffer,
 972                                                                                         vdwInteractionTypeIsEwald,
 973                                                                                         elecInteractionTypeIsEwald,
 974                                                                                         vdwModifierIsPotSwitch));
 975     }
 976 }
 977
 978
 979 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
 980                                rvec                      *xx,
 981                                gmx::ForceWithShiftForces *ff,
 982                                const t_forcerec          *fr,
 983                                const t_mdatoms           *mdatoms,
 984                                nb_kernel_data_t          *kernel_data,
 985                                t_nrnb                    *nrnb)
 986 {
 987     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype)  || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 988                "Unsupported eeltype with free energy");
 989
 990     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 991     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 992     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 993     bool       scLambdasOrAlphasDiffer    = true;
 994
 995     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 996     {
 997         scLambdasOrAlphasDiffer = false;
 998     }
 999     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
1000     {
1001         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
1002             fr->sc_alphacoul == fr->sc_alphavdw)
1003         {
1004             scLambdasOrAlphasDiffer = false;
1005         }
1006     }
1007     else
1008     {
1009         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1010     }
1011     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
1012                                                vdwModifierIsPotSwitch, fr);
1013     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1014 }