src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forceoutput.h"
  51 #include "gromacs/mdtypes/forcerec.h"
  52 #include "gromacs/mdtypes/md_enums.h"
  53 #include "gromacs/utility/fatalerror.h"
  54
  55
  56 //! Enum for templating the soft-core treatment in the kernel
  57 enum class SoftCoreTreatment
  58 {
  59     None,    //!< No soft-core
  60     RPower6, //!< Soft-core with r-power = 6
  61     RPower48 //!< Soft-core with r-power = 48
  62 };
  63
  64 //! Most treatments are fine with float in mixed-precision mode.
  65 template <SoftCoreTreatment softCoreTreatment>
  66 struct SoftCoreReal
  67 {
  68     //! Real type for soft-core calculations
  69     using Real = real;
  70 };
  71
  72 //! This treatment requires double precision for some computations.
  73 template <>
  74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  75 {
  76     //! Real type for soft-core calculations
  77     using Real = double;
  78 };
  79
  80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  81 template <SoftCoreTreatment softCoreTreatment>
  82 static inline void pthRoot(const real  r,
  83                            real       *pthRoot,
  84                            real       *invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1/(*invPthRoot);
  88 }
  89
  90 // We need a double version to make the specialization below work
  91 #if !GMX_DOUBLE
  92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  93 template <SoftCoreTreatment softCoreTreatment>
  94 static inline void pthRoot(const double  r,
  95                            real         *pthRoot,
  96                            double       *invPthRoot)
  97 {
  98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  99     *pthRoot    = 1/(*invPthRoot);
 100 }
 101 #endif
 102
 103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
 104 template <>
 105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
 106                                                  real         *pthRoot,
 107                                                  double       *invPthRoot)
 108 {
 109     *pthRoot    = std::pow(r, 1.0/48.0);
 110     *invPthRoot = 1/(*pthRoot);
 111 }
 112
 113 template<SoftCoreTreatment softCoreTreatment>
 114 static inline real calculateSigmaPow(const real sigma6)
 115 {
 116     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 117     {
 118         return sigma6;
 119     }
 120     else
 121     {
 122         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
 123         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
 124         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
 125         return (sigmaPow);
 126     }
 127 }
 128
 129 template<SoftCoreTreatment softCoreTreatment, class SCReal>
 130 static inline real calculateRinv6(const SCReal rinvV)
 131 {
 132     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 133     {
 134         return rinvV;
 135     }
 136     else
 137     {
 138         real rinv6 = rinvV * rinvV;
 139         return (rinv6 * rinv6 * rinv6);
 140     }
 141 }
 142
 143 static inline real calculateVdw6(const real c6, const real rinv6)
 144 {
 145     return (c6 * rinv6);
 146 }
 147
 148 static inline real calculateVdw12(const real c12, const real rinv6)
 149 {
 150     return (c12 * rinv6 * rinv6);
 151 }
 152
 153 /* reaction-field electrostatics */
 154 template<class SCReal>
 155 static inline SCReal
 156 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
 157                          const real two)
 158 {
 159     return (qq*(rinv - two*krf*r*r));
 160 }
 161 template<class SCReal>
 162 static inline real
 163 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
 164                        const real potentialShift)
 165 {
 166     return (qq*(rinv + krf*r*r-potentialShift));
 167 }
 168
 169 /* Ewald electrostatics */
 170 static inline real
 171 ewaldScalarForce(const real coulomb, const real rinv)
 172 {
 173     return (coulomb*rinv);
 174 }
 175 static inline real
 176 ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
 177 {
 178     return (coulomb*(rinv-potentialShift));
 179 }
 180
 181 /* cutoff LJ */
 182 static inline real
 183 lennardJonesScalarForce(const real v6, const real v12)
 184 {
 185     return (v12 - v6);
 186 }
 187 static inline real
 188 lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
 189                       const real dispersionShift, const real onesixth, const real onetwelfth)
 190 {
 191     return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
 192 }
 193
 194 /* Ewald LJ */
 195 static inline real
 196 ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 197 {
 198     return (c6grid * potentialShift * onesixth);
 199 }
 200
 201 /* LJ Potential switch */
 202 template<class SCReal>
 203 static inline SCReal
 204 potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
 205 {
 206     if (r < rVdw)
 207     {
 208         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
 209         return (fScalar);
 210     }
 211     return (zero);
 212 }
 213 template<class SCReal>
 214 static inline real
 215 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
 216 {
 217     if (r < rVdw)
 218     {
 219         real potential = potentialInp * sw;
 220         return (potential);
 221     }
 222     return (zero);
 223 }
 224
 225
 226 //! Templated free-energy non-bonded kernel
 227 template<SoftCoreTreatment softCoreTreatment,
 228          bool              scLambdasOrAlphasDiffer,
 229          bool              vdwInteractionTypeIsEwald,
 230          bool              elecInteractionTypeIsEwald,
 231          bool              vdwModifierIsPotSwitch>
 232 static void
 233 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
 234                       rvec * gmx_restrict              xx,
 235                       gmx::ForceWithShiftForces *      forceWithShiftForces,
 236                       const t_forcerec * gmx_restrict  fr,
 237                       const t_mdatoms * gmx_restrict   mdatoms,
 238                       nb_kernel_data_t * gmx_restrict  kernel_data,
 239                       t_nrnb * gmx_restrict            nrnb)
 240 {
 241     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
 242
 243     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
 244
 245 #define  STATE_A  0
 246 #define  STATE_B  1
 247 #define  NSTATES  2
 248     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
 249     real          shX, shY, shZ;
 250     real          tx, ty, tz, Fscal;
 251     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
 252     real          Vcoul[NSTATES], Vvdw[NSTATES];
 253     real          iqA, iqB;
 254     real          qq[NSTATES], vctot;
 255     int           ntiA, ntiB, tj[NSTATES];
 256     real          vvtot;
 257     real          ix, iy, iz, fix, fiy, fiz;
 258     real          dx, dy, dz, r, rsq, rinv;
 259     real          c6[NSTATES], c12[NSTATES], c6grid;
 260     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
 261     SCReal        dvdl_coul, dvdl_vdw;
 262     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 263     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
 264     real          rinvC, rinvV;
 265     SCReal        rp, rpm2, rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
 266     real          sigma_pow[NSTATES];
 267     real          VV, FF;
 268     int           icoul = GMX_NBKERNEL_ELEC_NONE;
 269     int           nri;
 270     const int *   iinr;
 271     const int *   jindex;
 272     const int *   jjnr;
 273     const int *   shift;
 274     const int *   gid;
 275     const int *   typeA;
 276     const int *   typeB;
 277     int           ntype;
 278     const real *  shiftvec;
 279     const real *  chargeA;
 280     const real *  chargeB;
 281     real          sigma6_min, sigma6_def, lam_power;
 282     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
 283     const real *  nbfp, *nbfp_grid;
 284     real *        dvdl;
 285     real *        Vv;
 286     real *        Vc;
 287     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
 288     real          rcutoff_max2;
 289     const real *  tab_ewald_F_lj = nullptr;
 290     const real *  tab_ewald_V_lj = nullptr;
 291     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 292     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
 293     const real *  ewtab = nullptr;
 294     int           ewitab;
 295     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
 296
 297     const real    onetwelfth  = 1.0/12.0;
 298     const real    onesixth    = 1.0/6.0;
 299     const real    zero        = 0.0;
 300     const real    half        = 0.5;
 301     const real    one         = 1.0;
 302     const real    two         = 2.0;
 303     const real    six         = 6.0;
 304
 305     /* Extract pointer to non-bonded interaction constants */
 306     const interaction_const_t *ic = fr->ic;
 307
 308     // TODO: We should get rid of using pointers to real
 309     const real          *x      = xx[0];
 310     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
 311
 312     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 313
 314     // Extract pair list data
 315     nri                 = nlist->nri;
 316     iinr                = nlist->iinr;
 317     jindex              = nlist->jindex;
 318     jjnr                = nlist->jjnr;
 319     shift               = nlist->shift;
 320     gid                 = nlist->gid;
 321
 322     shiftvec            = fr->shift_vec[0];
 323     chargeA             = mdatoms->chargeA;
 324     chargeB             = mdatoms->chargeB;
 325     Vc                  = kernel_data->energygrp_elec;
 326     typeA               = mdatoms->typeA;
 327     typeB               = mdatoms->typeB;
 328     ntype               = fr->ntype;
 329     nbfp                = fr->nbfp;
 330     nbfp_grid           = fr->ljpme_c6grid;
 331     Vv                  = kernel_data->energygrp_vdw;
 332     lambda_coul         = kernel_data->lambda[efptCOUL];
 333     lambda_vdw          = kernel_data->lambda[efptVDW];
 334     dvdl                = kernel_data->dvdl;
 335     alpha_coul          = fr->sc_alphacoul;
 336     alpha_vdw           = fr->sc_alphavdw;
 337     lam_power           = fr->sc_power;
 338     sigma6_def          = fr->sc_sigma6_def;
 339     sigma6_min          = fr->sc_sigma6_min;
 340     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 341     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 342     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 343
 344     // Extract data from interaction_const_t
 345     const real facel           = ic->epsfac;
 346     const real rcoulomb        = ic->rcoulomb;
 347     const real krf             = ic->k_rf;
 348     const real crf             = ic->c_rf;
 349     const real sh_lj_ewald     = ic->sh_lj_ewald;
 350     const real rvdw            = ic->rvdw;
 351     const real dispersionShift = ic->dispersion_shift.cpot;
 352     const real repulsionShift  = ic->repulsion_shift.cpot;
 353
 354     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 355     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 356                "Potential switching is not supported for Coulomb with FEP");
 357
 358     if (vdwModifierIsPotSwitch)
 359     {
 360         real d          = ic->rvdw - ic->rvdw_switch;
 361         vdw_swV3        = -10.0/(d*d*d);
 362         vdw_swV4        =  15.0/(d*d*d*d);
 363         vdw_swV5        =  -6.0/(d*d*d*d*d);
 364         vdw_swF2        = -30.0/(d*d*d);
 365         vdw_swF3        =  60.0/(d*d*d*d);
 366         vdw_swF4        = -30.0/(d*d*d*d*d);
 367     }
 368     else
 369     {
 370         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 371         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 372     }
 373
 374     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 375     {
 376         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 377     }
 378
 379     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
 380     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
 381
 382     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 383     {
 384         const auto &tables = *ic->coulombEwaldTables;
 385         sh_ewald       = ic->sh_ewald;
 386         ewtab          = tables.tableFDV0.data();
 387         ewtabscale     = tables.scale;
 388         ewtabhalfspace = half/ewtabscale;
 389         tab_ewald_F_lj = tables.tableF.data();
 390         tab_ewald_V_lj = tables.tableV.data();
 391     }
 392
 393     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 394      * reciprocal space. When we use non-switched Ewald interactions, we
 395      * assume the soft-coring does not significantly affect the grid contribution
 396      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 397      *
 398      * However, we cannot use this approach for switch-modified since we would then
 399      * effectively end up evaluating a significantly different interaction here compared to the
 400      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 401      * position than what the user requested, or by switching different
 402      * things (1/r rather than short-range Ewald). For these settings, we just
 403      * use the traditional short-range Ewald interaction in that case.
 404      */
 405     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald  && vdwModifierIsPotSwitch),
 406                        "Can not apply soft-core to switched Ewald potentials");
 407
 408     dvdl_coul  = 0;
 409     dvdl_vdw   = 0;
 410
 411     /* Lambda factor for state A, 1-lambda*/
 412     LFC[STATE_A] = one - lambda_coul;
 413     LFV[STATE_A] = one - lambda_vdw;
 414
 415     /* Lambda factor for state B, lambda*/
 416     LFC[STATE_B] = lambda_coul;
 417     LFV[STATE_B] = lambda_vdw;
 418
 419     /*derivative of the lambda factor for state A and B */
 420     DLF[STATE_A] = -1;
 421     DLF[STATE_B] = 1;
 422
 423     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 424     for (i = 0; i < NSTATES; i++)
 425     {
 426         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 427         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 428         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 429         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 430     }
 431
 432     for (n = 0; (n < nri); n++)
 433     {
 434         int npair_within_cutoff;
 435
 436         npair_within_cutoff = 0;
 437
 438         is3              = 3*shift[n];
 439         shX              = shiftvec[is3];
 440         shY              = shiftvec[is3+1];
 441         shZ              = shiftvec[is3+2];
 442         nj0              = jindex[n];
 443         nj1              = jindex[n+1];
 444         ii               = iinr[n];
 445         ii3              = 3*ii;
 446         ix               = shX + x[ii3+0];
 447         iy               = shY + x[ii3+1];
 448         iz               = shZ + x[ii3+2];
 449         iqA              = facel*chargeA[ii];
 450         iqB              = facel*chargeB[ii];
 451         ntiA             = 2*ntype*typeA[ii];
 452         ntiB             = 2*ntype*typeB[ii];
 453         vctot            = 0;
 454         vvtot            = 0;
 455         fix              = 0;
 456         fiy              = 0;
 457         fiz              = 0;
 458
 459         for (k = nj0; (k < nj1); k++)
 460         {
 461             jnr              = jjnr[k];
 462             j3               = 3*jnr;
 463             dx               = ix - x[j3];
 464             dy               = iy - x[j3+1];
 465             dz               = iz - x[j3+2];
 466             rsq              = dx*dx + dy*dy + dz*dz;
 467
 468             if (rsq >= rcutoff_max2)
 469             {
 470                 /* We save significant time by skipping all code below.
 471                  * Note that with soft-core interactions, the actual cut-off
 472                  * check might be different. But since the soft-core distance
 473                  * is always larger than r, checking on r here is safe.
 474                  */
 475                 continue;
 476             }
 477             npair_within_cutoff++;
 478
 479             if (rsq > 0)
 480             {
 481                 /* Note that unlike in the nbnxn kernels, we do not need
 482                  * to clamp the value of rsq before taking the invsqrt
 483                  * to avoid NaN in the LJ calculation, since here we do
 484                  * not calculate LJ interactions when C6 and C12 are zero.
 485                  */
 486
 487                 rinv         = gmx::invsqrt(rsq);
 488                 r            = rsq*rinv;
 489             }
 490             else
 491             {
 492                 /* The force at r=0 is zero, because of symmetry.
 493                  * But note that the potential is in general non-zero,
 494                  * since the soft-cored r will be non-zero.
 495                  */
 496                 rinv         = 0;
 497                 r            = 0;
 498             }
 499
 500             if (softCoreTreatment == SoftCoreTreatment::None)
 501             {
 502                 /* The soft-core power p will not affect the results
 503                  * with not using soft-core, so we use power of 0 which gives
 504                  * the simplest math and cheapest code.
 505                  */
 506                 rpm2         = rinv*rinv;
 507                 rp           = 1;
 508             }
 509             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 510             {
 511                 rpm2         = rsq*rsq;  /* r4 */
 512                 rp           = rpm2*rsq; /* r6 */
 513             }
 514             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 515             {
 516                 rp           = rsq*rsq*rsq; /* r6 */
 517                 rp           = rp*rp;       /* r12 */
 518                 rp           = rp*rp;       /* r24 */
 519                 rp           = rp*rp;       /* r48 */
 520                 rpm2         = rp/rsq;      /* r46 */
 521             }
 522
 523             Fscal = 0;
 524
 525             qq[STATE_A]      = iqA*chargeA[jnr];
 526             qq[STATE_B]      = iqB*chargeB[jnr];
 527
 528             tj[STATE_A]      = ntiA+2*typeA[jnr];
 529             tj[STATE_B]      = ntiB+2*typeB[jnr];
 530
 531             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 532             {
 533                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 534                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 535
 536                 for (i = 0; i < NSTATES; i++)
 537                 {
 538                     c12[i]             = nbfp[tj[i]+1];
 539                     if (useSoftCore)
 540                     {
 541                         if ((c6[i] > 0) && (c12[i] > 0))
 542                         {
 543                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 544                             sigma6[i]       = half*c12[i]/c6[i];
 545                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 546                             {
 547                                 sigma6[i] = sigma6_min;
 548                             }
 549                         }
 550                         else
 551                         {
 552                             sigma6[i]       = sigma6_def;
 553                         }
 554                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
 555                     }
 556                 }
 557
 558                 if (useSoftCore)
 559                 {
 560                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 561                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 562                     {
 563                         alpha_vdw_eff    = 0;
 564                         alpha_coul_eff   = 0;
 565                     }
 566                     else
 567                     {
 568                         alpha_vdw_eff    = alpha_vdw;
 569                         alpha_coul_eff   = alpha_coul;
 570                     }
 571                 }
 572
 573                 for (i = 0; i < NSTATES; i++)
 574                 {
 575                     FscalC[i]    = 0;
 576                     FscalV[i]    = 0;
 577                     Vcoul[i]     = 0;
 578                     Vvdw[i]      = 0;
 579
 580                     /* Only spend time on A or B state if it is non-zero */
 581                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 582                     {
 583                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 584                         if (useSoftCore)
 585                         {
 586                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 587                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
 588
 589                             if (scLambdasOrAlphasDiffer)
 590                             {
 591                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 592                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
 593                             }
 594                             else
 595                             {
 596                                 /* We can avoid one expensive pow and one / operation */
 597                                 rpinvV = rpinvC;
 598                                 rinvV  = rinvC;
 599                                 rV     = rC;
 600                             }
 601                         }
 602                         else
 603                         {
 604                             rpinvC     = 1;
 605                             rinvC      = rinv;
 606                             rC         = r;
 607
 608                             rpinvV     = 1;
 609                             rinvV      = rinv;
 610                             rV         = r;
 611                         }
 612
 613                         /* Only process the coulomb interactions if we have charges,
 614                          * and if we either include all entries in the list (no cutoff
 615                          * used in the kernel), or if we are within the cutoff.
 616                          */
 617                         bComputeElecInteraction =
 618                             ( elecInteractionTypeIsEwald && r < rcoulomb) ||
 619                             (!elecInteractionTypeIsEwald && rC < rcoulomb);
 620
 621                         if ( (qq[i] != 0) && bComputeElecInteraction)
 622                         {
 623                             if (elecInteractionTypeIsEwald)
 624                             {
 625                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 626                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 627                             }
 628                             else
 629                             {
 630                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 631                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 632                             }
 633                         }
 634
 635                         /* Only process the VDW interactions if we have
 636                          * some non-zero parameters, and if we either
 637                          * include all entries in the list (no cutoff used
 638                          * in the kernel), or if we are within the cutoff.
 639                          */
 640                         bComputeVdwInteraction =
 641                             ( vdwInteractionTypeIsEwald && r < rvdw) ||
 642                             (!vdwInteractionTypeIsEwald && rV < rvdw);
 643                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
 644                         {
 645                             real rinv6;
 646                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 647                             {
 648                                 rinv6  = calculateRinv6<softCoreTreatment>(rpinvV);
 649                             }
 650                             else
 651                             {
 652                                 rinv6  = calculateRinv6<softCoreTreatment>(rinvV);
 653                             }
 654                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
 655                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
 656
 657                             Vvdw[i]     = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
 658                             FscalV[i]   = lennardJonesScalarForce(Vvdw6, Vvdw12);
 659
 660                             if (vdwInteractionTypeIsEwald)
 661                             {
 662                                 /* Subtract the grid potential at the cut-off */
 663                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
 664                             }
 665
 666                             if (vdwModifierIsPotSwitch)
 667                             {
 668                                 real d    = rV - ic->rvdw_switch;
 669                                 d         = (d > zero) ? d : zero;
 670                                 real d2   = d*d;
 671                                 real sw   = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
 672                                 real dsw  = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
 673
 674                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
 675                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 676                             }
 677                         }
 678
 679                         /* FscalC (and FscalV) now contain: dV/drC * rC
 680                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 681                          * Further down we first multiply by r^p-2 and then by
 682                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 683                          */
 684                         FscalC[i] *= rpinvC;
 685                         FscalV[i] *= rpinvV;
 686                     }
 687                 }
 688
 689                 /* Assemble A and B states */
 690                 for (i = 0; i < NSTATES; i++)
 691                 {
 692                     vctot         += LFC[i]*Vcoul[i];
 693                     vvtot         += LFV[i]*Vvdw[i];
 694
 695                     Fscal     += LFC[i]*FscalC[i]*rpm2;
 696                     Fscal     += LFV[i]*FscalV[i]*rpm2;
 697
 698                     if (useSoftCore)
 699                     {
 700                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 701                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 702                     }
 703                     else
 704                     {
 705                         dvdl_coul += Vcoul[i]*DLF[i];
 706                         dvdl_vdw  += Vvdw[i]*DLF[i];
 707                     }
 708                 }
 709             }
 710             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 711             {
 712                 /* For excluded pairs, which are only in this pair list when
 713                  * using the Verlet scheme, we don't use soft-core.
 714                  * The group scheme also doesn't soft-core for these.
 715                  * As there is no singularity, there is no need for soft-core.
 716                  */
 717                 VV = krf*rsq - crf;
 718                 FF = -two*krf;
 719
 720                 if (ii == jnr)
 721                 {
 722                     VV *= half;
 723                 }
 724
 725                 for (i = 0; i < NSTATES; i++)
 726                 {
 727                     vctot      += LFC[i]*qq[i]*VV;
 728                     Fscal      += LFC[i]*qq[i]*FF;
 729                     dvdl_coul  += DLF[i]*qq[i]*VV;
 730                 }
 731             }
 732
 733             if (elecInteractionTypeIsEwald && r < rcoulomb)
 734             {
 735                 /* See comment in the preamble. When using Ewald interactions
 736                  * (unless we use a switch modifier) we subtract the reciprocal-space
 737                  * Ewald component here which made it possible to apply the free
 738                  * energy interaction to 1/r (vanilla coulomb short-range part)
 739                  * above. This gets us closer to the ideal case of applying
 740                  * the softcore to the entire electrostatic interaction,
 741                  * including the reciprocal-space component.
 742                  */
 743                 real v_lr, f_lr;
 744
 745                 ewrt      = r*ewtabscale;
 746                 ewitab    = static_cast<int>(ewrt);
 747                 eweps     = ewrt-ewitab;
 748                 ewitab    = 4*ewitab;
 749                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
 750                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
 751                 f_lr     *= rinv;
 752
 753                 /* Note that any possible Ewald shift has already been applied in
 754                  * the normal interaction part above.
 755                  */
 756
 757                 if (ii == jnr)
 758                 {
 759                     /* If we get here, the i particle (ii) has itself (jnr)
 760                      * in its neighborlist. This can only happen with the Verlet
 761                      * scheme, and corresponds to a self-interaction that will
 762                      * occur twice. Scale it down by 50% to only include it once.
 763                      */
 764                     v_lr *= half;
 765                 }
 766
 767                 for (i = 0; i < NSTATES; i++)
 768                 {
 769                     vctot      -= LFC[i]*qq[i]*v_lr;
 770                     Fscal      -= LFC[i]*qq[i]*f_lr;
 771                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
 772                 }
 773             }
 774
 775             if (vdwInteractionTypeIsEwald && r < rvdw)
 776             {
 777                 /* See comment in the preamble. When using LJ-Ewald interactions
 778                  * (unless we use a switch modifier) we subtract the reciprocal-space
 779                  * Ewald component here which made it possible to apply the free
 780                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 781                  * above. This gets us closer to the ideal case of applying
 782                  * the softcore to the entire VdW interaction,
 783                  * including the reciprocal-space component.
 784                  */
 785                 /* We could also use the analytical form here
 786                  * iso a table, but that can cause issues for
 787                  * r close to 0 for non-interacting pairs.
 788                  */
 789                 real rs, frac, f_lr;
 790                 int  ri;
 791
 792                 rs     = rsq*rinv*ewtabscale;
 793                 ri     = static_cast<int>(rs);
 794                 frac   = rs - ri;
 795                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 796                 /* TODO: Currently the Ewald LJ table does not contain
 797                  * the factor 1/6, we should add this.
 798                  */
 799                 FF     = f_lr*rinv/six;
 800                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
 801
 802                 if (ii == jnr)
 803                 {
 804                     /* If we get here, the i particle (ii) has itself (jnr)
 805                      * in its neighborlist. This can only happen with the Verlet
 806                      * scheme, and corresponds to a self-interaction that will
 807                      * occur twice. Scale it down by 50% to only include it once.
 808                      */
 809                     VV *= half;
 810                 }
 811
 812                 for (i = 0; i < NSTATES; i++)
 813                 {
 814                     c6grid      = nbfp_grid[tj[i]];
 815                     vvtot      += LFV[i]*c6grid*VV;
 816                     Fscal      += LFV[i]*c6grid*FF;
 817                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
 818                 }
 819             }
 820
 821             if (bDoForces)
 822             {
 823                 tx         = Fscal*dx;
 824                 ty         = Fscal*dy;
 825                 tz         = Fscal*dz;
 826                 fix        = fix + tx;
 827                 fiy        = fiy + ty;
 828                 fiz        = fiz + tz;
 829                 /* OpenMP atomics are expensive, but this kernels is also
 830                  * expensive, so we can take this hit, instead of using
 831                  * thread-local output buffers and extra reduction.
 832                  *
 833                  * All the OpenMP regions in this file are trivial and should
 834                  * not throw, so no need for try/catch.
 835                  */
 836 #pragma omp atomic
 837                 f[j3]     -= tx;
 838 #pragma omp atomic
 839                 f[j3+1]   -= ty;
 840 #pragma omp atomic
 841                 f[j3+2]   -= tz;
 842             }
 843         }
 844
 845         /* The atomics below are expensive with many OpenMP threads.
 846          * Here unperturbed i-particles will usually only have a few
 847          * (perturbed) j-particles in the list. Thus with a buffered list
 848          * we can skip a significant number of i-reductions with a check.
 849          */
 850         if (npair_within_cutoff > 0)
 851         {
 852             if (bDoForces)
 853             {
 854 #pragma omp atomic
 855                 f[ii3]        += fix;
 856 #pragma omp atomic
 857                 f[ii3+1]      += fiy;
 858 #pragma omp atomic
 859                 f[ii3+2]      += fiz;
 860             }
 861             if (bDoShiftForces)
 862             {
 863 #pragma omp atomic
 864                 fshift[is3]   += fix;
 865 #pragma omp atomic
 866                 fshift[is3+1] += fiy;
 867 #pragma omp atomic
 868                 fshift[is3+2] += fiz;
 869             }
 870             if (bDoPotential)
 871             {
 872                 ggid               = gid[n];
 873 #pragma omp atomic
 874                 Vc[ggid]          += vctot;
 875 #pragma omp atomic
 876                 Vv[ggid]          += vvtot;
 877             }
 878         }
 879     }
 880
 881 #pragma omp atomic
 882     dvdl[efptCOUL]     += dvdl_coul;
 883  #pragma omp atomic
 884     dvdl[efptVDW]      += dvdl_vdw;
 885
 886     /* Estimate flops, average for free energy stuff:
 887      * 12  flops per outer iteration
 888      * 150 flops per inner iteration
 889      */
 890 #pragma omp atomic
 891     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 892 }
 893
 894 typedef void (* KernelFunction)(const t_nblist * gmx_restrict    nlist,
 895                                 rvec * gmx_restrict              xx,
 896                                 gmx::ForceWithShiftForces *      forceWithShiftForces,
 897                                 const t_forcerec * gmx_restrict  fr,
 898                                 const t_mdatoms * gmx_restrict   mdatoms,
 899                                 nb_kernel_data_t * gmx_restrict  kernel_data,
 900                                 t_nrnb * gmx_restrict            nrnb);
 901
 902 template<SoftCoreTreatment softCoreTreatment,
 903          bool scLambdasOrAlphasDiffer,
 904          bool vdwInteractionTypeIsEwald,
 905          bool elecInteractionTypeIsEwald>
 906 static KernelFunction
 907 dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
 908 {
 909     if (vdwModifierIsPotSwitch)
 910     {
 911         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 912                                      elecInteractionTypeIsEwald, true>);
 913     }
 914     else
 915     {
 916         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 917                                      elecInteractionTypeIsEwald, false>);
 918     }
 919 }
 920
 921 template<SoftCoreTreatment softCoreTreatment,
 922          bool scLambdasOrAlphasDiffer,
 923          bool vdwInteractionTypeIsEwald>
 924 static KernelFunction
 925 dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 926                                     const bool vdwModifierIsPotSwitch)
 927 {
 928     if (elecInteractionTypeIsEwald)
 929     {
 930         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 931                                            true>(vdwModifierIsPotSwitch));
 932     }
 933     else
 934     {
 935         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 936                                            false>(vdwModifierIsPotSwitch));
 937     }
 938 }
 939
 940 template<SoftCoreTreatment softCoreTreatment,
 941          bool scLambdasOrAlphasDiffer>
 942 static KernelFunction
 943 dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 944                                    const bool elecInteractionTypeIsEwald,
 945                                    const bool vdwModifierIsPotSwitch)
 946 {
 947     if (vdwInteractionTypeIsEwald)
 948     {
 949         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
 950                                                    true>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 951     }
 952     else
 953     {
 954         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
 955                                                    false>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 956     }
 957 }
 958
 959 template<SoftCoreTreatment softCoreTreatment>
 960 static KernelFunction
 961 dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 962                                             const bool vdwInteractionTypeIsEwald,
 963                                             const bool elecInteractionTypeIsEwald,
 964                                             const bool vdwModifierIsPotSwitch)
 965 {
 966     if (scLambdasOrAlphasDiffer)
 967     {
 968         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 969                                                                            vdwModifierIsPotSwitch));
 970     }
 971     else
 972     {
 973         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 974                                                                             vdwModifierIsPotSwitch));
 975     }
 976 }
 977
 978 static KernelFunction
 979 dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
 980                const bool                 vdwInteractionTypeIsEwald,
 981                const bool                 elecInteractionTypeIsEwald,
 982                const bool                 vdwModifierIsPotSwitch,
 983                const t_forcerec          *fr)
 984 {
 985     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 986     {
 987         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(scLambdasOrAlphasDiffer,
 988                                                                                     vdwInteractionTypeIsEwald,
 989                                                                                     elecInteractionTypeIsEwald,
 990                                                                                     vdwModifierIsPotSwitch));
 991     }
 992     else if (fr->sc_r_power == 6.0_real)
 993     {
 994         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(scLambdasOrAlphasDiffer,
 995                                                                                        vdwInteractionTypeIsEwald,
 996                                                                                        elecInteractionTypeIsEwald,
 997                                                                                        vdwModifierIsPotSwitch));
 998     }
 999     else
1000     {
1001         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(scLambdasOrAlphasDiffer,
1002                                                                                         vdwInteractionTypeIsEwald,
1003                                                                                         elecInteractionTypeIsEwald,
1004                                                                                         vdwModifierIsPotSwitch));
1005     }
1006 }
1007
1008
1009 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
1010                                rvec                      *xx,
1011                                gmx::ForceWithShiftForces *ff,
1012                                const t_forcerec          *fr,
1013                                const t_mdatoms           *mdatoms,
1014                                nb_kernel_data_t          *kernel_data,
1015                                t_nrnb                    *nrnb)
1016 {
1017     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype)  || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
1018                "Unsupported eeltype with free energy");
1019
1020     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
1021     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
1022     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
1023     bool       scLambdasOrAlphasDiffer    = true;
1024
1025     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
1026     {
1027         scLambdasOrAlphasDiffer = false;
1028     }
1029     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
1030     {
1031         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
1032             fr->sc_alphacoul == fr->sc_alphavdw)
1033         {
1034             scLambdasOrAlphasDiffer = false;
1035         }
1036     }
1037     else
1038     {
1039         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1040     }
1041     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
1042                                                vdwModifierIsPotSwitch, fr);
1043     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1044 }