/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "nb_free_energy.h"

#include <cmath>

#include <algorithm>

#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
#include "gromacs/gmxlib/nonbonded/nonbonded.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/fatalerror.h"


//! Enum for templating the soft-core treatment in the kernel
enum class SoftCoreTreatment
{
    None,    //!< No soft-core
    RPower6, //!< Soft-core with r-power = 6
    RPower48 //!< Soft-core with r-power = 48
};
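
/* With soft-core, the singular 1/r^p pair interactions of a vanishing end state
 * are evaluated at a modified distance rSC instead of r, defined through
 *
 *   rSC^p = alpha * lambdaFactor * sigma^p + r^p
 *
 * with p = 6 (RPower6, the standard soft-core) or p = 48 (RPower48).
 * The kernel below computes 1/rSC^p directly (the rpinvC/rpinvV variables)
 * and obtains rSC and 1/rSC from it via pthRoot().
 */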

//! Most treatments are fine with float in mixed-precision mode.
template <SoftCoreTreatment softCoreTreatment>
struct SoftCoreReal
{
    //! Real type for soft-core calculations
    using Real = real;
};

//! This treatment requires double precision for some computations.
template <>
struct SoftCoreReal<SoftCoreTreatment::RPower48>
{
    //! Real type for soft-core calculations
    using Real = double;
};

//! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
template <SoftCoreTreatment softCoreTreatment>
static inline void pthRoot(const real  r,
                           real       *pthRoot,
                           real       *invPthRoot)
{
    *invPthRoot = gmx::invsqrt(std::cbrt(r));
    *pthRoot    = 1/(*invPthRoot);
}

// We need a double version to make the specialization below work
#if !GMX_DOUBLE
//! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
template <SoftCoreTreatment softCoreTreatment>
static inline void pthRoot(const double  r,
                           real         *pthRoot,
                           double       *invPthRoot)
{
    *invPthRoot = gmx::invsqrt(std::cbrt(r));
    *pthRoot    = 1/(*invPthRoot);
}
#endif

//! Computes r^(1/p) and 1/r^(1/p) for p=48
template <>
inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
                                                 real         *pthRoot,
                                                 double       *invPthRoot)
{
    *pthRoot    = std::pow(r, 1.0/48.0);
    *invPthRoot = 1/(*pthRoot);
}
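
/* For p = 6 the p-th root is obtained without std::pow:
 *   x^(1/6) = sqrt(cbrt(x))  and  x^(-1/6) = invsqrt(cbrt(x)),
 * so one cbrt, one invsqrt and one division provide both the root and its
 * inverse. For p = 48 no comparably cheap identity is used and the r^48 values
 * span a much larger dynamic range, which is why that specialization works on
 * a double-precision argument and calls std::pow (see SoftCoreReal above).
 */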

template<SoftCoreTreatment softCoreTreatment>
static inline real calculateSigmaPow(const real sigma6)
{
    if (softCoreTreatment == SoftCoreTreatment::RPower6)
    {
        return sigma6;
    }
    else
    {
        real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
        sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
        sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
        return (sigmaPow);
    }
}

template<SoftCoreTreatment softCoreTreatment, class SCReal>
static inline real calculateRinv6(const SCReal rinvV)
{
    if (softCoreTreatment == SoftCoreTreatment::RPower6)
    {
        return rinvV;
    }
    else
    {
        real rinv6 = rinvV * rinvV;
        return (rinv6 * rinv6 * rinv6);
    }
}

static inline real calculateVdw6(const real c6, const real rinv6)
{
    return (c6 * rinv6);
}

static inline real calculateVdw12(const real c12, const real rinv6)
{
    return (c12 * rinv6 * rinv6);
}

/* reaction-field electrostatics */
template<class SCReal>
static inline SCReal
reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
                         const real two)
{
    return (qq*(rinv - two*krf*r*r));
}
template<class SCReal>
static inline real
reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
                       const real potentialShift)
{
    return (qq*(rinv + krf*r*r-potentialShift));
}

/* Ewald electrostatics */
static inline real
ewaldScalarForce(const real coulomb, const real rinv)
{
    return (coulomb*rinv);
}
static inline real
ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
{
    return (coulomb*(rinv-potentialShift));
}

/* cutoff LJ */
static inline real
lennardJonesScalarForce(const real v6, const real v12)
{
    return (v12 - v6);
}
static inline real
lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
                      const real dispersionShift, const real onesixth, const real onetwelfth)
{
    return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
}

/* Ewald LJ */
static inline real
ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
{
    return (c6grid * potentialShift * onesixth);
}

/* LJ Potential switch */
template<class SCReal>
static inline SCReal
potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
{
    if (r < rVdw)
    {
        SCReal fScalar = fScalarInp * sw - r * potential * dsw;
        return (fScalar);
    }
    return (zero);
}
template<class SCReal>
static inline real
potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
{
    if (r < rVdw)
    {
        real potential = potentialInp * sw;
        return (potential);
    }
    return (zero);
}


//! Templated free-energy non-bonded kernel
template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
static void
nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
                      rvec * gmx_restrict              xx,
                      gmx::ForceWithShiftForces *      forceWithShiftForces,
                      const t_forcerec * gmx_restrict  fr,
                      const t_mdatoms * gmx_restrict   mdatoms,
                      nb_kernel_data_t * gmx_restrict  kernel_data,
                      t_nrnb * gmx_restrict            nrnb)
{
    using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;

    constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);

#define  STATE_A  0
#define  STATE_B  1
#define  NSTATES  2
    int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
    real          shX, shY, shZ;
    real          tx, ty, tz, Fscal;
    SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
    real          Vcoul[NSTATES], Vvdw[NSTATES];
    real          iqA, iqB;
    real          qq[NSTATES], vctot;
    int           ntiA, ntiB, tj[NSTATES];
    real          vvtot;
    real          ix, iy, iz, fix, fiy, fiz;
    real          dx, dy, dz, r, rsq, rinv;
    real          c6[NSTATES], c12[NSTATES], c6grid;
    real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
    SCReal        dvdl_coul, dvdl_vdw;
    real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
    real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
    real          rinvC, rinvV;
    SCReal        rp, rpm2, rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
    real          sigma_pow[NSTATES];
    real          VV, FF;
    int           icoul, ivdw;
    int           nri;
    const int *   iinr;
    const int *   jindex;
    const int *   jjnr;
    const int *   shift;
    const int *   gid;
    const int *   typeA;
    const int *   typeB;
    int           ntype;
    const real *  shiftvec;
    const real *  chargeA;
    const real *  chargeB;
    real          sigma6_min, sigma6_def, lam_power;
    real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
    const real *  nbfp, *nbfp_grid;
    real *        dvdl;
    real *        Vv;
    real *        Vc;
    gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
    gmx_bool      bEwald, bEwaldLJ;
    real          rcutoff_max2;
    const real *  tab_ewald_F_lj = nullptr;
    const real *  tab_ewald_V_lj = nullptr;
    real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
    gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
    const real *  ewtab = nullptr;
    int           ewitab;
    real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;

    const real    onetwelfth  = 1.0/12.0;
    const real    onesixth    = 1.0/6.0;
    const real    zero        = 0.0;
    const real    half        = 0.5;
    const real    one         = 1.0;
    const real    two         = 2.0;
    const real    six         = 6.0;

    /* Extract pointer to non-bonded interaction constants */
    const interaction_const_t *ic = fr->ic;

    // TODO: We should get rid of using pointers to real
    const real          *x      = xx[0];
    real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);

    real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);

    // Extract pair list data
    nri                 = nlist->nri;
    iinr                = nlist->iinr;
    jindex              = nlist->jindex;
    jjnr                = nlist->jjnr;
    shift               = nlist->shift;
    gid                 = nlist->gid;

    shiftvec            = fr->shift_vec[0];
    chargeA             = mdatoms->chargeA;
    chargeB             = mdatoms->chargeB;
    Vc                  = kernel_data->energygrp_elec;
    typeA               = mdatoms->typeA;
    typeB               = mdatoms->typeB;
    ntype               = fr->ntype;
    nbfp                = fr->nbfp;
    nbfp_grid           = fr->ljpme_c6grid;
    Vv                  = kernel_data->energygrp_vdw;
    lambda_coul         = kernel_data->lambda[efptCOUL];
    lambda_vdw          = kernel_data->lambda[efptVDW];
    dvdl                = kernel_data->dvdl;
    alpha_coul          = fr->sc_alphacoul;
    alpha_vdw           = fr->sc_alphavdw;
    lam_power           = fr->sc_power;
    sigma6_def          = fr->sc_sigma6_def;
    sigma6_min          = fr->sc_sigma6_min;
    bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
    bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
    bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);

    // Extract data from interaction_const_t
    const real facel           = ic->epsfac;
    const real rcoulomb        = ic->rcoulomb;
    const real krf             = ic->k_rf;
    const real crf             = ic->c_rf;
    const real sh_lj_ewald     = ic->sh_lj_ewald;
    const real rvdw            = ic->rvdw;
    const real dispersionShift = ic->dispersion_shift.cpot;
    const real repulsionShift  = ic->repulsion_shift.cpot;

    // Note that the nbnxm kernels do not support Coulomb potential switching at all
    GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
               "Potential switching is not supported for Coulomb with FEP");

    if (ic->vdw_modifier == eintmodPOTSWITCH)
    {
        real d          = ic->rvdw - ic->rvdw_switch;
        vdw_swV3        = -10.0/(d*d*d);
        vdw_swV4        =  15.0/(d*d*d*d);
        vdw_swV5        =  -6.0/(d*d*d*d*d);
        vdw_swF2        = -30.0/(d*d*d);
        vdw_swF3        =  60.0/(d*d*d*d);
        vdw_swF4        = -30.0/(d*d*d*d*d);
    }
    else
    {
        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
        vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
    }
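
    /* These coefficients define the quintic switching function
     * S = 1 - 10*t^3 + 15*t^4 - 6*t^5 with t = (r - rvdw_switch)/(rvdw - rvdw_switch):
     * vdw_swV3..vdw_swV5 are the coefficients of (r - rvdw_switch)^3..^5 in S and
     * vdw_swF2..vdw_swF4 those of its derivative with respect to r. S decays
     * smoothly from 1 at rvdw_switch to 0 at rvdw, with vanishing first and
     * second derivatives at both ends; the inner loop evaluates sw = S and
     * dsw = dS/dr and applies them through potSwitchPotentialMod() and
     * potSwitchScalarForceMod().
     */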

    if (EVDW_PME(ic->vdwtype))
    {
        ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
    }
    else
    {
        ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
    }

    if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
    {
        icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
    }
    else if (EEL_PME_EWALD(ic->eeltype))
    {
        icoul        = GMX_NBKERNEL_ELEC_EWALD;
    }
    else
    {
        gmx_incons("Unsupported eeltype with Verlet and free-energy");
    }

    rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
    rcutoff_max2    = rcutoff_max2*rcutoff_max2;

    bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
    bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);

    if (bEwald || bEwaldLJ)
    {
        const auto &tables = *ic->coulombEwaldTables;
        sh_ewald       = ic->sh_ewald;
        ewtab          = tables.tableFDV0.data();
        ewtabscale     = tables.scale;
        ewtabhalfspace = half/ewtabscale;
        tab_ewald_F_lj = tables.tableF.data();
        tab_ewald_V_lj = tables.tableV.data();
    }

    /* For Ewald/PME interactions we cannot easily apply the soft-core component to
     * reciprocal space. When we use non-switched Ewald interactions, we
     * assume the soft-coring does not significantly affect the grid contribution
     * and apply the soft-core only to the full 1/r (- shift) pair contribution.
     *
     * However, we cannot use this approach for switch-modified interactions, since
     * we would then effectively end up evaluating a significantly different
     * interaction here compared to the normal (non-free-energy) kernels, either by
     * applying a cutoff at a different position than what the user requested, or by
     * switching different things (1/r rather than short-range Ewald). For these
     * settings we just use the traditional short-range Ewald interaction.
     */
    GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
                       !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
                       "Can not apply soft-core to switched Ewald potentials");

    dvdl_coul  = 0;
    dvdl_vdw   = 0;

    /* Lambda factor for state A, 1-lambda */
    LFC[STATE_A] = one - lambda_coul;
    LFV[STATE_A] = one - lambda_vdw;

    /* Lambda factor for state B, lambda */
    LFC[STATE_B] = lambda_coul;
    LFV[STATE_B] = lambda_vdw;

    /* Derivative of the lambda factor for state A and B */
    DLF[STATE_A] = -1;
    DLF[STATE_B] = 1;

    constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
    for (i = 0; i < NSTATES; i++)
    {
        lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
        dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
        lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
        dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
    }
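
    /* lfac_coul/lfac_vdw are the soft-core lambda factors (1 - LFC)^lam_power
     * and (1 - LFV)^lam_power of each state; they scale alpha*sigma^p in the
     * soft-core denominator, so state A is softened as lambda approaches 1 and
     * state B as lambda approaches 0. dlfac_coul/dlfac_vdw carry the
     * corresponding lambda derivatives, pre-divided by the soft-core r-power,
     * so the dV/dlambda accumulation below needs only one extra multiplication
     * per state.
     */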

    for (n = 0; (n < nri); n++)
    {
        int npair_within_cutoff;

        npair_within_cutoff = 0;

        is3              = 3*shift[n];
        shX              = shiftvec[is3];
        shY              = shiftvec[is3+1];
        shZ              = shiftvec[is3+2];
        nj0              = jindex[n];
        nj1              = jindex[n+1];
        ii               = iinr[n];
        ii3              = 3*ii;
        ix               = shX + x[ii3+0];
        iy               = shY + x[ii3+1];
        iz               = shZ + x[ii3+2];
        iqA              = facel*chargeA[ii];
        iqB              = facel*chargeB[ii];
        ntiA             = 2*ntype*typeA[ii];
        ntiB             = 2*ntype*typeB[ii];
        vctot            = 0;
        vvtot            = 0;
        fix              = 0;
        fiy              = 0;
        fiz              = 0;

        for (k = nj0; (k < nj1); k++)
        {
            jnr              = jjnr[k];
            j3               = 3*jnr;
            dx               = ix - x[j3];
            dy               = iy - x[j3+1];
            dz               = iz - x[j3+2];
            rsq              = dx*dx + dy*dy + dz*dz;

            if (rsq >= rcutoff_max2)
            {
                /* We save significant time by skipping all code below.
                 * Note that with soft-core interactions, the actual cut-off
                 * check might be different. But since the soft-core distance
                 * is always larger than r, checking on r here is safe.
                 */
                continue;
            }
            npair_within_cutoff++;

            if (rsq > 0)
            {
                /* Note that unlike in the nbnxn kernels, we do not need
                 * to clamp the value of rsq before taking the invsqrt
                 * to avoid NaN in the LJ calculation, since here we do
                 * not calculate LJ interactions when C6 and C12 are zero.
                 */

                rinv         = gmx::invsqrt(rsq);
                r            = rsq*rinv;
            }
            else
            {
                /* The force at r=0 is zero, because of symmetry.
                 * But note that the potential is in general non-zero,
                 * since the soft-cored r will be non-zero.
                 */
                rinv         = 0;
                r            = 0;
            }

            if (softCoreTreatment == SoftCoreTreatment::None)
            {
                /* When not using soft-core, the soft-core power p does not
                 * affect the results, so we use p = 0, which gives the
                 * simplest math and the cheapest code.
                 */
                rpm2         = rinv*rinv;
                rp           = 1;
            }
            if (softCoreTreatment == SoftCoreTreatment::RPower6)
            {
                rpm2         = rsq*rsq;  /* r4 */
                rp           = rpm2*rsq; /* r6 */
            }
            if (softCoreTreatment == SoftCoreTreatment::RPower48)
            {
                rp           = rsq*rsq*rsq; /* r6 */
                rp           = rp*rp;       /* r12 */
                rp           = rp*rp;       /* r24 */
                rp           = rp*rp;       /* r48 */
                rpm2         = rp/rsq;      /* r46 */
            }
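
            /* rp = r^p enters the soft-core denominator alpha*lfac*sigma^p + rp
             * below, while rpm2 = r^(p-2) converts the per-state scalar forces
             * (dV/drC * rC^(1-p) after the multiplication by rpinvC/rpinvV)
             * back to a Cartesian force: the assembly loop multiplies by rpm2
             * and the force update then multiplies by the vector (dx, dy, dz).
             */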

            Fscal = 0;

            qq[STATE_A]      = iqA*chargeA[jnr];
            qq[STATE_B]      = iqB*chargeB[jnr];

            tj[STATE_A]      = ntiA+2*typeA[jnr];
            tj[STATE_B]      = ntiB+2*typeB[jnr];

            if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
            {
                c6[STATE_A]      = nbfp[tj[STATE_A]];
                c6[STATE_B]      = nbfp[tj[STATE_B]];

                for (i = 0; i < NSTATES; i++)
                {
                    c12[i]             = nbfp[tj[i]+1];
                    if (useSoftCore)
                    {
                        if ((c6[i] > 0) && (c12[i] > 0))
                        {
                            /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
                            sigma6[i]       = half*c12[i]/c6[i];
                            if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
                            {
                                sigma6[i] = sigma6_min;
                            }
                        }
                        else
                        {
                            sigma6[i]       = sigma6_def;
                        }
                        sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
                    }
                }

                if (useSoftCore)
                {
                    /* Only use soft-core when at least one end state has zero c12:
                     * soft-core exists to avoid the singularity at r = 0. */
                    if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
                    {
                        alpha_vdw_eff    = 0;
                        alpha_coul_eff   = 0;
                    }
                    else
                    {
                        alpha_vdw_eff    = alpha_vdw;
                        alpha_coul_eff   = alpha_coul;
                    }
                }

                for (i = 0; i < NSTATES; i++)
                {
                    FscalC[i]    = 0;
                    FscalV[i]    = 0;
                    Vcoul[i]     = 0;
                    Vvdw[i]      = 0;

                    /* Only spend time on A or B state if it is non-zero */
                    if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
                    {
                        /* this section has to be inside the loop because of the dependence on sigma_pow */
                        if (useSoftCore)
                        {
                            rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
                            pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);

                            if (scLambdasOrAlphasDiffer)
                            {
                                rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
                                pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
                            }
                            else
                            {
                                /* We can avoid one expensive pow and one / operation */
                                rpinvV = rpinvC;
                                rinvV  = rinvC;
                                rV     = rC;
                            }
                        }
                        else
                        {
                            rpinvC     = 1;
                            rinvC      = rinv;
                            rC         = r;

                            rpinvV     = 1;
                            rinvV      = rinv;
                            rV         = r;
                        }

                        /* Only process the coulomb interactions if we have charges,
                         * and if we either include all entries in the list (no cutoff
                         * used in the kernel), or if we are within the cutoff.
                         */
                        bComputeElecInteraction =
                            ( bEwald && r < rcoulomb) ||
                            (!bEwald && rC < rcoulomb);

                        if ( (qq[i] != 0) && bComputeElecInteraction)
                        {
                            if (bEwald)
                            {
                                Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
                                FscalC[i] = ewaldScalarForce(qq[i], rinvC);
                            }
                            else
                            {
                                Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
                                FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
                            }
                        }

                        /* Only process the VDW interactions if we have
                         * some non-zero parameters, and if we either
                         * include all entries in the list (no cutoff used
                         * in the kernel), or if we are within the cutoff.
                         */
                        bComputeVdwInteraction =
                            ( bEwaldLJ && r < rvdw) ||
                            (!bEwaldLJ && rV < rvdw);
                        if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
                        {
                            real rinv6;
                            if (softCoreTreatment == SoftCoreTreatment::RPower6)
                            {
                                rinv6  = calculateRinv6<softCoreTreatment>(rpinvV);
                            }
                            else
                            {
                                rinv6  = calculateRinv6<softCoreTreatment>(rinvV);
                            }
                            real Vvdw6  = calculateVdw6(c6[i], rinv6);
                            real Vvdw12 = calculateVdw12(c12[i], rinv6);

                            Vvdw[i]     = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
                            FscalV[i]   = lennardJonesScalarForce(Vvdw6, Vvdw12);

                            if (bEwaldLJ)
                            {
                                /* Subtract the grid potential at the cut-off */
                                Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
                            }

                            if (ic->vdw_modifier == eintmodPOTSWITCH)
                            {
                                real d    = rV - ic->rvdw_switch;
                                d         = (d > zero) ? d : zero;
                                real d2   = d*d;
                                real sw   = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
                                real dsw  = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));

                                FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
                                Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
                            }
                        }

                        /* FscalC (and FscalV) now contain: dV/drC * rC.
                         * Here we multiply by rC^-p, so they become: dV/drC * rC^(1-p).
                         * Further down we first multiply by r^(p-2) and then by
                         * the vector r, which in total gives: dV/drC * (r/rC)^(1-p).
                         */
                        FscalC[i] *= rpinvC;
                        FscalV[i] *= rpinvV;
                    }
                }

                /* Assemble A and B states */
                for (i = 0; i < NSTATES; i++)
                {
                    vctot         += LFC[i]*Vcoul[i];
                    vvtot         += LFV[i]*Vvdw[i];

                    Fscal     += LFC[i]*FscalC[i]*rpm2;
                    Fscal     += LFV[i]*FscalV[i]*rpm2;

                    if (useSoftCore)
                    {
                        dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
                        dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
                    }
                    else
                    {
                        dvdl_coul += Vcoul[i]*DLF[i];
                        dvdl_vdw  += Vvdw[i]*DLF[i];
                    }
                }
            }
            else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
            {
                /* For excluded pairs, which are only in this pair list when
                 * using the Verlet scheme, we don't use soft-core.
                 * The group scheme also doesn't soft-core for these.
                 * As there is no singularity, there is no need for soft-core.
                 */
                VV = krf*rsq - crf;
                FF = -two*krf;

                if (ii == jnr)
                {
                    VV *= half;
                }

                for (i = 0; i < NSTATES; i++)
                {
                    vctot      += LFC[i]*qq[i]*VV;
                    Fscal      += LFC[i]*qq[i]*FF;
                    dvdl_coul  += DLF[i]*qq[i]*VV;
                }
            }

            if (bEwald && r < rcoulomb)
            {
                /* See comment in the preamble. When using Ewald interactions
                 * (unless we use a switch modifier) we subtract the reciprocal-space
                 * Ewald component here which made it possible to apply the free
                 * energy interaction to 1/r (vanilla coulomb short-range part)
                 * above. This gets us closer to the ideal case of applying
                 * the softcore to the entire electrostatic interaction,
                 * including the reciprocal-space component.
                 */
                real v_lr, f_lr;

                ewrt      = r*ewtabscale;
                ewitab    = static_cast<int>(ewrt);
                eweps     = ewrt-ewitab;
                ewitab    = 4*ewitab;
                f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
                v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
                f_lr     *= rinv;

                /* Note that any possible Ewald shift has already been applied in
                 * the normal interaction part above.
                 */

                if (ii == jnr)
                {
                    /* If we get here, the i particle (ii) has itself (jnr)
                     * in its neighborlist. This can only happen with the Verlet
                     * scheme, and corresponds to a self-interaction that will
                     * occur twice. Scale it down by 50% to only include it once.
                     */
                    v_lr *= half;
                }

                for (i = 0; i < NSTATES; i++)
                {
                    vctot      -= LFC[i]*qq[i]*v_lr;
                    Fscal      -= LFC[i]*qq[i]*f_lr;
                    dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
                }
            }

            if (bEwaldLJ && r < rvdw)
            {
                /* See comment in the preamble. When using LJ-Ewald interactions
                 * (unless we use a switch modifier) we subtract the reciprocal-space
                 * Ewald component here which made it possible to apply the free
                 * energy interaction to r^-6 (vanilla LJ6 short-range part)
                 * above. This gets us closer to the ideal case of applying
                 * the softcore to the entire VdW interaction,
                 * including the reciprocal-space component.
                 */
                /* We could also use the analytical form here instead of a
                 * table, but that can cause issues for r close to 0 for
                 * non-interacting pairs.
                 */
                real rs, frac, f_lr;
                int  ri;

                rs     = rsq*rinv*ewtabscale;
                ri     = static_cast<int>(rs);
                frac   = rs - ri;
                f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
                /* TODO: Currently the Ewald LJ table does not contain
                 * the factor 1/6, we should add this.
                 */
                FF     = f_lr*rinv/six;
                VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;

                if (ii == jnr)
                {
                    /* If we get here, the i particle (ii) has itself (jnr)
                     * in its neighborlist. This can only happen with the Verlet
                     * scheme, and corresponds to a self-interaction that will
                     * occur twice. Scale it down by 50% to only include it once.
                     */
                    VV *= half;
                }

                for (i = 0; i < NSTATES; i++)
                {
                    c6grid      = nbfp_grid[tj[i]];
                    vvtot      += LFV[i]*c6grid*VV;
                    Fscal      += LFV[i]*c6grid*FF;
                    dvdl_vdw   += (DLF[i]*c6grid)*VV;
                }
            }

            if (bDoForces)
            {
                tx         = Fscal*dx;
                ty         = Fscal*dy;
                tz         = Fscal*dz;
                fix        = fix + tx;
                fiy        = fiy + ty;
                fiz        = fiz + tz;
                /* OpenMP atomics are expensive, but this kernel is also
                 * expensive, so we can take this hit, instead of using
                 * thread-local output buffers and extra reduction.
                 *
                 * All the OpenMP regions in this file are trivial and should
                 * not throw, so no need for try/catch.
                 */
#pragma omp atomic
                f[j3]     -= tx;
#pragma omp atomic
                f[j3+1]   -= ty;
#pragma omp atomic
                f[j3+2]   -= tz;
            }
        }

        /* The atomics below are expensive with many OpenMP threads.
         * Here unperturbed i-particles will usually only have a few
         * (perturbed) j-particles in the list. Thus with a buffered list
         * we can skip a significant number of i-reductions with a check.
         */
        if (npair_within_cutoff > 0)
        {
            if (bDoForces)
            {
#pragma omp atomic
                f[ii3]        += fix;
#pragma omp atomic
                f[ii3+1]      += fiy;
#pragma omp atomic
                f[ii3+2]      += fiz;
            }
            if (bDoShiftForces)
            {
#pragma omp atomic
                fshift[is3]   += fix;
#pragma omp atomic
                fshift[is3+1] += fiy;
#pragma omp atomic
                fshift[is3+2] += fiz;
            }
            if (bDoPotential)
            {
                ggid               = gid[n];
#pragma omp atomic
                Vc[ggid]          += vctot;
#pragma omp atomic
                Vv[ggid]          += vvtot;
            }
        }
    }

#pragma omp atomic
    dvdl[efptCOUL]     += dvdl_coul;
#pragma omp atomic
    dvdl[efptVDW]      += dvdl_vdw;

    /* Estimate flops, average for free energy stuff:
     * 12  flops per outer iteration
     * 150 flops per inner iteration
     */
#pragma omp atomic
    inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
}

void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
                               rvec                      *xx,
                               gmx::ForceWithShiftForces *ff,
                               const t_forcerec          *fr,
                               const t_mdatoms           *mdatoms,
                               nb_kernel_data_t          *kernel_data,
                               t_nrnb                    *nrnb)
{
    if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
    {
        nb_free_energy_kernel<SoftCoreTreatment::None, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
    }
    else if (fr->sc_r_power == 6.0_real)
    {
        if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
            fr->sc_alphacoul == fr->sc_alphavdw)
        {
            nb_free_energy_kernel<SoftCoreTreatment::RPower6, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
        }
        else
        {
            nb_free_energy_kernel<SoftCoreTreatment::RPower6, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
        }
    }
    else if (fr->sc_r_power == 48.0_real)
    {
        nb_free_energy_kernel<SoftCoreTreatment::RPower48, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
    }
    else
    {
        GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
    }
}