src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forceoutput.h"
  51 #include "gromacs/mdtypes/forcerec.h"
  52 #include "gromacs/mdtypes/md_enums.h"
  53 #include "gromacs/utility/fatalerror.h"
  54
  55
  56 //! Enum for templating the soft-core treatment in the kernel
  57 enum class SoftCoreTreatment
  58 {
  59     None,    //!< No soft-core
  60     RPower6, //!< Soft-core with r-power = 6
  61     RPower48 //!< Soft-core with r-power = 48
  62 };
  63
  64 //! Most treatments are fine with float in mixed-precision mode.
  65 template <SoftCoreTreatment softCoreTreatment>
  66 struct SoftCoreReal
  67 {
  68     //! Real type for soft-core calculations
  69     using Real = real;
  70 };
  71
  72 //! This treatment requires double precision for some computations.
  73 template <>
  74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  75 {
  76     //! Real type for soft-core calculations
  77     using Real = double;
  78 };
  79
  80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  81 template <SoftCoreTreatment softCoreTreatment>
  82 static inline void pthRoot(const real  r,
  83                            real       *pthRoot,
  84                            real       *invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1/(*invPthRoot);
  88 }
  89
  90 // We need a double version to make the specialization below work
  91 #if !GMX_DOUBLE
  92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  93 template <SoftCoreTreatment softCoreTreatment>
  94 static inline void pthRoot(const double  r,
  95                            real         *pthRoot,
  96                            double       *invPthRoot)
  97 {
  98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  99     *pthRoot    = 1/(*invPthRoot);
 100 }
 101 #endif
 102
 103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
 104 template <>
 105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
 106                                                  real         *pthRoot,
 107                                                  double       *invPthRoot)
 108 {
 109     *pthRoot    = std::pow(r, 1.0/48.0);
 110     *invPthRoot = 1/(*pthRoot);
 111 }
 112
 113 //! Templated free-energy non-bonded kernel
 114 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
 115 static void
 116 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
 117                       rvec * gmx_restrict              xx,
 118                       gmx::ForceWithShiftForces *      forceWithShiftForces,
 119                       const t_forcerec * gmx_restrict  fr,
 120                       const t_mdatoms * gmx_restrict   mdatoms,
 121                       nb_kernel_data_t * gmx_restrict  kernel_data,
 122                       t_nrnb * gmx_restrict            nrnb)
 123 {
 124     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
 125
 126     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
 127
 128 #define  STATE_A  0
 129 #define  STATE_B  1
 130 #define  NSTATES  2
 131     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
 132     real          shX, shY, shZ;
 133     real          tx, ty, tz, Fscal;
 134     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
 135     real          Vcoul[NSTATES], Vvdw[NSTATES];
 136     real          rinv6, r;
 137     real          iqA, iqB;
 138     real          qq[NSTATES], vctot;
 139     int           ntiA, ntiB, tj[NSTATES];
 140     real          Vvdw6, Vvdw12, vvtot;
 141     real          ix, iy, iz, fix, fiy, fiz;
 142     real          dx, dy, dz, rsq, rinv;
 143     real          c6[NSTATES], c12[NSTATES], c6grid;
 144     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
 145     SCReal        dvdl_coul, dvdl_vdw;
 146     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 147     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
 148     real          rinvC, rinvV;
 149     SCReal        rp, rpm2, rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
 150     real          sigma_pow[NSTATES];
 151     real          VV, FF;
 152     int           icoul, ivdw;
 153     int           nri;
 154     const int *   iinr;
 155     const int *   jindex;
 156     const int *   jjnr;
 157     const int *   shift;
 158     const int *   gid;
 159     const int *   typeA;
 160     const int *   typeB;
 161     int           ntype;
 162     const real *  shiftvec;
 163     const real *  chargeA;
 164     const real *  chargeB;
 165     real          sigma6_min, sigma6_def, lam_power;
 166     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
 167     const real *  nbfp, *nbfp_grid;
 168     real *        dvdl;
 169     real *        Vv;
 170     real *        Vc;
 171     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
 172     gmx_bool      bEwald, bEwaldLJ;
 173     real          rcutoff_max2;
 174     const real *  tab_ewald_F_lj = nullptr;
 175     const real *  tab_ewald_V_lj = nullptr;
 176     real          d, d2, sw, dsw;
 177     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 178     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
 179     const real *  ewtab = nullptr;
 180     int           ewitab;
 181     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
 182
 183     const real    onetwelfth  = 1.0/12.0;
 184     const real    onesixth    = 1.0/6.0;
 185     const real    zero        = 0.0;
 186     const real    half        = 0.5;
 187     const real    one         = 1.0;
 188     const real    two         = 2.0;
 189     const real    six         = 6.0;
 190
 191     /* Extract pointer to non-bonded interaction constants */
 192     const interaction_const_t *ic = fr->ic;
 193
 194     // TODO: We should get rid of using pointers to real
 195     const real          *x      = xx[0];
 196     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
 197
 198     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 199
 200     // Extract pair list data
 201     nri                 = nlist->nri;
 202     iinr                = nlist->iinr;
 203     jindex              = nlist->jindex;
 204     jjnr                = nlist->jjnr;
 205     shift               = nlist->shift;
 206     gid                 = nlist->gid;
 207
 208     shiftvec            = fr->shift_vec[0];
 209     chargeA             = mdatoms->chargeA;
 210     chargeB             = mdatoms->chargeB;
 211     Vc                  = kernel_data->energygrp_elec;
 212     typeA               = mdatoms->typeA;
 213     typeB               = mdatoms->typeB;
 214     ntype               = fr->ntype;
 215     nbfp                = fr->nbfp;
 216     nbfp_grid           = fr->ljpme_c6grid;
 217     Vv                  = kernel_data->energygrp_vdw;
 218     lambda_coul         = kernel_data->lambda[efptCOUL];
 219     lambda_vdw          = kernel_data->lambda[efptVDW];
 220     dvdl                = kernel_data->dvdl;
 221     alpha_coul          = fr->sc_alphacoul;
 222     alpha_vdw           = fr->sc_alphavdw;
 223     lam_power           = fr->sc_power;
 224     sigma6_def          = fr->sc_sigma6_def;
 225     sigma6_min          = fr->sc_sigma6_min;
 226     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 227     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 228     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 229
 230     // Extract data from interaction_const_t
 231     const real facel           = ic->epsfac;
 232     const real rcoulomb        = ic->rcoulomb;
 233     const real krf             = ic->k_rf;
 234     const real crf             = ic->c_rf;
 235     const real sh_lj_ewald     = ic->sh_lj_ewald;
 236     const real rvdw            = ic->rvdw;
 237     const real dispersionShift = ic->dispersion_shift.cpot;
 238     const real repulsionShift  = ic->repulsion_shift.cpot;
 239
 240     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 241     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 242                "Potential switching is not supported for Coulomb with FEP");
 243
 244     if (ic->vdw_modifier == eintmodPOTSWITCH)
 245     {
 246         d               = ic->rvdw - ic->rvdw_switch;
 247         vdw_swV3        = -10.0/(d*d*d);
 248         vdw_swV4        =  15.0/(d*d*d*d);
 249         vdw_swV5        =  -6.0/(d*d*d*d*d);
 250         vdw_swF2        = -30.0/(d*d*d);
 251         vdw_swF3        =  60.0/(d*d*d*d);
 252         vdw_swF4        = -30.0/(d*d*d*d*d);
 253     }
 254     else
 255     {
 256         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 257         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 258     }
 259
 260     if (EVDW_PME(ic->vdwtype))
 261     {
 262         ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
 263     }
 264     else
 265     {
 266         ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
 267     }
 268
 269     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 270     {
 271         icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 272     }
 273     else if (EEL_PME_EWALD(ic->eeltype))
 274     {
 275         icoul        = GMX_NBKERNEL_ELEC_EWALD;
 276     }
 277     else
 278     {
 279         gmx_incons("Unsupported eeltype with Verlet and free-energy");
 280     }
 281
 282     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
 283     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
 284
 285     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
 286     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
 287
 288     if (bEwald || bEwaldLJ)
 289     {
 290         const auto &tables = *ic->coulombEwaldTables;
 291         sh_ewald       = ic->sh_ewald;
 292         ewtab          = tables.tableFDV0.data();
 293         ewtabscale     = tables.scale;
 294         ewtabhalfspace = half/ewtabscale;
 295         tab_ewald_F_lj = tables.tableF.data();
 296         tab_ewald_V_lj = tables.tableV.data();
 297     }
 298
 299     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 300      * reciprocal space. When we use non-switched Ewald interactions, we
 301      * assume the soft-coring does not significantly affect the grid contribution
 302      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 303      *
 304      * However, we cannot use this approach for switch-modified since we would then
 305      * effectively end up evaluating a significantly different interaction here compared to the
 306      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 307      * position than what the user requested, or by switching different
 308      * things (1/r rather than short-range Ewald). For these settings, we just
 309      * use the traditional short-range Ewald interaction in that case.
 310      */
 311     GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
 312                        !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
 313                        "Can not apply soft-core to switched Ewald potentials");
 314
 315     dvdl_coul  = 0;
 316     dvdl_vdw   = 0;
 317
 318     /* Lambda factor for state A, 1-lambda*/
 319     LFC[STATE_A] = one - lambda_coul;
 320     LFV[STATE_A] = one - lambda_vdw;
 321
 322     /* Lambda factor for state B, lambda*/
 323     LFC[STATE_B] = lambda_coul;
 324     LFV[STATE_B] = lambda_vdw;
 325
 326     /*derivative of the lambda factor for state A and B */
 327     DLF[STATE_A] = -1;
 328     DLF[STATE_B] = 1;
 329
 330     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 331     for (i = 0; i < NSTATES; i++)
 332     {
 333         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 334         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 335         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 336         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 337     }
 338
 339     for (n = 0; (n < nri); n++)
 340     {
 341         int npair_within_cutoff;
 342
 343         npair_within_cutoff = 0;
 344
 345         is3              = 3*shift[n];
 346         shX              = shiftvec[is3];
 347         shY              = shiftvec[is3+1];
 348         shZ              = shiftvec[is3+2];
 349         nj0              = jindex[n];
 350         nj1              = jindex[n+1];
 351         ii               = iinr[n];
 352         ii3              = 3*ii;
 353         ix               = shX + x[ii3+0];
 354         iy               = shY + x[ii3+1];
 355         iz               = shZ + x[ii3+2];
 356         iqA              = facel*chargeA[ii];
 357         iqB              = facel*chargeB[ii];
 358         ntiA             = 2*ntype*typeA[ii];
 359         ntiB             = 2*ntype*typeB[ii];
 360         vctot            = 0;
 361         vvtot            = 0;
 362         fix              = 0;
 363         fiy              = 0;
 364         fiz              = 0;
 365
 366         for (k = nj0; (k < nj1); k++)
 367         {
 368             jnr              = jjnr[k];
 369             j3               = 3*jnr;
 370             dx               = ix - x[j3];
 371             dy               = iy - x[j3+1];
 372             dz               = iz - x[j3+2];
 373             rsq              = dx*dx + dy*dy + dz*dz;
 374
 375             if (rsq >= rcutoff_max2)
 376             {
 377                 /* We save significant time by skipping all code below.
 378                  * Note that with soft-core interactions, the actual cut-off
 379                  * check might be different. But since the soft-core distance
 380                  * is always larger than r, checking on r here is safe.
 381                  */
 382                 continue;
 383             }
 384             npair_within_cutoff++;
 385
 386             if (rsq > 0)
 387             {
 388                 /* Note that unlike in the nbnxn kernels, we do not need
 389                  * to clamp the value of rsq before taking the invsqrt
 390                  * to avoid NaN in the LJ calculation, since here we do
 391                  * not calculate LJ interactions when C6 and C12 are zero.
 392                  */
 393
 394                 rinv         = gmx::invsqrt(rsq);
 395                 r            = rsq*rinv;
 396             }
 397             else
 398             {
 399                 /* The force at r=0 is zero, because of symmetry.
 400                  * But note that the potential is in general non-zero,
 401                  * since the soft-cored r will be non-zero.
 402                  */
 403                 rinv         = 0;
 404                 r            = 0;
 405             }
 406
 407             if (softCoreTreatment == SoftCoreTreatment::None)
 408             {
 409                 /* The soft-core power p will not affect the results
 410                  * with not using soft-core, so we use power of 0 which gives
 411                  * the simplest math and cheapest code.
 412                  */
 413                 rpm2         = rinv*rinv;
 414                 rp           = 1;
 415             }
 416             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 417             {
 418                 rpm2         = rsq*rsq;  /* r4 */
 419                 rp           = rpm2*rsq; /* r6 */
 420             }
 421             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 422             {
 423                 rp           = rsq*rsq*rsq; /* r6 */
 424                 rp           = rp*rp;       /* r12 */
 425                 rp           = rp*rp;       /* r24 */
 426                 rp           = rp*rp;       /* r48 */
 427                 rpm2         = rp/rsq;      /* r46 */
 428             }
 429
 430             Fscal = 0;
 431
 432             qq[STATE_A]      = iqA*chargeA[jnr];
 433             qq[STATE_B]      = iqB*chargeB[jnr];
 434
 435             tj[STATE_A]      = ntiA+2*typeA[jnr];
 436             tj[STATE_B]      = ntiB+2*typeB[jnr];
 437
 438             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 439             {
 440                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 441                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 442
 443                 for (i = 0; i < NSTATES; i++)
 444                 {
 445                     c12[i]             = nbfp[tj[i]+1];
 446                     if (useSoftCore)
 447                     {
 448                         if ((c6[i] > 0) && (c12[i] > 0))
 449                         {
 450                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 451                             sigma6[i]       = half*c12[i]/c6[i];
 452                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 453                             {
 454                                 sigma6[i] = sigma6_min;
 455                             }
 456                         }
 457                         else
 458                         {
 459                             sigma6[i]       = sigma6_def;
 460                         }
 461                         if (softCoreTreatment == SoftCoreTreatment::RPower6)
 462                         {
 463                             sigma_pow[i]    = sigma6[i];
 464                         }
 465                         else
 466                         {
 467                             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 468                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 469                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 470                         }
 471                     }
 472                 }
 473
 474                 if (useSoftCore)
 475                 {
 476                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 477                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 478                     {
 479                         alpha_vdw_eff    = 0;
 480                         alpha_coul_eff   = 0;
 481                     }
 482                     else
 483                     {
 484                         alpha_vdw_eff    = alpha_vdw;
 485                         alpha_coul_eff   = alpha_coul;
 486                     }
 487                 }
 488
 489                 for (i = 0; i < NSTATES; i++)
 490                 {
 491                     FscalC[i]    = 0;
 492                     FscalV[i]    = 0;
 493                     Vcoul[i]     = 0;
 494                     Vvdw[i]      = 0;
 495
 496                     /* Only spend time on A or B state if it is non-zero */
 497                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 498                     {
 499                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 500                         if (useSoftCore)
 501                         {
 502                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 503                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
 504
 505                             if (scLambdasOrAlphasDiffer)
 506                             {
 507                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 508                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
 509                             }
 510                             else
 511                             {
 512                                 /* We can avoid one expensive pow and one / operation */
 513                                 rpinvV = rpinvC;
 514                                 rinvV  = rinvC;
 515                                 rV     = rC;
 516                             }
 517                         }
 518                         else
 519                         {
 520                             rpinvC     = 1;
 521                             rinvC      = rinv;
 522                             rC         = r;
 523
 524                             rpinvV     = 1;
 525                             rinvV      = rinv;
 526                             rV         = r;
 527                         }
 528
 529                         /* Only process the coulomb interactions if we have charges,
 530                          * and if we either include all entries in the list (no cutoff
 531                          * used in the kernel), or if we are within the cutoff.
 532                          */
 533                         bComputeElecInteraction =
 534                             ( bEwald && r < rcoulomb) ||
 535                             (!bEwald && rC < rcoulomb);
 536
 537                         if ( (qq[i] != 0) && bComputeElecInteraction)
 538                         {
 539                             if (bEwald)
 540                             {
 541                                 /* Ewald FEP is done only on the 1/r part */
 542                                 Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
 543                                 FscalC[i]  = qq[i]*rinvC;
 544                             }
 545                             else
 546                             {
 547                                 /* reaction-field */
 548                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
 549                                 FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
 550                             }
 551                         }
 552
 553                         /* Only process the VDW interactions if we have
 554                          * some non-zero parameters, and if we either
 555                          * include all entries in the list (no cutoff used
 556                          * in the kernel), or if we are within the cutoff.
 557                          */
 558                         bComputeVdwInteraction =
 559                             ( bEwaldLJ && r < rvdw) ||
 560                             (!bEwaldLJ && rV < rvdw);
 561                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
 562                         {
 563                             /* cutoff LJ, also handles part of Ewald LJ */
 564                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 565                             {
 566                                 rinv6        = rpinvV;
 567                             }
 568                             else
 569                             {
 570                                 rinv6        = rinvV*rinvV;
 571                                 rinv6        = rinv6*rinv6*rinv6;
 572                             }
 573                             Vvdw6            = c6[i]*rinv6;
 574                             Vvdw12           = c12[i]*rinv6*rinv6;
 575
 576                             Vvdw[i]          = ( (Vvdw12 + c12[i]*repulsionShift)*onetwelfth
 577                                                  - (Vvdw6 + c6[i]*dispersionShift)*onesixth);
 578                             FscalV[i]        = Vvdw12 - Vvdw6;
 579
 580                             if (bEwaldLJ)
 581                             {
 582                                 /* Subtract the grid potential at the cut-off */
 583                                 c6grid      = nbfp_grid[tj[i]];
 584                                 Vvdw[i]    += c6grid*sh_lj_ewald*onesixth;
 585                             }
 586
 587                             if (ic->vdw_modifier == eintmodPOTSWITCH)
 588                             {
 589                                 d                = rV - ic->rvdw_switch;
 590                                 d                = (d > zero) ? d : zero;
 591                                 d2               = d*d;
 592                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
 593                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
 594
 595                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
 596                                 Vvdw[i]         *= sw;
 597
 598                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
 599                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
 600                             }
 601                         }
 602
 603                         /* FscalC (and FscalV) now contain: dV/drC * rC
 604                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 605                          * Further down we first multiply by r^p-2 and then by
 606                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 607                          */
 608                         FscalC[i] *= rpinvC;
 609                         FscalV[i] *= rpinvV;
 610                     }
 611                 }
 612
 613                 /* Assemble A and B states */
 614                 for (i = 0; i < NSTATES; i++)
 615                 {
 616                     vctot         += LFC[i]*Vcoul[i];
 617                     vvtot         += LFV[i]*Vvdw[i];
 618
 619                     Fscal     += LFC[i]*FscalC[i]*rpm2;
 620                     Fscal     += LFV[i]*FscalV[i]*rpm2;
 621
 622                     if (useSoftCore)
 623                     {
 624                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 625                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 626                     }
 627                     else
 628                     {
 629                         dvdl_coul += Vcoul[i]*DLF[i];
 630                         dvdl_vdw  += Vvdw[i]*DLF[i];
 631                     }
 632                 }
 633             }
 634             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 635             {
 636                 /* For excluded pairs, which are only in this pair list when
 637                  * using the Verlet scheme, we don't use soft-core.
 638                  * The group scheme also doesn't soft-core for these.
 639                  * As there is no singularity, there is no need for soft-core.
 640                  */
 641                 VV = krf*rsq - crf;
 642                 FF = -two*krf;
 643
 644                 if (ii == jnr)
 645                 {
 646                     VV *= half;
 647                 }
 648
 649                 for (i = 0; i < NSTATES; i++)
 650                 {
 651                     vctot      += LFC[i]*qq[i]*VV;
 652                     Fscal      += LFC[i]*qq[i]*FF;
 653                     dvdl_coul  += DLF[i]*qq[i]*VV;
 654                 }
 655             }
 656
 657             if (bEwald && r < rcoulomb)
 658             {
 659                 /* See comment in the preamble. When using Ewald interactions
 660                  * (unless we use a switch modifier) we subtract the reciprocal-space
 661                  * Ewald component here which made it possible to apply the free
 662                  * energy interaction to 1/r (vanilla coulomb short-range part)
 663                  * above. This gets us closer to the ideal case of applying
 664                  * the softcore to the entire electrostatic interaction,
 665                  * including the reciprocal-space component.
 666                  */
 667                 real v_lr, f_lr;
 668
 669                 ewrt      = r*ewtabscale;
 670                 ewitab    = static_cast<int>(ewrt);
 671                 eweps     = ewrt-ewitab;
 672                 ewitab    = 4*ewitab;
 673                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
 674                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
 675                 f_lr     *= rinv;
 676
 677                 /* Note that any possible Ewald shift has already been applied in
 678                  * the normal interaction part above.
 679                  */
 680
 681                 if (ii == jnr)
 682                 {
 683                     /* If we get here, the i particle (ii) has itself (jnr)
 684                      * in its neighborlist. This can only happen with the Verlet
 685                      * scheme, and corresponds to a self-interaction that will
 686                      * occur twice. Scale it down by 50% to only include it once.
 687                      */
 688                     v_lr *= half;
 689                 }
 690
 691                 for (i = 0; i < NSTATES; i++)
 692                 {
 693                     vctot      -= LFC[i]*qq[i]*v_lr;
 694                     Fscal      -= LFC[i]*qq[i]*f_lr;
 695                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
 696                 }
 697             }
 698
 699             if (bEwaldLJ && r < rvdw)
 700             {
 701                 /* See comment in the preamble. When using LJ-Ewald interactions
 702                  * (unless we use a switch modifier) we subtract the reciprocal-space
 703                  * Ewald component here which made it possible to apply the free
 704                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 705                  * above. This gets us closer to the ideal case of applying
 706                  * the softcore to the entire VdW interaction,
 707                  * including the reciprocal-space component.
 708                  */
 709                 /* We could also use the analytical form here
 710                  * iso a table, but that can cause issues for
 711                  * r close to 0 for non-interacting pairs.
 712                  */
 713                 real rs, frac, f_lr;
 714                 int  ri;
 715
 716                 rs     = rsq*rinv*ewtabscale;
 717                 ri     = static_cast<int>(rs);
 718                 frac   = rs - ri;
 719                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 720                 /* TODO: Currently the Ewald LJ table does not contain
 721                  * the factor 1/6, we should add this.
 722                  */
 723                 FF     = f_lr*rinv/six;
 724                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
 725
 726                 if (ii == jnr)
 727                 {
 728                     /* If we get here, the i particle (ii) has itself (jnr)
 729                      * in its neighborlist. This can only happen with the Verlet
 730                      * scheme, and corresponds to a self-interaction that will
 731                      * occur twice. Scale it down by 50% to only include it once.
 732                      */
 733                     VV *= half;
 734                 }
 735
 736                 for (i = 0; i < NSTATES; i++)
 737                 {
 738                     c6grid      = nbfp_grid[tj[i]];
 739                     vvtot      += LFV[i]*c6grid*VV;
 740                     Fscal      += LFV[i]*c6grid*FF;
 741                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
 742                 }
 743             }
 744
 745             if (bDoForces)
 746             {
 747                 tx         = Fscal*dx;
 748                 ty         = Fscal*dy;
 749                 tz         = Fscal*dz;
 750                 fix        = fix + tx;
 751                 fiy        = fiy + ty;
 752                 fiz        = fiz + tz;
 753                 /* OpenMP atomics are expensive, but this kernels is also
 754                  * expensive, so we can take this hit, instead of using
 755                  * thread-local output buffers and extra reduction.
 756                  *
 757                  * All the OpenMP regions in this file are trivial and should
 758                  * not throw, so no need for try/catch.
 759                  */
 760 #pragma omp atomic
 761                 f[j3]     -= tx;
 762 #pragma omp atomic
 763                 f[j3+1]   -= ty;
 764 #pragma omp atomic
 765                 f[j3+2]   -= tz;
 766             }
 767         }
 768
 769         /* The atomics below are expensive with many OpenMP threads.
 770          * Here unperturbed i-particles will usually only have a few
 771          * (perturbed) j-particles in the list. Thus with a buffered list
 772          * we can skip a significant number of i-reductions with a check.
 773          */
 774         if (npair_within_cutoff > 0)
 775         {
 776             if (bDoForces)
 777             {
 778 #pragma omp atomic
 779                 f[ii3]        += fix;
 780 #pragma omp atomic
 781                 f[ii3+1]      += fiy;
 782 #pragma omp atomic
 783                 f[ii3+2]      += fiz;
 784             }
 785             if (bDoShiftForces)
 786             {
 787 #pragma omp atomic
 788                 fshift[is3]   += fix;
 789 #pragma omp atomic
 790                 fshift[is3+1] += fiy;
 791 #pragma omp atomic
 792                 fshift[is3+2] += fiz;
 793             }
 794             if (bDoPotential)
 795             {
 796                 ggid               = gid[n];
 797 #pragma omp atomic
 798                 Vc[ggid]          += vctot;
 799 #pragma omp atomic
 800                 Vv[ggid]          += vvtot;
 801             }
 802         }
 803     }
 804
 805 #pragma omp atomic
 806     dvdl[efptCOUL]     += dvdl_coul;
 807  #pragma omp atomic
 808     dvdl[efptVDW]      += dvdl_vdw;
 809
 810     /* Estimate flops, average for free energy stuff:
 811      * 12  flops per outer iteration
 812      * 150 flops per inner iteration
 813      */
 814 #pragma omp atomic
 815     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 816 }
 817
 818 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
 819                                rvec                      *xx,
 820                                gmx::ForceWithShiftForces *ff,
 821                                const t_forcerec          *fr,
 822                                const t_mdatoms           *mdatoms,
 823                                nb_kernel_data_t          *kernel_data,
 824                                t_nrnb                    *nrnb)
 825 {
 826     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 827     {
 828         nb_free_energy_kernel<SoftCoreTreatment::None, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 829     }
 830     else if (fr->sc_r_power == 6.0_real)
 831     {
 832         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
 833             fr->sc_alphacoul == fr->sc_alphavdw)
 834         {
 835             nb_free_energy_kernel<SoftCoreTreatment::RPower6, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 836         }
 837         else
 838         {
 839             nb_free_energy_kernel<SoftCoreTreatment::RPower6, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 840         }
 841     }
 842     else if (fr->sc_r_power == 48.0_real)
 843     {
 844         nb_free_energy_kernel<SoftCoreTreatment::RPower48, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 845     }
 846     else
 847     {
 848         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 849     }
 850 }