src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include <cmath>
  43
  44 #include <algorithm>
  45
  46 #include "gromacs/gmxlib/nrnb.h"
  47 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  48 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  49 #include "gromacs/math/functions.h"
  50 #include "gromacs/math/vec.h"
  51 #include "gromacs/mdtypes/forceoutput.h"
  52 #include "gromacs/mdtypes/forcerec.h"
  53 #include "gromacs/mdtypes/md_enums.h"
  54 #include "gromacs/utility/fatalerror.h"
  55
  56
  57 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  58 static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
  59 {
  60     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  61     *pthRoot    = 1 / (*invPthRoot);
  62 }
  63
  64 static inline real calculateRinv6(const real rinvV)
  65 {
  66     real rinv6 = rinvV * rinvV;
  67     return (rinv6 * rinv6 * rinv6);
  68 }
  69
  70 static inline real calculateVdw6(const real c6, const real rinv6)
  71 {
  72     return (c6 * rinv6);
  73 }
  74
  75 static inline real calculateVdw12(const real c12, const real rinv6)
  76 {
  77     return (c12 * rinv6 * rinv6);
  78 }
  79
  80 /* reaction-field electrostatics */
  81 static inline real
  82 reactionFieldScalarForce(const real qq, const real rinv, const real r, const real krf, const real two)
  83 {
  84     return (qq * (rinv - two * krf * r * r));
  85 }
  86 static inline real reactionFieldPotential(const real qq, const real rinv, const real r, const real krf, const real potentialShift)
  87 {
  88     return (qq * (rinv + krf * r * r - potentialShift));
  89 }
  90
  91 /* Ewald electrostatics */
  92 static inline real ewaldScalarForce(const real coulomb, const real rinv)
  93 {
  94     return (coulomb * rinv);
  95 }
  96 static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
  97 {
  98     return (coulomb * (rinv - potentialShift));
  99 }
 100
 101 /* cutoff LJ */
 102 static inline real lennardJonesScalarForce(const real v6, const real v12)
 103 {
 104     return (v12 - v6);
 105 }
 106 static inline real lennardJonesPotential(const real v6,
 107                                          const real v12,
 108                                          const real c6,
 109                                          const real c12,
 110                                          const real repulsionShift,
 111                                          const real dispersionShift,
 112                                          const real onesixth,
 113                                          const real onetwelfth)
 114 {
 115     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 116 }
 117
 118 /* Ewald LJ */
 119 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 120 {
 121     return (c6grid * potentialShift * onesixth);
 122 }
 123
 124 /* LJ Potential switch */
 125 static inline real potSwitchScalarForceMod(const real fScalarInp,
 126                                            const real potential,
 127                                            const real sw,
 128                                            const real r,
 129                                            const real rVdw,
 130                                            const real dsw,
 131                                            const real zero)
 132 {
 133     if (r < rVdw)
 134     {
 135         real fScalar = fScalarInp * sw - r * potential * dsw;
 136         return (fScalar);
 137     }
 138     return (zero);
 139 }
 140 static inline real
 141 potSwitchPotentialMod(const real potentialInp, const real sw, const real r, const real rVdw, const real zero)
 142 {
 143     if (r < rVdw)
 144     {
 145         real potential = potentialInp * sw;
 146         return (potential);
 147     }
 148     return (zero);
 149 }
 150
 151
 152 //! Templated free-energy non-bonded kernel
 153 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 154 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 155                                   rvec* gmx_restrict         xx,
 156                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 157                                   const t_forcerec* gmx_restrict fr,
 158                                   const t_mdatoms* gmx_restrict mdatoms,
 159                                   nb_kernel_data_t* gmx_restrict kernel_data,
 160                                   t_nrnb* gmx_restrict nrnb)
 161 {
 162 #define STATE_A 0
 163 #define STATE_B 1
 164 #define NSTATES 2
 165
 166     constexpr real onetwelfth = 1.0 / 12.0;
 167     constexpr real onesixth   = 1.0 / 6.0;
 168     constexpr real zero       = 0.0;
 169     constexpr real half       = 0.5;
 170     constexpr real one        = 1.0;
 171     constexpr real two        = 2.0;
 172     constexpr real six        = 6.0;
 173
 174     /* Extract pointer to non-bonded interaction constants */
 175     const interaction_const_t* ic = fr->ic;
 176
 177     // Extract pair list data
 178     const int  nri    = nlist->nri;
 179     const int* iinr   = nlist->iinr;
 180     const int* jindex = nlist->jindex;
 181     const int* jjnr   = nlist->jjnr;
 182     const int* shift  = nlist->shift;
 183     const int* gid    = nlist->gid;
 184
 185     const real* shiftvec      = fr->shift_vec[0];
 186     const real* chargeA       = mdatoms->chargeA;
 187     const real* chargeB       = mdatoms->chargeB;
 188     real*       Vc            = kernel_data->energygrp_elec;
 189     const int*  typeA         = mdatoms->typeA;
 190     const int*  typeB         = mdatoms->typeB;
 191     const int   ntype         = fr->ntype;
 192     const real* nbfp          = fr->nbfp.data();
 193     const real* nbfp_grid     = fr->ljpme_c6grid;
 194     real*       Vv            = kernel_data->energygrp_vdw;
 195     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 196     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 197     real*       dvdl          = kernel_data->dvdl;
 198     const real  alpha_coul    = fr->sc_alphacoul;
 199     const real  alpha_vdw     = fr->sc_alphavdw;
 200     const real  lam_power     = fr->sc_power;
 201     const real  sigma6_def    = fr->sc_sigma6_def;
 202     const real  sigma6_min    = fr->sc_sigma6_min;
 203     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 204     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 205     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 206
 207     // Extract data from interaction_const_t
 208     const real facel           = ic->epsfac;
 209     const real rcoulomb        = ic->rcoulomb;
 210     const real krf             = ic->k_rf;
 211     const real crf             = ic->c_rf;
 212     const real sh_lj_ewald     = ic->sh_lj_ewald;
 213     const real rvdw            = ic->rvdw;
 214     const real dispersionShift = ic->dispersion_shift.cpot;
 215     const real repulsionShift  = ic->repulsion_shift.cpot;
 216
 217     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 218     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 219                "Potential switching is not supported for Coulomb with FEP");
 220
 221     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 222     if (vdwModifierIsPotSwitch)
 223     {
 224         const real d = ic->rvdw - ic->rvdw_switch;
 225         vdw_swV3     = -10.0 / (d * d * d);
 226         vdw_swV4     = 15.0 / (d * d * d * d);
 227         vdw_swV5     = -6.0 / (d * d * d * d * d);
 228         vdw_swF2     = -30.0 / (d * d * d);
 229         vdw_swF3     = 60.0 / (d * d * d * d);
 230         vdw_swF4     = -30.0 / (d * d * d * d * d);
 231     }
 232     else
 233     {
 234         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 235         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 236     }
 237
 238     int icoul;
 239     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 240     {
 241         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 242     }
 243     else
 244     {
 245         icoul = GMX_NBKERNEL_ELEC_NONE;
 246     }
 247
 248     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 249     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 250
 251     const real* tab_ewald_F_lj = nullptr;
 252     const real* tab_ewald_V_lj = nullptr;
 253     const real* ewtab          = nullptr;
 254     real        ewtabscale     = 0;
 255     real        ewtabhalfspace = 0;
 256     real        sh_ewald       = 0;
 257     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 258     {
 259         const auto& tables = *ic->coulombEwaldTables;
 260         sh_ewald           = ic->sh_ewald;
 261         ewtab              = tables.tableFDV0.data();
 262         ewtabscale         = tables.scale;
 263         ewtabhalfspace     = half / ewtabscale;
 264         tab_ewald_F_lj     = tables.tableF.data();
 265         tab_ewald_V_lj     = tables.tableV.data();
 266     }
 267
 268     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 269      * reciprocal space. When we use non-switched Ewald interactions, we
 270      * assume the soft-coring does not significantly affect the grid contribution
 271      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 272      *
 273      * However, we cannot use this approach for switch-modified since we would then
 274      * effectively end up evaluating a significantly different interaction here compared to the
 275      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 276      * position than what the user requested, or by switching different
 277      * things (1/r rather than short-range Ewald). For these settings, we just
 278      * use the traditional short-range Ewald interaction in that case.
 279      */
 280     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 281                        "Can not apply soft-core to switched Ewald potentials");
 282
 283     real dvdl_coul = 0;
 284     real dvdl_vdw  = 0;
 285
 286     /* Lambda factor for state A, 1-lambda*/
 287     real LFC[NSTATES], LFV[NSTATES];
 288     LFC[STATE_A] = one - lambda_coul;
 289     LFV[STATE_A] = one - lambda_vdw;
 290
 291     /* Lambda factor for state B, lambda*/
 292     LFC[STATE_B] = lambda_coul;
 293     LFV[STATE_B] = lambda_vdw;
 294
 295     /*derivative of the lambda factor for state A and B */
 296     real DLF[NSTATES];
 297     DLF[STATE_A] = -1;
 298     DLF[STATE_B] = 1;
 299
 300     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 301     constexpr real sc_r_power = 6.0_real;
 302     for (int i = 0; i < NSTATES; i++)
 303     {
 304         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 305         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 306         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 307         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 308     }
 309
 310     // TODO: We should get rid of using pointers to real
 311     const real* x             = xx[0];
 312     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 313     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 314
 315     for (int n = 0; n < nri; n++)
 316     {
 317         int npair_within_cutoff = 0;
 318
 319         const int  is3   = 3 * shift[n];
 320         const real shX   = shiftvec[is3];
 321         const real shY   = shiftvec[is3 + 1];
 322         const real shZ   = shiftvec[is3 + 2];
 323         const int  nj0   = jindex[n];
 324         const int  nj1   = jindex[n + 1];
 325         const int  ii    = iinr[n];
 326         const int  ii3   = 3 * ii;
 327         const real ix    = shX + x[ii3 + 0];
 328         const real iy    = shY + x[ii3 + 1];
 329         const real iz    = shZ + x[ii3 + 2];
 330         const real iqA   = facel * chargeA[ii];
 331         const real iqB   = facel * chargeB[ii];
 332         const int  ntiA  = 2 * ntype * typeA[ii];
 333         const int  ntiB  = 2 * ntype * typeB[ii];
 334         real       vctot = 0;
 335         real       vvtot = 0;
 336         real       fix   = 0;
 337         real       fiy   = 0;
 338         real       fiz   = 0;
 339
 340         for (int k = nj0; k < nj1; k++)
 341         {
 342             int        tj[NSTATES];
 343             const int  jnr = jjnr[k];
 344             const int  j3  = 3 * jnr;
 345             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 346             real       r, rinv, rp, rpm2;
 347             real       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
 348             const real dx  = ix - x[j3];
 349             const real dy  = iy - x[j3 + 1];
 350             const real dz  = iz - x[j3 + 2];
 351             const real rsq = dx * dx + dy * dy + dz * dz;
 352             real       FscalC[NSTATES], FscalV[NSTATES];
 353
 354             if (rsq >= rcutoff_max2)
 355             {
 356                 /* We save significant time by skipping all code below.
 357                  * Note that with soft-core interactions, the actual cut-off
 358                  * check might be different. But since the soft-core distance
 359                  * is always larger than r, checking on r here is safe.
 360                  */
 361                 continue;
 362             }
 363             npair_within_cutoff++;
 364
 365             if (rsq > 0)
 366             {
 367                 /* Note that unlike in the nbnxn kernels, we do not need
 368                  * to clamp the value of rsq before taking the invsqrt
 369                  * to avoid NaN in the LJ calculation, since here we do
 370                  * not calculate LJ interactions when C6 and C12 are zero.
 371                  */
 372
 373                 rinv = gmx::invsqrt(rsq);
 374                 r    = rsq * rinv;
 375             }
 376             else
 377             {
 378                 /* The force at r=0 is zero, because of symmetry.
 379                  * But note that the potential is in general non-zero,
 380                  * since the soft-cored r will be non-zero.
 381                  */
 382                 rinv = 0;
 383                 r    = 0;
 384             }
 385
 386             if (useSoftCore)
 387             {
 388                 rpm2 = rsq * rsq;  /* r4 */
 389                 rp   = rpm2 * rsq; /* r6 */
 390             }
 391             else
 392             {
 393                 /* The soft-core power p will not affect the results
 394                  * with not using soft-core, so we use power of 0 which gives
 395                  * the simplest math and cheapest code.
 396                  */
 397                 rpm2 = rinv * rinv;
 398                 rp   = 1;
 399             }
 400
 401             real Fscal = 0;
 402
 403             qq[STATE_A] = iqA * chargeA[jnr];
 404             qq[STATE_B] = iqB * chargeB[jnr];
 405
 406             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 407             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 408
 409             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 410             {
 411                 c6[STATE_A] = nbfp[tj[STATE_A]];
 412                 c6[STATE_B] = nbfp[tj[STATE_B]];
 413
 414                 for (int i = 0; i < NSTATES; i++)
 415                 {
 416                     c12[i] = nbfp[tj[i] + 1];
 417                     if (useSoftCore)
 418                     {
 419                         if ((c6[i] > 0) && (c12[i] > 0))
 420                         {
 421                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 422                             sigma6[i] = half * c12[i] / c6[i];
 423                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 424                             {
 425                                 sigma6[i] = sigma6_min;
 426                             }
 427                         }
 428                         else
 429                         {
 430                             sigma6[i] = sigma6_def;
 431                         }
 432                     }
 433                 }
 434
 435                 if (useSoftCore)
 436                 {
 437                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 438                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 439                     {
 440                         alpha_vdw_eff  = 0;
 441                         alpha_coul_eff = 0;
 442                     }
 443                     else
 444                     {
 445                         alpha_vdw_eff  = alpha_vdw;
 446                         alpha_coul_eff = alpha_coul;
 447                     }
 448                 }
 449
 450                 for (int i = 0; i < NSTATES; i++)
 451                 {
 452                     FscalC[i] = 0;
 453                     FscalV[i] = 0;
 454                     Vcoul[i]  = 0;
 455                     Vvdw[i]   = 0;
 456
 457                     real rinvC, rinvV, rC, rV, rpinvC, rpinvV;
 458
 459                     /* Only spend time on A or B state if it is non-zero */
 460                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 461                     {
 462                         /* this section has to be inside the loop because of the dependence on sigma6 */
 463                         if (useSoftCore)
 464                         {
 465                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
 466                             pthRoot(rpinvC, &rinvC, &rC);
 467                             if (scLambdasOrAlphasDiffer)
 468                             {
 469                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
 470                                 pthRoot(rpinvV, &rinvV, &rV);
 471                             }
 472                             else
 473                             {
 474                                 /* We can avoid one expensive pow and one / operation */
 475                                 rpinvV = rpinvC;
 476                                 rinvV  = rinvC;
 477                                 rV     = rC;
 478                             }
 479                         }
 480                         else
 481                         {
 482                             rpinvC = 1;
 483                             rinvC  = rinv;
 484                             rC     = r;
 485
 486                             rpinvV = 1;
 487                             rinvV  = rinv;
 488                             rV     = r;
 489                         }
 490
 491                         /* Only process the coulomb interactions if we have charges,
 492                          * and if we either include all entries in the list (no cutoff
 493                          * used in the kernel), or if we are within the cutoff.
 494                          */
 495                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 496                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 497
 498                         if ((qq[i] != 0) && computeElecInteraction)
 499                         {
 500                             if (elecInteractionTypeIsEwald)
 501                             {
 502                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 503                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 504                             }
 505                             else
 506                             {
 507                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 508                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 509                             }
 510                         }
 511
 512                         /* Only process the VDW interactions if we have
 513                          * some non-zero parameters, and if we either
 514                          * include all entries in the list (no cutoff used
 515                          * in the kernel), or if we are within the cutoff.
 516                          */
 517                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 518                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 519                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 520                         {
 521                             real rinv6;
 522                             if (useSoftCore)
 523                             {
 524                                 rinv6 = rpinvV;
 525                             }
 526                             else
 527                             {
 528                                 rinv6 = calculateRinv6(rinvV);
 529                             }
 530                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
 531                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
 532
 533                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 534                                                             dispersionShift, onesixth, onetwelfth);
 535                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 536
 537                             if (vdwInteractionTypeIsEwald)
 538                             {
 539                                 /* Subtract the grid potential at the cut-off */
 540                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 541                                                                          sh_lj_ewald, onesixth);
 542                             }
 543
 544                             if (vdwModifierIsPotSwitch)
 545                             {
 546                                 real d        = rV - ic->rvdw_switch;
 547                                 d             = (d > zero) ? d : zero;
 548                                 const real d2 = d * d;
 549                                 const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 550                                 const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 551
 552                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 553                                                                     rvdw, dsw, zero);
 554                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 555                             }
 556                         }
 557
 558                         /* FscalC (and FscalV) now contain: dV/drC * rC
 559                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 560                          * Further down we first multiply by r^p-2 and then by
 561                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 562                          */
 563                         FscalC[i] *= rpinvC;
 564                         FscalV[i] *= rpinvV;
 565                     }
 566                 }
 567
 568                 /* Assemble A and B states */
 569                 for (int i = 0; i < NSTATES; i++)
 570                 {
 571                     vctot += LFC[i] * Vcoul[i];
 572                     vvtot += LFV[i] * Vvdw[i];
 573
 574                     Fscal += LFC[i] * FscalC[i] * rpm2;
 575                     Fscal += LFV[i] * FscalV[i] * rpm2;
 576
 577                     if (useSoftCore)
 578                     {
 579                         dvdl_coul += Vcoul[i] * DLF[i]
 580                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
 581                         dvdl_vdw += Vvdw[i] * DLF[i]
 582                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
 583                     }
 584                     else
 585                     {
 586                         dvdl_coul += Vcoul[i] * DLF[i];
 587                         dvdl_vdw += Vvdw[i] * DLF[i];
 588                     }
 589                 }
 590             }
 591             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 592             {
 593                 /* For excluded pairs, which are only in this pair list when
 594                  * using the Verlet scheme, we don't use soft-core.
 595                  * As there is no singularity, there is no need for soft-core.
 596                  */
 597                 const real FF = -two * krf;
 598                 real       VV = krf * rsq - crf;
 599
 600                 if (ii == jnr)
 601                 {
 602                     VV *= half;
 603                 }
 604
 605                 for (int i = 0; i < NSTATES; i++)
 606                 {
 607                     vctot += LFC[i] * qq[i] * VV;
 608                     Fscal += LFC[i] * qq[i] * FF;
 609                     dvdl_coul += DLF[i] * qq[i] * VV;
 610                 }
 611             }
 612
 613             if (elecInteractionTypeIsEwald && r < rcoulomb)
 614             {
 615                 /* See comment in the preamble. When using Ewald interactions
 616                  * (unless we use a switch modifier) we subtract the reciprocal-space
 617                  * Ewald component here which made it possible to apply the free
 618                  * energy interaction to 1/r (vanilla coulomb short-range part)
 619                  * above. This gets us closer to the ideal case of applying
 620                  * the softcore to the entire electrostatic interaction,
 621                  * including the reciprocal-space component.
 622                  */
 623                 real v_lr, f_lr;
 624
 625                 const real ewrt   = r * ewtabscale;
 626                 int        ewitab = static_cast<int>(ewrt);
 627                 const real eweps  = ewrt - ewitab;
 628                 ewitab            = 4 * ewitab;
 629                 f_lr              = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 630                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
 631                 f_lr *= rinv;
 632
 633                 /* Note that any possible Ewald shift has already been applied in
 634                  * the normal interaction part above.
 635                  */
 636
 637                 if (ii == jnr)
 638                 {
 639                     /* If we get here, the i particle (ii) has itself (jnr)
 640                      * in its neighborlist. This can only happen with the Verlet
 641                      * scheme, and corresponds to a self-interaction that will
 642                      * occur twice. Scale it down by 50% to only include it once.
 643                      */
 644                     v_lr *= half;
 645                 }
 646
 647                 for (int i = 0; i < NSTATES; i++)
 648                 {
 649                     vctot -= LFC[i] * qq[i] * v_lr;
 650                     Fscal -= LFC[i] * qq[i] * f_lr;
 651                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 652                 }
 653             }
 654
 655             if (vdwInteractionTypeIsEwald && r < rvdw)
 656             {
 657                 /* See comment in the preamble. When using LJ-Ewald interactions
 658                  * (unless we use a switch modifier) we subtract the reciprocal-space
 659                  * Ewald component here which made it possible to apply the free
 660                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 661                  * above. This gets us closer to the ideal case of applying
 662                  * the softcore to the entire VdW interaction,
 663                  * including the reciprocal-space component.
 664                  */
 665                 /* We could also use the analytical form here
 666                  * iso a table, but that can cause issues for
 667                  * r close to 0 for non-interacting pairs.
 668                  */
 669
 670                 const real rs   = rsq * rinv * ewtabscale;
 671                 const int  ri   = static_cast<int>(rs);
 672                 const real frac = rs - ri;
 673                 const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 674                 /* TODO: Currently the Ewald LJ table does not contain
 675                  * the factor 1/6, we should add this.
 676                  */
 677                 const real FF = f_lr * rinv / six;
 678                 real VV = (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
 679
 680                 if (ii == jnr)
 681                 {
 682                     /* If we get here, the i particle (ii) has itself (jnr)
 683                      * in its neighborlist. This can only happen with the Verlet
 684                      * scheme, and corresponds to a self-interaction that will
 685                      * occur twice. Scale it down by 50% to only include it once.
 686                      */
 687                     VV *= half;
 688                 }
 689
 690                 for (int i = 0; i < NSTATES; i++)
 691                 {
 692                     const real c6grid = nbfp_grid[tj[i]];
 693                     vvtot += LFV[i] * c6grid * VV;
 694                     Fscal += LFV[i] * c6grid * FF;
 695                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 696                 }
 697             }
 698
 699             if (doForces)
 700             {
 701                 const real tx = Fscal * dx;
 702                 const real ty = Fscal * dy;
 703                 const real tz = Fscal * dz;
 704                 fix           = fix + tx;
 705                 fiy           = fiy + ty;
 706                 fiz           = fiz + tz;
 707                 /* OpenMP atomics are expensive, but this kernels is also
 708                  * expensive, so we can take this hit, instead of using
 709                  * thread-local output buffers and extra reduction.
 710                  *
 711                  * All the OpenMP regions in this file are trivial and should
 712                  * not throw, so no need for try/catch.
 713                  */
 714 #pragma omp atomic
 715                 f[j3] -= tx;
 716 #pragma omp atomic
 717                 f[j3 + 1] -= ty;
 718 #pragma omp atomic
 719                 f[j3 + 2] -= tz;
 720             }
 721         }
 722
 723         /* The atomics below are expensive with many OpenMP threads.
 724          * Here unperturbed i-particles will usually only have a few
 725          * (perturbed) j-particles in the list. Thus with a buffered list
 726          * we can skip a significant number of i-reductions with a check.
 727          */
 728         if (npair_within_cutoff > 0)
 729         {
 730             if (doForces)
 731             {
 732 #pragma omp atomic
 733                 f[ii3] += fix;
 734 #pragma omp atomic
 735                 f[ii3 + 1] += fiy;
 736 #pragma omp atomic
 737                 f[ii3 + 2] += fiz;
 738             }
 739             if (doShiftForces)
 740             {
 741 #pragma omp atomic
 742                 fshift[is3] += fix;
 743 #pragma omp atomic
 744                 fshift[is3 + 1] += fiy;
 745 #pragma omp atomic
 746                 fshift[is3 + 2] += fiz;
 747             }
 748             if (doPotential)
 749             {
 750                 int ggid = gid[n];
 751 #pragma omp atomic
 752                 Vc[ggid] += vctot;
 753 #pragma omp atomic
 754                 Vv[ggid] += vvtot;
 755             }
 756         }
 757     }
 758
 759 #pragma omp atomic
 760     dvdl[efptCOUL] += dvdl_coul;
 761 #pragma omp atomic
 762     dvdl[efptVDW] += dvdl_vdw;
 763
 764     /* Estimate flops, average for free energy stuff:
 765      * 12  flops per outer iteration
 766      * 150 flops per inner iteration
 767      */
 768 #pragma omp atomic
 769     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 770 }
 771
 772 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 773                                rvec* gmx_restrict         xx,
 774                                gmx::ForceWithShiftForces* forceWithShiftForces,
 775                                const t_forcerec* gmx_restrict fr,
 776                                const t_mdatoms* gmx_restrict mdatoms,
 777                                nb_kernel_data_t* gmx_restrict kernel_data,
 778                                t_nrnb* gmx_restrict nrnb);
 779
 780 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 781 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
 782 {
 783     if (vdwModifierIsPotSwitch)
 784     {
 785         return (nb_free_energy_kernel<useSoftCore, scLambdasOrAlphasDiffer,
 786                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
 787     }
 788     else
 789     {
 790         return (nb_free_energy_kernel<useSoftCore, scLambdasOrAlphasDiffer,
 791                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
 792     }
 793 }
 794
 795 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 796 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 797                                                           const bool vdwModifierIsPotSwitch)
 798 {
 799     if (elecInteractionTypeIsEwald)
 800     {
 801         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 802                 vdwModifierIsPotSwitch));
 803     }
 804     else
 805     {
 806         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 807                 vdwModifierIsPotSwitch));
 808     }
 809 }
 810
 811 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 812 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 813                                                          const bool elecInteractionTypeIsEwald,
 814                                                          const bool vdwModifierIsPotSwitch)
 815 {
 816     if (vdwInteractionTypeIsEwald)
 817     {
 818         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 819                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 820     }
 821     else
 822     {
 823         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 824                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 825     }
 826 }
 827
 828 template<bool useSoftCore>
 829 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 830                                                                   const bool vdwInteractionTypeIsEwald,
 831                                                                   const bool elecInteractionTypeIsEwald,
 832                                                                   const bool vdwModifierIsPotSwitch)
 833 {
 834     if (scLambdasOrAlphasDiffer)
 835     {
 836         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 837                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 838     }
 839     else
 840     {
 841         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 842                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
 843     }
 844 }
 845
 846 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 847                                      const bool        vdwInteractionTypeIsEwald,
 848                                      const bool        elecInteractionTypeIsEwald,
 849                                      const bool        vdwModifierIsPotSwitch,
 850                                      const t_forcerec* fr)
 851 {
 852     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 853     {
 854         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
 855                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 856                 vdwModifierIsPotSwitch));
 857     }
 858     else
 859     {
 860         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
 861                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 862                 vdwModifierIsPotSwitch));
 863     }
 864 }
 865
 866
 867 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 868                                rvec*                      xx,
 869                                gmx::ForceWithShiftForces* ff,
 870                                const t_forcerec*          fr,
 871                                const t_mdatoms*           mdatoms,
 872                                nb_kernel_data_t*          kernel_data,
 873                                t_nrnb*                    nrnb)
 874 {
 875     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 876                "Unsupported eeltype with free energy");
 877
 878     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 879     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 880     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 881     bool       scLambdasOrAlphasDiffer    = true;
 882
 883     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 884     {
 885         scLambdasOrAlphasDiffer = false;
 886     }
 887     else if (fr->sc_r_power == 6.0_real)
 888     {
 889         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 890         {
 891             scLambdasOrAlphasDiffer = false;
 892         }
 893     }
 894     else
 895     {
 896         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 897     }
 898     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 899                                                elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
 900     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 901 }