src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "nb_free_energy.h"
  41
  42 #include "config.h"
  43
  44 #include <cmath>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/gmxlib/nrnb.h"
  49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  51 #include "gromacs/math/functions.h"
  52 #include "gromacs/math/vec.h"
  53 #include "gromacs/mdtypes/forceoutput.h"
  54 #include "gromacs/mdtypes/forcerec.h"
  55 #include "gromacs/mdtypes/interaction_const.h"
  56 #include "gromacs/mdtypes/md_enums.h"
  57 #include "gromacs/mdtypes/mdatom.h"
  58 #include "gromacs/simd/simd.h"
  59 #include "gromacs/utility/fatalerror.h"
  60
  61
  62 //! Scalar (non-SIMD) data types.
  63 struct ScalarDataTypes
  64 {
  65     using RealType                     = real; //!< The data type to use as real.
  66     using IntType                      = int;  //!< The data type to use as int.
  67     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
  68     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
  69 };
  70
  71 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
  72 //! SIMD data types.
  73 struct SimdDataTypes
  74 {
  75     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
  76     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
  77     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
  78     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
  79 };
  80 #endif
  81
  82 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
  83 template<class RealType>
  84 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
  85 {
  86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
  87     *pthRoot    = 1 / (*invPthRoot);
  88 }
  89
  90 template<class RealType>
  91 static inline RealType calculateRinv6(const RealType rinvV)
  92 {
  93     RealType rinv6 = rinvV * rinvV;
  94     return (rinv6 * rinv6 * rinv6);
  95 }
  96
  97 template<class RealType>
  98 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
  99 {
 100     return (c6 * rinv6);
 101 }
 102
 103 template<class RealType>
 104 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
 105 {
 106     return (c12 * rinv6 * rinv6);
 107 }
 108
 109 /* reaction-field electrostatics */
 110 template<class RealType>
 111 static inline RealType reactionFieldScalarForce(const RealType qq,
 112                                                 const RealType rinv,
 113                                                 const RealType r,
 114                                                 const real     krf,
 115                                                 const real     two)
 116 {
 117     return (qq * (rinv - two * krf * r * r));
 118 }
 119 template<class RealType>
 120 static inline RealType reactionFieldPotential(const RealType qq,
 121                                               const RealType rinv,
 122                                               const RealType r,
 123                                               const real     krf,
 124                                               const real     potentialShift)
 125 {
 126     return (qq * (rinv + krf * r * r - potentialShift));
 127 }
 128
 129 /* Ewald electrostatics */
 130 template<class RealType>
 131 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
 132 {
 133     return (coulomb * rinv);
 134 }
 135 template<class RealType>
 136 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
 137 {
 138     return (coulomb * (rinv - potentialShift));
 139 }
 140
 141 /* cutoff LJ */
 142 template<class RealType>
 143 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
 144 {
 145     return (v12 - v6);
 146 }
 147 template<class RealType>
 148 static inline RealType lennardJonesPotential(const RealType v6,
 149                                              const RealType v12,
 150                                              const RealType c6,
 151                                              const RealType c12,
 152                                              const real     repulsionShift,
 153                                              const real     dispersionShift,
 154                                              const real     onesixth,
 155                                              const real     onetwelfth)
 156 {
 157     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
 158 }
 159
 160 /* Ewald LJ */
 161 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
 162 {
 163     return (c6grid * potentialShift * onesixth);
 164 }
 165
 166 /* LJ Potential switch */
 167 template<class RealType>
 168 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
 169                                                const RealType potential,
 170                                                const RealType sw,
 171                                                const RealType r,
 172                                                const RealType rVdw,
 173                                                const RealType dsw,
 174                                                const real     zero)
 175 {
 176     if (r < rVdw)
 177     {
 178         real fScalar = fScalarInp * sw - r * potential * dsw;
 179         return (fScalar);
 180     }
 181     return (zero);
 182 }
 183 template<class RealType>
 184 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
 185                                              const RealType sw,
 186                                              const RealType r,
 187                                              const RealType rVdw,
 188                                              const real     zero)
 189 {
 190     if (r < rVdw)
 191     {
 192         real potential = potentialInp * sw;
 193         return (potential);
 194     }
 195     return (zero);
 196 }
 197
 198
 199 //! Templated free-energy non-bonded kernel
 200 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 201 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
 202                                   rvec* gmx_restrict         xx,
 203                                   gmx::ForceWithShiftForces* forceWithShiftForces,
 204                                   const t_forcerec* gmx_restrict fr,
 205                                   const t_mdatoms* gmx_restrict mdatoms,
 206                                   nb_kernel_data_t* gmx_restrict kernel_data,
 207                                   t_nrnb* gmx_restrict nrnb)
 208 {
 209 #define STATE_A 0
 210 #define STATE_B 1
 211 #define NSTATES 2
 212
 213     using RealType = typename DataTypes::RealType;
 214     using IntType  = typename DataTypes::IntType;
 215
 216     /* FIXME: How should these be handled with SIMD? */
 217     constexpr real onetwelfth = 1.0 / 12.0;
 218     constexpr real onesixth   = 1.0 / 6.0;
 219     constexpr real zero       = 0.0;
 220     constexpr real half       = 0.5;
 221     constexpr real one        = 1.0;
 222     constexpr real two        = 2.0;
 223     constexpr real six        = 6.0;
 224
 225     /* Extract pointer to non-bonded interaction constants */
 226     const interaction_const_t* ic = fr->ic;
 227
 228     // Extract pair list data
 229     const int  nri    = nlist->nri;
 230     const int* iinr   = nlist->iinr;
 231     const int* jindex = nlist->jindex;
 232     const int* jjnr   = nlist->jjnr;
 233     const int* shift  = nlist->shift;
 234     const int* gid    = nlist->gid;
 235
 236     const real* shiftvec      = fr->shift_vec[0];
 237     const real* chargeA       = mdatoms->chargeA;
 238     const real* chargeB       = mdatoms->chargeB;
 239     real*       Vc            = kernel_data->energygrp_elec;
 240     const int*  typeA         = mdatoms->typeA;
 241     const int*  typeB         = mdatoms->typeB;
 242     const int   ntype         = fr->ntype;
 243     const real* nbfp          = fr->nbfp.data();
 244     const real* nbfp_grid     = fr->ljpme_c6grid;
 245     real*       Vv            = kernel_data->energygrp_vdw;
 246     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
 247     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
 248     real*       dvdl          = kernel_data->dvdl;
 249     const real  alpha_coul    = fr->sc_alphacoul;
 250     const real  alpha_vdw     = fr->sc_alphavdw;
 251     const real  lam_power     = fr->sc_power;
 252     const real  sigma6_def    = fr->sc_sigma6_def;
 253     const real  sigma6_min    = fr->sc_sigma6_min;
 254     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 255     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 256     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 257
 258     // Extract data from interaction_const_t
 259     const real facel           = ic->epsfac;
 260     const real rcoulomb        = ic->rcoulomb;
 261     const real krf             = ic->k_rf;
 262     const real crf             = ic->c_rf;
 263     const real sh_lj_ewald     = ic->sh_lj_ewald;
 264     const real rvdw            = ic->rvdw;
 265     const real dispersionShift = ic->dispersion_shift.cpot;
 266     const real repulsionShift  = ic->repulsion_shift.cpot;
 267
 268     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 269     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 270                "Potential switching is not supported for Coulomb with FEP");
 271
 272     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 273     if (vdwModifierIsPotSwitch)
 274     {
 275         const real d = ic->rvdw - ic->rvdw_switch;
 276         vdw_swV3     = -10.0 / (d * d * d);
 277         vdw_swV4     = 15.0 / (d * d * d * d);
 278         vdw_swV5     = -6.0 / (d * d * d * d * d);
 279         vdw_swF2     = -30.0 / (d * d * d);
 280         vdw_swF3     = 60.0 / (d * d * d * d);
 281         vdw_swF4     = -30.0 / (d * d * d * d * d);
 282     }
 283     else
 284     {
 285         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 286         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 287     }
 288
 289     int icoul;
 290     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 291     {
 292         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 293     }
 294     else
 295     {
 296         icoul = GMX_NBKERNEL_ELEC_NONE;
 297     }
 298
 299     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
 300     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
 301
 302     const real* tab_ewald_F_lj = nullptr;
 303     const real* tab_ewald_V_lj = nullptr;
 304     const real* ewtab          = nullptr;
 305     real        ewtabscale     = 0;
 306     real        ewtabhalfspace = 0;
 307     real        sh_ewald       = 0;
 308     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
 309     {
 310         const auto& tables = *ic->coulombEwaldTables;
 311         sh_ewald           = ic->sh_ewald;
 312         ewtab              = tables.tableFDV0.data();
 313         ewtabscale         = tables.scale;
 314         ewtabhalfspace     = half / ewtabscale;
 315         tab_ewald_F_lj     = tables.tableF.data();
 316         tab_ewald_V_lj     = tables.tableV.data();
 317     }
 318
 319     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 320      * reciprocal space. When we use non-switched Ewald interactions, we
 321      * assume the soft-coring does not significantly affect the grid contribution
 322      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 323      *
 324      * However, we cannot use this approach for switch-modified since we would then
 325      * effectively end up evaluating a significantly different interaction here compared to the
 326      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 327      * position than what the user requested, or by switching different
 328      * things (1/r rather than short-range Ewald). For these settings, we just
 329      * use the traditional short-range Ewald interaction in that case.
 330      */
 331     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
 332                        "Can not apply soft-core to switched Ewald potentials");
 333
 334     real dvdl_coul = 0;
 335     real dvdl_vdw  = 0;
 336
 337     /* Lambda factor for state A, 1-lambda*/
 338     real LFC[NSTATES], LFV[NSTATES];
 339     LFC[STATE_A] = one - lambda_coul;
 340     LFV[STATE_A] = one - lambda_vdw;
 341
 342     /* Lambda factor for state B, lambda*/
 343     LFC[STATE_B] = lambda_coul;
 344     LFV[STATE_B] = lambda_vdw;
 345
 346     /*derivative of the lambda factor for state A and B */
 347     real DLF[NSTATES];
 348     DLF[STATE_A] = -1;
 349     DLF[STATE_B] = 1;
 350
 351     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 352     constexpr real sc_r_power = 6.0_real;
 353     for (int i = 0; i < NSTATES; i++)
 354     {
 355         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
 356         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
 357         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
 358         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
 359     }
 360
 361     // TODO: We should get rid of using pointers to real
 362     const real* x             = xx[0];
 363     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
 364     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
 365
 366     for (int n = 0; n < nri; n++)
 367     {
 368         int npair_within_cutoff = 0;
 369
 370         const int  is3   = 3 * shift[n];
 371         const real shX   = shiftvec[is3];
 372         const real shY   = shiftvec[is3 + 1];
 373         const real shZ   = shiftvec[is3 + 2];
 374         const int  nj0   = jindex[n];
 375         const int  nj1   = jindex[n + 1];
 376         const int  ii    = iinr[n];
 377         const int  ii3   = 3 * ii;
 378         const real ix    = shX + x[ii3 + 0];
 379         const real iy    = shY + x[ii3 + 1];
 380         const real iz    = shZ + x[ii3 + 2];
 381         const real iqA   = facel * chargeA[ii];
 382         const real iqB   = facel * chargeB[ii];
 383         const int  ntiA  = 2 * ntype * typeA[ii];
 384         const int  ntiB  = 2 * ntype * typeB[ii];
 385         real       vctot = 0;
 386         real       vvtot = 0;
 387         real       fix   = 0;
 388         real       fiy   = 0;
 389         real       fiz   = 0;
 390
 391         for (int k = nj0; k < nj1; k++)
 392         {
 393             int            tj[NSTATES];
 394             const int      jnr = jjnr[k];
 395             const int      j3  = 3 * jnr;
 396             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
 397             RealType       r, rinv, rp, rpm2;
 398             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
 399             const RealType dx  = ix - x[j3];
 400             const RealType dy  = iy - x[j3 + 1];
 401             const RealType dz  = iz - x[j3 + 2];
 402             const RealType rsq = dx * dx + dy * dy + dz * dz;
 403             RealType       FscalC[NSTATES], FscalV[NSTATES];
 404             /* Check if this pair on the exlusions list.*/
 405             const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
 406
 407             if (rsq >= rcutoff_max2 && bPairIncluded)
 408             {
 409                 /* We save significant time by skipping all code below.
 410                  * Note that with soft-core interactions, the actual cut-off
 411                  * check might be different. But since the soft-core distance
 412                  * is always larger than r, checking on r here is safe.
 413                  * Exclusions outside the cutoff can not be skipped as
 414                  * when using Ewald: the reciprocal-space
 415                  * Ewald component still needs to be subtracted.
 416                  */
 417
 418                 continue;
 419             }
 420             npair_within_cutoff++;
 421
 422             if (rsq > 0)
 423             {
 424                 /* Note that unlike in the nbnxn kernels, we do not need
 425                  * to clamp the value of rsq before taking the invsqrt
 426                  * to avoid NaN in the LJ calculation, since here we do
 427                  * not calculate LJ interactions when C6 and C12 are zero.
 428                  */
 429
 430                 rinv = gmx::invsqrt(rsq);
 431                 r    = rsq * rinv;
 432             }
 433             else
 434             {
 435                 /* The force at r=0 is zero, because of symmetry.
 436                  * But note that the potential is in general non-zero,
 437                  * since the soft-cored r will be non-zero.
 438                  */
 439                 rinv = 0;
 440                 r    = 0;
 441             }
 442
 443             if (useSoftCore)
 444             {
 445                 rpm2 = rsq * rsq;  /* r4 */
 446                 rp   = rpm2 * rsq; /* r6 */
 447             }
 448             else
 449             {
 450                 /* The soft-core power p will not affect the results
 451                  * with not using soft-core, so we use power of 0 which gives
 452                  * the simplest math and cheapest code.
 453                  */
 454                 rpm2 = rinv * rinv;
 455                 rp   = 1;
 456             }
 457
 458             RealType Fscal = 0;
 459
 460             qq[STATE_A] = iqA * chargeA[jnr];
 461             qq[STATE_B] = iqB * chargeB[jnr];
 462
 463             tj[STATE_A] = ntiA + 2 * typeA[jnr];
 464             tj[STATE_B] = ntiB + 2 * typeB[jnr];
 465
 466             if (bPairIncluded)
 467             {
 468                 c6[STATE_A] = nbfp[tj[STATE_A]];
 469                 c6[STATE_B] = nbfp[tj[STATE_B]];
 470
 471                 for (int i = 0; i < NSTATES; i++)
 472                 {
 473                     c12[i] = nbfp[tj[i] + 1];
 474                     if (useSoftCore)
 475                     {
 476                         if ((c6[i] > 0) && (c12[i] > 0))
 477                         {
 478                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 479                             sigma6[i] = half * c12[i] / c6[i];
 480                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
 481                             {
 482                                 sigma6[i] = sigma6_min;
 483                             }
 484                         }
 485                         else
 486                         {
 487                             sigma6[i] = sigma6_def;
 488                         }
 489                     }
 490                 }
 491
 492                 if (useSoftCore)
 493                 {
 494                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 495                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 496                     {
 497                         alpha_vdw_eff  = 0;
 498                         alpha_coul_eff = 0;
 499                     }
 500                     else
 501                     {
 502                         alpha_vdw_eff  = alpha_vdw;
 503                         alpha_coul_eff = alpha_coul;
 504                     }
 505                 }
 506
 507                 for (int i = 0; i < NSTATES; i++)
 508                 {
 509                     FscalC[i] = 0;
 510                     FscalV[i] = 0;
 511                     Vcoul[i]  = 0;
 512                     Vvdw[i]   = 0;
 513
 514                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
 515
 516                     /* Only spend time on A or B state if it is non-zero */
 517                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
 518                     {
 519                         /* this section has to be inside the loop because of the dependence on sigma6 */
 520                         if (useSoftCore)
 521                         {
 522                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
 523                             pthRoot(rpinvC, &rinvC, &rC);
 524                             if (scLambdasOrAlphasDiffer)
 525                             {
 526                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
 527                                 pthRoot(rpinvV, &rinvV, &rV);
 528                             }
 529                             else
 530                             {
 531                                 /* We can avoid one expensive pow and one / operation */
 532                                 rpinvV = rpinvC;
 533                                 rinvV  = rinvC;
 534                                 rV     = rC;
 535                             }
 536                         }
 537                         else
 538                         {
 539                             rpinvC = 1;
 540                             rinvC  = rinv;
 541                             rC     = r;
 542
 543                             rpinvV = 1;
 544                             rinvV  = rinv;
 545                             rV     = r;
 546                         }
 547
 548                         /* Only process the coulomb interactions if we have charges,
 549                          * and if we either include all entries in the list (no cutoff
 550                          * used in the kernel), or if we are within the cutoff.
 551                          */
 552                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
 553                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
 554
 555                         if ((qq[i] != 0) && computeElecInteraction)
 556                         {
 557                             if (elecInteractionTypeIsEwald)
 558                             {
 559                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
 560                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
 561                             }
 562                             else
 563                             {
 564                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
 565                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
 566                             }
 567                         }
 568
 569                         /* Only process the VDW interactions if we have
 570                          * some non-zero parameters, and if we either
 571                          * include all entries in the list (no cutoff used
 572                          * in the kernel), or if we are within the cutoff.
 573                          */
 574                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
 575                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
 576                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
 577                         {
 578                             RealType rinv6;
 579                             if (useSoftCore)
 580                             {
 581                                 rinv6 = rpinvV;
 582                             }
 583                             else
 584                             {
 585                                 rinv6 = calculateRinv6(rinvV);
 586                             }
 587                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
 588                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
 589
 590                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
 591                                                             dispersionShift, onesixth, onetwelfth);
 592                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
 593
 594                             if (vdwInteractionTypeIsEwald)
 595                             {
 596                                 /* Subtract the grid potential at the cut-off */
 597                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
 598                                                                          sh_lj_ewald, onesixth);
 599                             }
 600
 601                             if (vdwModifierIsPotSwitch)
 602                             {
 603                                 RealType d        = rV - ic->rvdw_switch;
 604                                 d                 = (d > zero) ? d : zero;
 605                                 const RealType d2 = d * d;
 606                                 const RealType sw =
 607                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
 608                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
 609
 610                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
 611                                                                     rvdw, dsw, zero);
 612                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
 613                             }
 614                         }
 615
 616                         /* FscalC (and FscalV) now contain: dV/drC * rC
 617                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 618                          * Further down we first multiply by r^p-2 and then by
 619                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 620                          */
 621                         FscalC[i] *= rpinvC;
 622                         FscalV[i] *= rpinvV;
 623                     }
 624                 } // end for (int i = 0; i < NSTATES; i++)
 625
 626                 /* Assemble A and B states */
 627                 for (int i = 0; i < NSTATES; i++)
 628                 {
 629                     vctot += LFC[i] * Vcoul[i];
 630                     vvtot += LFV[i] * Vvdw[i];
 631
 632                     Fscal += LFC[i] * FscalC[i] * rpm2;
 633                     Fscal += LFV[i] * FscalV[i] * rpm2;
 634
 635                     if (useSoftCore)
 636                     {
 637                         dvdl_coul += Vcoul[i] * DLF[i]
 638                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
 639                         dvdl_vdw += Vvdw[i] * DLF[i]
 640                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
 641                     }
 642                     else
 643                     {
 644                         dvdl_coul += Vcoul[i] * DLF[i];
 645                         dvdl_vdw += Vvdw[i] * DLF[i];
 646                     }
 647                 }
 648             } // end if (bPairIncluded)
 649             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 650             {
 651                 /* For excluded pairs, which are only in this pair list when
 652                  * using the Verlet scheme, we don't use soft-core.
 653                  * As there is no singularity, there is no need for soft-core.
 654                  */
 655                 const real FF = -two * krf;
 656                 RealType   VV = krf * rsq - crf;
 657
 658                 if (ii == jnr)
 659                 {
 660                     VV *= half;
 661                 }
 662
 663                 for (int i = 0; i < NSTATES; i++)
 664                 {
 665                     vctot += LFC[i] * qq[i] * VV;
 666                     Fscal += LFC[i] * qq[i] * FF;
 667                     dvdl_coul += DLF[i] * qq[i] * VV;
 668                 }
 669             }
 670
 671             if (elecInteractionTypeIsEwald && (r < rcoulomb || !bPairIncluded))
 672             {
 673                 /* See comment in the preamble. When using Ewald interactions
 674                  * (unless we use a switch modifier) we subtract the reciprocal-space
 675                  * Ewald component here which made it possible to apply the free
 676                  * energy interaction to 1/r (vanilla coulomb short-range part)
 677                  * above. This gets us closer to the ideal case of applying
 678                  * the softcore to the entire electrostatic interaction,
 679                  * including the reciprocal-space component.
 680                  */
 681                 real v_lr, f_lr;
 682
 683                 const RealType ewrt   = r * ewtabscale;
 684                 IntType        ewitab = static_cast<IntType>(ewrt);
 685                 const RealType eweps  = ewrt - ewitab;
 686                 ewitab                = 4 * ewitab;
 687                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
 688                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
 689                 f_lr *= rinv;
 690
 691                 /* Note that any possible Ewald shift has already been applied in
 692                  * the normal interaction part above.
 693                  */
 694
 695                 if (ii == jnr)
 696                 {
 697                     /* If we get here, the i particle (ii) has itself (jnr)
 698                      * in its neighborlist. This can only happen with the Verlet
 699                      * scheme, and corresponds to a self-interaction that will
 700                      * occur twice. Scale it down by 50% to only include it once.
 701                      */
 702                     v_lr *= half;
 703                 }
 704
 705                 for (int i = 0; i < NSTATES; i++)
 706                 {
 707                     vctot -= LFC[i] * qq[i] * v_lr;
 708                     Fscal -= LFC[i] * qq[i] * f_lr;
 709                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
 710                 }
 711             }
 712
 713             if (vdwInteractionTypeIsEwald && r < rvdw)
 714             {
 715                 /* See comment in the preamble. When using LJ-Ewald interactions
 716                  * (unless we use a switch modifier) we subtract the reciprocal-space
 717                  * Ewald component here which made it possible to apply the free
 718                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 719                  * above. This gets us closer to the ideal case of applying
 720                  * the softcore to the entire VdW interaction,
 721                  * including the reciprocal-space component.
 722                  */
 723                 /* We could also use the analytical form here
 724                  * iso a table, but that can cause issues for
 725                  * r close to 0 for non-interacting pairs.
 726                  */
 727
 728                 const RealType rs   = rsq * rinv * ewtabscale;
 729                 const IntType  ri   = static_cast<IntType>(rs);
 730                 const RealType frac = rs - ri;
 731                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
 732                 /* TODO: Currently the Ewald LJ table does not contain
 733                  * the factor 1/6, we should add this.
 734                  */
 735                 const RealType FF = f_lr * rinv / six;
 736                 RealType       VV =
 737                         (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
 738
 739                 if (ii == jnr)
 740                 {
 741                     /* If we get here, the i particle (ii) has itself (jnr)
 742                      * in its neighborlist. This can only happen with the Verlet
 743                      * scheme, and corresponds to a self-interaction that will
 744                      * occur twice. Scale it down by 50% to only include it once.
 745                      */
 746                     VV *= half;
 747                 }
 748
 749                 for (int i = 0; i < NSTATES; i++)
 750                 {
 751                     const real c6grid = nbfp_grid[tj[i]];
 752                     vvtot += LFV[i] * c6grid * VV;
 753                     Fscal += LFV[i] * c6grid * FF;
 754                     dvdl_vdw += (DLF[i] * c6grid) * VV;
 755                 }
 756             }
 757
 758             if (doForces)
 759             {
 760                 const real tx = Fscal * dx;
 761                 const real ty = Fscal * dy;
 762                 const real tz = Fscal * dz;
 763                 fix           = fix + tx;
 764                 fiy           = fiy + ty;
 765                 fiz           = fiz + tz;
 766                 /* OpenMP atomics are expensive, but this kernels is also
 767                  * expensive, so we can take this hit, instead of using
 768                  * thread-local output buffers and extra reduction.
 769                  *
 770                  * All the OpenMP regions in this file are trivial and should
 771                  * not throw, so no need for try/catch.
 772                  */
 773 #pragma omp atomic
 774                 f[j3] -= tx;
 775 #pragma omp atomic
 776                 f[j3 + 1] -= ty;
 777 #pragma omp atomic
 778                 f[j3 + 2] -= tz;
 779             }
 780         } // end for (int k = nj0; k < nj1; k++)
 781
 782         /* The atomics below are expensive with many OpenMP threads.
 783          * Here unperturbed i-particles will usually only have a few
 784          * (perturbed) j-particles in the list. Thus with a buffered list
 785          * we can skip a significant number of i-reductions with a check.
 786          */
 787         if (npair_within_cutoff > 0)
 788         {
 789             if (doForces)
 790             {
 791 #pragma omp atomic
 792                 f[ii3] += fix;
 793 #pragma omp atomic
 794                 f[ii3 + 1] += fiy;
 795 #pragma omp atomic
 796                 f[ii3 + 2] += fiz;
 797             }
 798             if (doShiftForces)
 799             {
 800 #pragma omp atomic
 801                 fshift[is3] += fix;
 802 #pragma omp atomic
 803                 fshift[is3 + 1] += fiy;
 804 #pragma omp atomic
 805                 fshift[is3 + 2] += fiz;
 806             }
 807             if (doPotential)
 808             {
 809                 int ggid = gid[n];
 810 #pragma omp atomic
 811                 Vc[ggid] += vctot;
 812 #pragma omp atomic
 813                 Vv[ggid] += vvtot;
 814             }
 815         }
 816     } // end for (int n = 0; n < nri; n++)
 817
 818 #pragma omp atomic
 819     dvdl[efptCOUL] += dvdl_coul;
 820 #pragma omp atomic
 821     dvdl[efptVDW] += dvdl_vdw;
 822
 823     /* Estimate flops, average for free energy stuff:
 824      * 12  flops per outer iteration
 825      * 150 flops per inner iteration
 826      */
 827 #pragma omp atomic
 828     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
 829 }
 830
 831 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
 832                                rvec* gmx_restrict         xx,
 833                                gmx::ForceWithShiftForces* forceWithShiftForces,
 834                                const t_forcerec* gmx_restrict fr,
 835                                const t_mdatoms* gmx_restrict mdatoms,
 836                                nb_kernel_data_t* gmx_restrict kernel_data,
 837                                t_nrnb* gmx_restrict nrnb);
 838
 839 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
 840 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 841 {
 842     if (useSimd)
 843     {
 844 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
 845         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
 846         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 847                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 848 #else
 849         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 850                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 851 #endif
 852     }
 853     else
 854     {
 855         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 856                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
 857     }
 858 }
 859
 860 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
 861 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
 862 {
 863     if (vdwModifierIsPotSwitch)
 864     {
 865         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 866                                         elecInteractionTypeIsEwald, true>(useSimd));
 867     }
 868     else
 869     {
 870         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 871                                         elecInteractionTypeIsEwald, false>(useSimd));
 872     }
 873 }
 874
 875 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
 876 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
 877                                                           const bool vdwModifierIsPotSwitch,
 878                                                           const bool useSimd)
 879 {
 880     if (elecInteractionTypeIsEwald)
 881     {
 882         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
 883                 vdwModifierIsPotSwitch, useSimd));
 884     }
 885     else
 886     {
 887         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
 888                 vdwModifierIsPotSwitch, useSimd));
 889     }
 890 }
 891
 892 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
 893 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
 894                                                          const bool elecInteractionTypeIsEwald,
 895                                                          const bool vdwModifierIsPotSwitch,
 896                                                          const bool useSimd)
 897 {
 898     if (vdwInteractionTypeIsEwald)
 899     {
 900         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
 901                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 902     }
 903     else
 904     {
 905         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
 906                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 907     }
 908 }
 909
 910 template<bool useSoftCore>
 911 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
 912                                                                   const bool vdwInteractionTypeIsEwald,
 913                                                                   const bool elecInteractionTypeIsEwald,
 914                                                                   const bool vdwModifierIsPotSwitch,
 915                                                                   const bool useSimd)
 916 {
 917     if (scLambdasOrAlphasDiffer)
 918     {
 919         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
 920                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 921     }
 922     else
 923     {
 924         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
 925                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
 926     }
 927 }
 928
 929 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
 930                                      const bool        vdwInteractionTypeIsEwald,
 931                                      const bool        elecInteractionTypeIsEwald,
 932                                      const bool        vdwModifierIsPotSwitch,
 933                                      const bool        useSimd,
 934                                      const t_forcerec* fr)
 935 {
 936     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 937     {
 938         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
 939                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 940                 vdwModifierIsPotSwitch, useSimd));
 941     }
 942     else
 943     {
 944         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
 945                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
 946                 vdwModifierIsPotSwitch, useSimd));
 947     }
 948 }
 949
 950
 951 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
 952                                rvec*                      xx,
 953                                gmx::ForceWithShiftForces* ff,
 954                                const t_forcerec*          fr,
 955                                const t_mdatoms*           mdatoms,
 956                                nb_kernel_data_t*          kernel_data,
 957                                t_nrnb*                    nrnb)
 958 {
 959     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
 960                "Unsupported eeltype with free energy");
 961
 962     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
 963     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
 964     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
 965     bool       scLambdasOrAlphasDiffer    = true;
 966     const bool useSimd                    = fr->use_simd_kernels;
 967
 968     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 969     {
 970         scLambdasOrAlphasDiffer = false;
 971     }
 972     else if (fr->sc_r_power == 6.0_real)
 973     {
 974         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
 975         {
 976             scLambdasOrAlphasDiffer = false;
 977         }
 978     }
 979     else
 980     {
 981         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 982     }
 983
 984     KernelFunction kernelFunc;
 985     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
 986                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, fr);
 987     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 988 }