2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "nb_free_energy.h"
49 #include "gromacs/gmxlib/nrnb.h"
50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
51 #include "gromacs/math/arrayrefwithpadding.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/mdtypes/forceoutput.h"
55 #include "gromacs/mdtypes/forcerec.h"
56 #include "gromacs/mdtypes/interaction_const.h"
57 #include "gromacs/mdtypes/md_enums.h"
58 #include "gromacs/mdtypes/mdatom.h"
59 #include "gromacs/mdtypes/nblist.h"
60 #include "gromacs/pbcutil/ishift.h"
61 #include "gromacs/simd/simd.h"
62 #include "gromacs/simd/simd_math.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/arrayref.h"
66 #include "nb_softcore.h"
68 //! Scalar (non-SIMD) data types.
69 struct ScalarDataTypes
71 using RealType = real; //!< The data type to use as real.
72 using IntType = int; //!< The data type to use as int.
73 using BoolType = bool; //!< The data type to use as bool for real value comparison.
74 static constexpr int simdRealWidth = 1; //!< The width of the RealType.
75 static constexpr int simdIntWidth = 1; //!< The width of the IntType.
78 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
82 using RealType = gmx::SimdReal; //!< The data type to use as real.
83 using IntType = gmx::SimdInt32; //!< The data type to use as int.
84 using BoolType = gmx::SimdBool; //!< The data type to use as bool for real value comparison.
85 static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH; //!< The width of the RealType.
86 # if GMX_SIMD_HAVE_DOUBLE && GMX_DOUBLE
87 static constexpr int simdIntWidth = GMX_SIMD_DINT32_WIDTH; //!< The width of the IntType.
89 static constexpr int simdIntWidth = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
94 /*! \brief Lower limit for square interaction distances in nonbonded kernels.
96 * This is a mimimum on r^2 to avoid overflows when computing r^6.
97 * This will only affect results for soft-cored interaction at distances smaller
98 * than 1e-6 and will limit extremely high foreign energies for overlapping atoms.
99 * Note that we could use a somewhat smaller minimum in double precision.
100 * But because invsqrt in double precision can use single precision, this number
101 * can not be much smaller, we use the same number for simplicity.
103 constexpr real c_minDistanceSquared = 1.0e-12_real;
105 /*! \brief Higher limit for r^-6 used for Lennard-Jones interactions
107 * This is needed to avoid overflow of LJ energy and force terms for excluded
108 * atoms and foreign energies of hard-core states of overlapping atoms.
109 * Note that in single precision this value leaves room for C12 coefficients up to 3.4e8.
111 constexpr real c_maxRInvSix = 1.0e15_real;
113 template<bool computeForces, class RealType>
115 pmeCoulombCorrectionVF(const RealType rSq, const real beta, RealType* pot, RealType gmx_unused* force)
117 const RealType brsq = rSq * beta * beta;
118 if constexpr (computeForces)
120 *force = -brsq * beta * gmx::pmeForceCorrection(brsq);
122 *pot = beta * gmx::pmePotentialCorrection(brsq);
125 template<bool computeForces, class RealType, class BoolType>
126 static inline void pmeLJCorrectionVF(const RealType rInv,
128 const real ewaldLJCoeffSq,
129 const real ewaldLJCoeffSixDivSix,
131 RealType gmx_unused* force,
133 const BoolType bIiEqJnr)
135 // We mask rInv to get zero force and potential for masked out pair interactions
136 const RealType rInvSq = rInv * rInv;
137 const RealType rInvSix = rInvSq * rInvSq * rInvSq;
138 // Mask rSq to avoid underflow in exp()
139 const RealType coeffSqRSq = ewaldLJCoeffSq * gmx::selectByMask(rSq, mask);
140 const RealType expNegCoeffSqRSq = gmx::exp(-coeffSqRSq);
141 const RealType poly = 1.0_real + coeffSqRSq + 0.5_real * coeffSqRSq * coeffSqRSq;
142 if constexpr (computeForces)
144 *force = rInvSix - expNegCoeffSqRSq * (rInvSix * poly + ewaldLJCoeffSixDivSix);
145 *force = *force * rInvSq;
147 // The self interaction is the limit for r -> 0 which we need to compute separately
149 rInvSix * (1.0_real - expNegCoeffSqRSq * poly), 0.5_real * ewaldLJCoeffSixDivSix, bIiEqJnr);
152 //! Computes r^(1/6) and 1/r^(1/6)
153 template<class RealType>
154 static inline void sixthRoot(const RealType r, RealType* sixthRoot, RealType* invSixthRoot)
156 RealType cbrtRes = gmx::cbrt(r);
157 *invSixthRoot = gmx::invsqrt(cbrtRes);
158 *sixthRoot = gmx::inv(*invSixthRoot);
161 template<class RealType>
162 static inline RealType calculateRinv6(const RealType rInvV)
164 RealType rInv6 = rInvV * rInvV;
165 return (rInv6 * rInv6 * rInv6);
168 template<class RealType>
169 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
174 template<class RealType>
175 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
177 return (c12 * rInv6 * rInv6);
180 /* reaction-field electrostatics */
181 template<class RealType>
182 static inline RealType reactionFieldScalarForce(const RealType qq,
188 return (qq * (rInv - two * krf * r * r));
190 template<class RealType>
191 static inline RealType reactionFieldPotential(const RealType qq,
195 const real potentialShift)
197 return (qq * (rInv + krf * r * r - potentialShift));
200 /* Ewald electrostatics */
201 template<class RealType>
202 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
204 return (coulomb * rInv);
206 template<class RealType>
207 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
209 return (coulomb * (rInv - potentialShift));
213 template<class RealType>
214 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
218 template<class RealType>
219 static inline RealType lennardJonesPotential(const RealType v6,
223 const real repulsionShift,
224 const real dispersionShift,
226 const real oneTwelfth)
228 return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
232 template<class RealType>
233 static inline RealType ewaldLennardJonesGridSubtract(const RealType c6grid,
234 const real potentialShift,
237 return (c6grid * potentialShift * oneSixth);
240 /* LJ Potential switch */
241 template<class RealType, class BoolType>
242 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
243 const RealType potential,
249 /* The mask should select on rV < rVdw */
250 return (gmx::selectByMask(fScalarInp * sw - r * potential * dsw, mask));
252 template<class RealType, class BoolType>
253 static inline RealType potSwitchPotentialMod(const RealType potentialInp, const RealType sw, const BoolType mask)
255 /* The mask should select on rV < rVdw */
256 return (gmx::selectByMask(potentialInp * sw, mask));
260 //! Templated free-energy non-bonded kernel
261 template<typename DataTypes, KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch, bool computeForces>
262 static void nb_free_energy_kernel(const t_nblist& nlist,
263 const gmx::ArrayRefWithPadding<const gmx::RVec>& coords,
266 const interaction_const_t& ic,
267 gmx::ArrayRef<const gmx::RVec> shiftvec,
268 gmx::ArrayRef<const real> nbfp,
269 gmx::ArrayRef<const real> gmx_unused nbfp_grid,
270 gmx::ArrayRef<const real> chargeA,
271 gmx::ArrayRef<const real> chargeB,
272 gmx::ArrayRef<const int> typeA,
273 gmx::ArrayRef<const int> typeB,
275 gmx::ArrayRef<const real> lambda,
276 t_nrnb* gmx_restrict nrnb,
277 gmx::ArrayRefWithPadding<gmx::RVec> threadForceBuffer,
278 rvec gmx_unused* threadForceShiftBuffer,
279 gmx::ArrayRef<real> threadVc,
280 gmx::ArrayRef<real> threadVv,
281 gmx::ArrayRef<real> threadDvdl)
287 using RealType = typename DataTypes::RealType;
288 using IntType = typename DataTypes::IntType;
289 using BoolType = typename DataTypes::BoolType;
291 constexpr real oneTwelfth = 1.0_real / 12.0_real;
292 constexpr real oneSixth = 1.0_real / 6.0_real;
293 constexpr real zero = 0.0_real;
294 constexpr real half = 0.5_real;
295 constexpr real one = 1.0_real;
296 constexpr real two = 2.0_real;
297 constexpr real six = 6.0_real;
299 // Extract pair list data
300 const int nri = nlist.nri;
301 gmx::ArrayRef<const int> iinr = nlist.iinr;
302 gmx::ArrayRef<const int> jindex = nlist.jindex;
303 gmx::ArrayRef<const int> jjnr = nlist.jjnr;
304 gmx::ArrayRef<const int> shift = nlist.shift;
305 gmx::ArrayRef<const int> gid = nlist.gid;
307 const real lambda_coul = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
308 const real lambda_vdw = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
310 // Extract softcore parameters
311 const auto& scParams = *ic.softCoreParameters;
312 const real lam_power = scParams.lambdaPower;
313 const real gmx_unused alpha_coul = scParams.alphaCoulomb;
314 const real gmx_unused alpha_vdw = scParams.alphaVdw;
315 const real gmx_unused sigma6_def = scParams.sigma6WithInvalidSigma;
316 const real gmx_unused sigma6_min = scParams.sigma6Minimum;
318 const real gmx_unused scaleLinpointCoulGapsys = scParams.scaleLinpointCoulGapsys;
319 const real gmx_unused scaleLinpointVdWGapsys = scParams.scaleLinpointVdWGapsys;
320 const real gmx_unused sigma6VdWGapsys = scParams.sigma6VdWGapsys;
322 const bool gmx_unused doShiftForces = ((flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
323 const bool doPotential = ((flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
325 // Extract data from interaction_const_t
326 const real facel = ic.epsfac;
327 const real rCoulomb = ic.rcoulomb;
328 const real krf = ic.reactionFieldCoefficient;
329 const real crf = ic.reactionFieldShift;
330 const real gmx_unused shLjEwald = ic.sh_lj_ewald;
331 const real rVdw = ic.rvdw;
332 const real dispersionShift = ic.dispersion_shift.cpot;
333 const real repulsionShift = ic.repulsion_shift.cpot;
334 const real ewaldBeta = ic.ewaldcoeff_q;
335 real gmx_unused ewaldLJCoeffSq;
336 real gmx_unused ewaldLJCoeffSixDivSix;
337 if constexpr (vdwInteractionTypeIsEwald)
339 ewaldLJCoeffSq = ic.ewaldcoeff_lj * ic.ewaldcoeff_lj;
340 ewaldLJCoeffSixDivSix = ewaldLJCoeffSq * ewaldLJCoeffSq * ewaldLJCoeffSq / six;
343 // Note that the nbnxm kernels do not support Coulomb potential switching at all
344 GMX_ASSERT(ic.coulomb_modifier != InteractionModifiers::PotSwitch,
345 "Potential switching is not supported for Coulomb with FEP");
347 const real rVdwSwitch = ic.rvdw_switch;
348 real gmx_unused vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
349 if constexpr (vdwModifierIsPotSwitch)
351 const real d = rVdw - rVdwSwitch;
352 vdw_swV3 = -10.0_real / (d * d * d);
353 vdw_swV4 = 15.0_real / (d * d * d * d);
354 vdw_swV5 = -6.0_real / (d * d * d * d * d);
355 vdw_swF2 = -30.0_real / (d * d * d);
356 vdw_swF3 = 60.0_real / (d * d * d * d);
357 vdw_swF4 = -30.0_real / (d * d * d * d * d);
361 /* Avoid warnings from stupid compilers (looking at you, Clang!) */
362 vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = zero;
365 NbkernelElecType icoul;
366 if (ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype))
368 icoul = NbkernelElecType::ReactionField;
372 icoul = NbkernelElecType::None;
375 real rcutoff_max2 = std::max(ic.rcoulomb, ic.rvdw);
376 rcutoff_max2 = rcutoff_max2 * rcutoff_max2;
377 const real gmx_unused rCutoffCoul = ic.rcoulomb;
379 real gmx_unused sh_ewald = 0;
380 if constexpr (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
382 sh_ewald = ic.sh_ewald;
385 /* For Ewald/PME interactions we cannot easily apply the soft-core component to
386 * reciprocal space. When we use non-switched Ewald interactions, we
387 * assume the soft-coring does not significantly affect the grid contribution
388 * and apply the soft-core only to the full 1/r (- shift) pair contribution.
390 * However, we cannot use this approach for switch-modified since we would then
391 * effectively end up evaluating a significantly different interaction here compared to the
392 * normal (non-free-energy) kernels, either by applying a cutoff at a different
393 * position than what the user requested, or by switching different
394 * things (1/r rather than short-range Ewald). For these settings, we just
395 * use the traditional short-range Ewald interaction in that case.
397 GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
398 "Can not apply soft-core to switched Ewald potentials");
400 const RealType minDistanceSquared(c_minDistanceSquared);
401 const RealType maxRInvSix(c_maxRInvSix);
402 const RealType gmx_unused floatMin(GMX_FLOAT_MIN);
404 RealType dvdlCoul(zero);
405 RealType dvdlVdw(zero);
407 /* Lambda factor for state A, 1-lambda*/
408 real LFC[NSTATES], LFV[NSTATES];
409 LFC[STATE_A] = one - lambda_coul;
410 LFV[STATE_A] = one - lambda_vdw;
412 /* Lambda factor for state B, lambda*/
413 LFC[STATE_B] = lambda_coul;
414 LFV[STATE_B] = lambda_vdw;
416 /*derivative of the lambda factor for state A and B */
421 real gmx_unused lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
422 constexpr real sc_r_power = six;
423 for (int i = 0; i < NSTATES; i++)
425 lFacCoul[i] = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
426 dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
427 lFacVdw[i] = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
428 dlFacVdw[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
431 // We need pointers to real for SIMD access
432 const real* gmx_restrict x = coords.paddedConstArrayRef().data()[0];
433 real* gmx_restrict forceRealPtr;
434 if constexpr (computeForces)
436 GMX_ASSERT(nri == 0 || !threadForceBuffer.empty(), "need a valid threadForceBuffer");
438 forceRealPtr = threadForceBuffer.paddedArrayRef().data()[0];
442 GMX_ASSERT(threadForceShiftBuffer != nullptr, "need a valid threadForceShiftBuffer");
446 const real rlistSquared = gmx::square(rlist);
448 bool haveExcludedPairsBeyondRlist = false;
450 for (int n = 0; n < nri; n++)
452 bool havePairsWithinCutoff = false;
454 const int is = shift[n];
455 const real shX = shiftvec[is][XX];
456 const real shY = shiftvec[is][YY];
457 const real shZ = shiftvec[is][ZZ];
458 const int nj0 = jindex[n];
459 const int nj1 = jindex[n + 1];
460 const int ii = iinr[n];
461 const int ii3 = 3 * ii;
462 const real ix = shX + x[ii3 + 0];
463 const real iy = shY + x[ii3 + 1];
464 const real iz = shZ + x[ii3 + 2];
465 const real iqA = facel * chargeA[ii];
466 const real iqB = facel * chargeB[ii];
467 const int ntiA = ntype * typeA[ii];
468 const int ntiB = ntype * typeB[ii];
475 #if GMX_SIMD_HAVE_REAL
476 alignas(GMX_SIMD_ALIGNMENT) int preloadIi[DataTypes::simdRealWidth];
477 alignas(GMX_SIMD_ALIGNMENT) int gmx_unused preloadIs[DataTypes::simdRealWidth];
479 int preloadIi[DataTypes::simdRealWidth];
480 int gmx_unused preloadIs[DataTypes::simdRealWidth];
482 for (int s = 0; s < DataTypes::simdRealWidth; s++)
485 preloadIs[s] = shift[n];
487 IntType ii_s = gmx::load<IntType>(preloadIi);
489 for (int k = nj0; k < nj1; k += DataTypes::simdRealWidth)
493 #if GMX_SIMD_HAVE_REAL
494 alignas(GMX_SIMD_ALIGNMENT) real preloadPairIsValid[DataTypes::simdRealWidth];
495 alignas(GMX_SIMD_ALIGNMENT) real preloadPairIncluded[DataTypes::simdRealWidth];
496 alignas(GMX_SIMD_ALIGNMENT) int32_t preloadJnr[DataTypes::simdRealWidth];
497 alignas(GMX_SIMD_ALIGNMENT) int32_t typeIndices[NSTATES][DataTypes::simdRealWidth];
498 alignas(GMX_SIMD_ALIGNMENT) real preloadQq[NSTATES][DataTypes::simdRealWidth];
499 alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadSigma6[NSTATES][DataTypes::simdRealWidth];
500 alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadAlphaVdwEff[DataTypes::simdRealWidth];
501 alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadAlphaCoulEff[DataTypes::simdRealWidth];
502 alignas(GMX_SIMD_ALIGNMENT)
503 real gmx_unused preloadScaleLinpointVdWGapsys[DataTypes::simdRealWidth];
504 alignas(GMX_SIMD_ALIGNMENT)
505 real gmx_unused preloadScaleLinpointCoulGapsys[DataTypes::simdRealWidth];
506 alignas(GMX_SIMD_ALIGNMENT)
507 real gmx_unused preloadSigma6VdWGapsys[NSTATES][DataTypes::simdRealWidth];
508 alignas(GMX_SIMD_ALIGNMENT) real preloadLjPmeC6Grid[NSTATES][DataTypes::simdRealWidth];
510 real preloadPairIsValid[DataTypes::simdRealWidth];
511 real preloadPairIncluded[DataTypes::simdRealWidth];
512 int preloadJnr[DataTypes::simdRealWidth];
513 int typeIndices[NSTATES][DataTypes::simdRealWidth];
514 real preloadQq[NSTATES][DataTypes::simdRealWidth];
515 real gmx_unused preloadSigma6[NSTATES][DataTypes::simdRealWidth];
516 real gmx_unused preloadAlphaVdwEff[DataTypes::simdRealWidth];
517 real gmx_unused preloadAlphaCoulEff[DataTypes::simdRealWidth];
518 real gmx_unused preloadScaleLinpointVdWGapsys[DataTypes::simdRealWidth];
519 real gmx_unused preloadScaleLinpointCoulGapsys[DataTypes::simdRealWidth];
520 real gmx_unused preloadSigma6VdWGapsys[NSTATES][DataTypes::simdRealWidth];
521 real preloadLjPmeC6Grid[NSTATES][DataTypes::simdRealWidth];
523 for (int s = 0; s < DataTypes::simdRealWidth; s++)
527 preloadPairIsValid[s] = true;
528 /* Check if this pair on the exclusions list.*/
529 preloadPairIncluded[s] = (nlist.excl_fep.empty() || nlist.excl_fep[k + s]);
530 const int jnr = jjnr[k + s];
532 typeIndices[STATE_A][s] = ntiA + typeA[jnr];
533 typeIndices[STATE_B][s] = ntiB + typeB[jnr];
534 preloadQq[STATE_A][s] = iqA * chargeA[jnr];
535 preloadQq[STATE_B][s] = iqB * chargeB[jnr];
537 for (int i = 0; i < NSTATES; i++)
539 if constexpr (vdwInteractionTypeIsEwald)
541 preloadLjPmeC6Grid[i][s] = nbfp_grid[2 * typeIndices[i][s]];
545 preloadLjPmeC6Grid[i][s] = 0;
547 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
549 const real c6 = nbfp[2 * typeIndices[i][s]];
550 const real c12 = nbfp[2 * typeIndices[i][s] + 1];
551 if (c6 > 0 && c12 > 0)
553 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
554 preloadSigma6[i][s] = 0.5_real * c12 / c6;
555 if (preloadSigma6[i][s]
556 < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
558 preloadSigma6[i][s] = sigma6_min;
563 preloadSigma6[i][s] = sigma6_def;
566 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
568 const real c6 = nbfp[2 * typeIndices[i][s]];
569 const real c12 = nbfp[2 * typeIndices[i][s] + 1];
570 if (c6 > 0 && c12 > 0)
572 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
573 preloadSigma6VdWGapsys[i][s] = 0.5_real * c12 / c6;
574 if (preloadSigma6VdWGapsys[i][s] < sigma6VdWGapsys)
576 preloadSigma6VdWGapsys[i][s] = sigma6VdWGapsys;
581 preloadSigma6VdWGapsys[i][s] = sigma6VdWGapsys;
585 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
587 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
588 const real c12A = nbfp[2 * typeIndices[STATE_A][s] + 1];
589 const real c12B = nbfp[2 * typeIndices[STATE_B][s] + 1];
590 if (c12A > 0 && c12B > 0)
592 preloadAlphaVdwEff[s] = 0;
593 preloadAlphaCoulEff[s] = 0;
597 preloadAlphaVdwEff[s] = alpha_vdw;
598 preloadAlphaCoulEff[s] = alpha_coul;
601 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
603 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
604 const real c12A = nbfp[2 * typeIndices[STATE_A][s] + 1];
605 const real c12B = nbfp[2 * typeIndices[STATE_B][s] + 1];
606 if (c12A > 0 && c12B > 0)
608 preloadScaleLinpointVdWGapsys[s] = 0;
609 preloadScaleLinpointCoulGapsys[s] = 0;
613 preloadScaleLinpointVdWGapsys[s] = scaleLinpointVdWGapsys;
614 preloadScaleLinpointCoulGapsys[s] = scaleLinpointCoulGapsys;
620 preloadJnr[s] = jjnr[k];
621 preloadPairIsValid[s] = false;
622 preloadPairIncluded[s] = false;
623 preloadAlphaVdwEff[s] = 0;
624 preloadAlphaCoulEff[s] = 0;
625 preloadScaleLinpointVdWGapsys[s] = 0;
626 preloadScaleLinpointCoulGapsys[s] = 0;
628 for (int i = 0; i < NSTATES; i++)
630 typeIndices[STATE_A][s] = ntiA + typeA[jjnr[k]];
631 typeIndices[STATE_B][s] = ntiB + typeB[jjnr[k]];
632 preloadLjPmeC6Grid[i][s] = 0;
634 preloadSigma6[i][s] = 0;
635 preloadSigma6VdWGapsys[i][s] = 0;
641 gmx::gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), preloadJnr, &jx, &jy, &jz);
643 const RealType pairIsValid = gmx::load<RealType>(preloadPairIsValid);
644 const RealType pairIncluded = gmx::load<RealType>(preloadPairIncluded);
645 const BoolType bPairIncluded = (pairIncluded != zero);
646 const BoolType bPairExcluded = (pairIncluded == zero && pairIsValid != zero);
648 const RealType dX = ix - jx;
649 const RealType dY = iy - jy;
650 const RealType dZ = iz - jz;
651 RealType rSq = dX * dX + dY * dY + dZ * dZ;
653 BoolType withinCutoffMask = (rSq < rcutoff_max2);
655 if (!gmx::anyTrue(withinCutoffMask || bPairExcluded))
657 /* We save significant time by skipping all code below.
658 * Note that with soft-core interactions, the actual cut-off
659 * check might be different. But since the soft-core distance
660 * is always larger than r, checking on r here is safe.
661 * Exclusions outside the cutoff can not be skipped as
662 * when using Ewald: the reciprocal-space
663 * Ewald component still needs to be subtracted.
669 havePairsWithinCutoff = true;
672 if (gmx::anyTrue(rlistSquared < rSq && bPairExcluded))
674 haveExcludedPairsBeyondRlist = true;
677 const IntType jnr_s = gmx::load<IntType>(preloadJnr);
678 const BoolType bIiEqJnr = gmx::cvtIB2B(ii_s == jnr_s);
680 RealType c6[NSTATES];
681 RealType c12[NSTATES];
682 RealType gmx_unused sigma6[NSTATES];
683 RealType qq[NSTATES];
684 RealType gmx_unused ljPmeC6Grid[NSTATES];
685 RealType gmx_unused alphaVdwEff;
686 RealType gmx_unused alphaCoulEff;
687 RealType gmx_unused scaleLinpointVdWGapsysEff;
688 RealType gmx_unused scaleLinpointCoulGapsysEff;
689 RealType gmx_unused sigmaVdWGapsysEff[NSTATES];
690 for (int i = 0; i < NSTATES; i++)
692 gmx::gatherLoadTranspose<2>(nbfp.data(), typeIndices[i], &c6[i], &c12[i]);
693 qq[i] = gmx::load<RealType>(preloadQq[i]);
694 ljPmeC6Grid[i] = gmx::load<RealType>(preloadLjPmeC6Grid[i]);
695 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
697 sigma6[i] = gmx::load<RealType>(preloadSigma6[i]);
699 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
701 sigmaVdWGapsysEff[i] = gmx::load<RealType>(preloadSigma6VdWGapsys[i]);
704 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
706 alphaVdwEff = gmx::load<RealType>(preloadAlphaVdwEff);
707 alphaCoulEff = gmx::load<RealType>(preloadAlphaCoulEff);
709 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
711 scaleLinpointVdWGapsysEff = gmx::load<RealType>(preloadScaleLinpointVdWGapsys);
712 scaleLinpointCoulGapsysEff = gmx::load<RealType>(preloadScaleLinpointCoulGapsys);
715 // Avoid overflow of r^-12 at distances near zero
716 rSq = gmx::max(rSq, minDistanceSquared);
717 rInv = gmx::invsqrt(rSq);
720 RealType gmx_unused rp, rpm2;
721 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
723 rpm2 = rSq * rSq; /* r4 */
724 rp = rpm2 * rSq; /* r6 */
728 /* The soft-core power p will not affect the results
729 * with not using soft-core, so we use power of 0 which gives
730 * the simplest math and cheapest code.
738 /* The following block is masked to only calculate values having bPairIncluded. If
739 * bPairIncluded is true then withinCutoffMask must also be true. */
740 if (gmx::anyTrue(withinCutoffMask && bPairIncluded))
742 RealType gmx_unused fScalC[NSTATES], fScalV[NSTATES];
743 RealType vCoul[NSTATES], vVdw[NSTATES];
744 for (int i = 0; i < NSTATES; i++)
751 RealType gmx_unused rInvC, rInvV, rC, rV, rPInvC, rPInvV;
753 /* The following block is masked to require (qq[i] != 0 || c6[i] != 0 || c12[i]
754 * != 0) in addition to bPairIncluded, which in turn requires withinCutoffMask. */
755 BoolType nonZeroState = ((qq[i] != zero || c6[i] != zero || c12[i] != zero)
756 && bPairIncluded && withinCutoffMask);
757 if (gmx::anyTrue(nonZeroState))
759 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
761 RealType divisor = (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
762 rPInvC = gmx::inv(divisor);
763 sixthRoot(rPInvC, &rInvC, &rC);
765 if constexpr (scLambdasOrAlphasDiffer)
767 RealType divisor = (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
768 rPInvV = gmx::inv(divisor);
769 sixthRoot(rPInvV, &rInvV, &rV);
773 /* We can avoid one expensive pow and one / operation */
790 /* Only process the coulomb interactions if we either
791 * include all entries in the list (no cutoff
792 * used in the kernel), or if we are within the cutoff.
794 BoolType computeElecInteraction;
795 if constexpr (elecInteractionTypeIsEwald)
797 computeElecInteraction = (r < rCoulomb && qq[i] != zero && bPairIncluded);
801 computeElecInteraction = (rC < rCoulomb && qq[i] != zero && bPairIncluded);
803 if (gmx::anyTrue(computeElecInteraction))
805 if constexpr (elecInteractionTypeIsEwald)
807 vCoul[i] = ewaldPotential(qq[i], rInvC, sh_ewald);
808 if constexpr (computeForces)
810 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
813 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
815 ewaldQuadraticPotential(qq[i],
821 scaleLinpointCoulGapsysEff,
826 computeElecInteraction);
831 vCoul[i] = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
832 if constexpr (computeForces)
834 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
837 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
839 reactionFieldQuadraticPotential(qq[i],
845 scaleLinpointCoulGapsysEff,
851 computeElecInteraction);
855 vCoul[i] = gmx::selectByMask(vCoul[i], computeElecInteraction);
856 if constexpr (computeForces)
858 fScalC[i] = gmx::selectByMask(fScalC[i], computeElecInteraction);
862 /* Only process the VDW interactions if we either
863 * include all entries in the list (no cutoff used
864 * in the kernel), or if we are within the cutoff.
866 BoolType computeVdwInteraction;
867 if constexpr (vdwInteractionTypeIsEwald)
869 computeVdwInteraction =
870 (r < rVdw && (c6[i] != 0 || c12[i] != 0) && bPairIncluded);
874 computeVdwInteraction =
875 (rV < rVdw && (c6[i] != 0 || c12[i] != 0) && bPairIncluded);
877 if (gmx::anyTrue(computeVdwInteraction))
880 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
886 rInv6 = calculateRinv6(rInvV);
888 // Avoid overflow at short distance for masked exclusions and
889 // for foreign energy calculations at a hard core end state.
890 // Note that we should limit r^-6, and thus also r^-12, and
891 // not only r^-12, as that could lead to erroneously low instead
892 // of very high foreign energies.
893 rInv6 = gmx::min(rInv6, maxRInvSix);
894 RealType vVdw6 = calculateVdw6(c6[i], rInv6);
895 RealType vVdw12 = calculateVdw12(c12[i], rInv6);
897 vVdw[i] = lennardJonesPotential(
898 vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
899 if constexpr (computeForces)
901 fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
904 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
906 lennardJonesQuadraticPotential(c6[i],
912 sigmaVdWGapsysEff[i],
913 scaleLinpointVdWGapsysEff,
919 computeVdwInteraction);
922 if constexpr (vdwInteractionTypeIsEwald)
924 /* Subtract the grid potential at the cut-off */
926 + gmx::selectByMask(ewaldLennardJonesGridSubtract(
927 ljPmeC6Grid[i], shLjEwald, oneSixth),
928 computeVdwInteraction);
931 if constexpr (vdwModifierIsPotSwitch)
933 RealType d = rV - rVdwSwitch;
934 BoolType zeroMask = zero < d;
935 BoolType potSwitchMask = rV < rVdw;
936 d = gmx::selectByMask(d, zeroMask);
937 const RealType d2 = d * d;
939 one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
941 if constexpr (computeForces)
943 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
944 fScalV[i] = potSwitchScalarForceMod(
945 fScalV[i], vVdw[i], sw, rV, dsw, potSwitchMask);
947 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, potSwitchMask);
950 vVdw[i] = gmx::selectByMask(vVdw[i], computeVdwInteraction);
951 if constexpr (computeForces)
953 fScalV[i] = gmx::selectByMask(fScalV[i], computeVdwInteraction);
957 if constexpr (computeForces)
959 /* fScalC (and fScalV) now contain: dV/drC * rC
960 * Now we multiply by rC^-6, so it will be: dV/drC * rC^-5
961 * Further down we first multiply by r^4 and then by
962 * the vector r, which in total gives: dV/drC * (r/rC)^-5
964 fScalC[i] = fScalC[i] * rPInvC;
965 fScalV[i] = fScalV[i] * rPInvV;
967 } // end of block requiring nonZeroState
968 } // end for (int i = 0; i < NSTATES; i++)
970 /* Assemble A and B states. */
971 BoolType assembleStates = (bPairIncluded && withinCutoffMask);
972 if (gmx::anyTrue(assembleStates))
974 for (int i = 0; i < NSTATES; i++)
976 vCTot = vCTot + LFC[i] * vCoul[i];
977 vVTot = vVTot + LFV[i] * vVdw[i];
979 if constexpr (computeForces)
981 fScal = fScal + LFC[i] * fScalC[i] * rpm2;
982 fScal = fScal + LFV[i] * fScalV[i] * rpm2;
985 if constexpr (softcoreType == KernelSoftcoreType::Beutler)
987 dvdlCoul = dvdlCoul + vCoul[i] * DLF[i]
988 + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
989 dvdlVdw = dvdlVdw + vVdw[i] * DLF[i]
990 + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
994 dvdlCoul = dvdlCoul + vCoul[i] * DLF[i];
995 dvdlVdw = dvdlVdw + vVdw[i] * DLF[i];
999 } // end of block requiring bPairIncluded && withinCutoffMask
1000 /* In the following block bPairIncluded should be false in the masks. */
1001 if (icoul == NbkernelElecType::ReactionField)
1003 const BoolType computeReactionField = bPairExcluded;
1005 if (gmx::anyTrue(computeReactionField))
1007 /* For excluded pairs we don't use soft-core.
1008 * As there is no singularity, there is no need for soft-core.
1010 const RealType FF = -two * krf;
1011 RealType VV = krf * rSq - crf;
1013 /* If ii == jnr the i particle (ii) has itself (jnr)
1014 * in its neighborlist. This corresponds to a self-interaction
1015 * that will occur twice. Scale it down by 50% to only include
1018 VV = VV * gmx::blend(one, half, bIiEqJnr);
1020 for (int i = 0; i < NSTATES; i++)
1022 vCTot = vCTot + gmx::selectByMask(LFC[i] * qq[i] * VV, computeReactionField);
1023 fScal = fScal + gmx::selectByMask(LFC[i] * qq[i] * FF, computeReactionField);
1024 dvdlCoul = dvdlCoul + gmx::selectByMask(DLF[i] * qq[i] * VV, computeReactionField);
1029 const BoolType computeElecEwaldInteraction = (bPairExcluded || r < rCoulomb);
1030 if (elecInteractionTypeIsEwald && gmx::anyTrue(computeElecEwaldInteraction))
1032 /* See comment in the preamble. When using Ewald interactions
1033 * (unless we use a switch modifier) we subtract the reciprocal-space
1034 * Ewald component here which made it possible to apply the free
1035 * energy interaction to 1/r (vanilla coulomb short-range part)
1036 * above. This gets us closer to the ideal case of applying
1037 * the softcore to the entire electrostatic interaction,
1038 * including the reciprocal-space component.
1040 RealType v_lr, f_lr;
1042 pmeCoulombCorrectionVF<computeForces>(rSq, ewaldBeta, &v_lr, &f_lr);
1043 if constexpr (computeForces)
1045 f_lr = f_lr * rInv * rInv;
1048 /* Note that any possible Ewald shift has already been applied in
1049 * the normal interaction part above.
1052 /* If ii == jnr the i particle (ii) has itself (jnr)
1053 * in its neighborlist. This corresponds to a self-interaction
1054 * that will occur twice. Scale it down by 50% to only include
1057 v_lr = v_lr * gmx::blend(one, half, bIiEqJnr);
1059 for (int i = 0; i < NSTATES; i++)
1061 vCTot = vCTot - gmx::selectByMask(LFC[i] * qq[i] * v_lr, computeElecEwaldInteraction);
1062 if constexpr (computeForces)
1064 fScal = fScal - gmx::selectByMask(LFC[i] * qq[i] * f_lr, computeElecEwaldInteraction);
1067 - gmx::selectByMask(DLF[i] * qq[i] * v_lr, computeElecEwaldInteraction);
1071 const BoolType computeVdwEwaldInteraction = (bPairExcluded || r < rVdw);
1072 if (vdwInteractionTypeIsEwald && gmx::anyTrue(computeVdwEwaldInteraction))
1074 /* See comment in the preamble. When using LJ-Ewald interactions
1075 * (unless we use a switch modifier) we subtract the reciprocal-space
1076 * Ewald component here which made it possible to apply the free
1077 * energy interaction to r^-6 (vanilla LJ6 short-range part)
1078 * above. This gets us closer to the ideal case of applying
1079 * the softcore to the entire VdW interaction,
1080 * including the reciprocal-space component.
1083 RealType v_lr, f_lr;
1084 pmeLJCorrectionVF<computeForces>(
1085 rInv, rSq, ewaldLJCoeffSq, ewaldLJCoeffSixDivSix, &v_lr, &f_lr, computeVdwEwaldInteraction, bIiEqJnr);
1086 v_lr = v_lr * oneSixth;
1088 for (int i = 0; i < NSTATES; i++)
1090 vVTot = vVTot + gmx::selectByMask(LFV[i] * ljPmeC6Grid[i] * v_lr, computeVdwEwaldInteraction);
1091 if constexpr (computeForces)
1093 fScal = fScal + gmx::selectByMask(LFV[i] * ljPmeC6Grid[i] * f_lr, computeVdwEwaldInteraction);
1095 dvdlVdw = dvdlVdw + gmx::selectByMask(DLF[i] * ljPmeC6Grid[i] * v_lr, computeVdwEwaldInteraction);
1099 if (computeForces && gmx::anyTrue(fScal != zero))
1101 const RealType tX = fScal * dX;
1102 const RealType tY = fScal * dY;
1103 const RealType tZ = fScal * dZ;
1108 gmx::transposeScatterDecrU<3>(forceRealPtr, preloadJnr, tX, tY, tZ);
1110 } // end for (int k = nj0; k < nj1; k += DataTypes::simdRealWidth)
1112 if (havePairsWithinCutoff)
1114 if constexpr (computeForces)
1116 gmx::transposeScatterIncrU<3>(forceRealPtr, preloadIi, fIX, fIY, fIZ);
1120 gmx::transposeScatterIncrU<3>(
1121 reinterpret_cast<real*>(threadForceShiftBuffer), preloadIs, fIX, fIY, fIZ);
1127 threadVc[ggid] += gmx::reduce(vCTot);
1128 threadVv[ggid] += gmx::reduce(vVTot);
1131 } // end for (int n = 0; n < nri; n++)
1133 if (gmx::anyTrue(dvdlCoul != zero))
1135 threadDvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += gmx::reduce(dvdlCoul);
1137 if (gmx::anyTrue(dvdlVdw != zero))
1139 threadDvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += gmx::reduce(dvdlVdw);
1142 /* Estimate flops, average for free energy stuff:
1143 * 12 flops per outer iteration
1144 * 150 flops per inner iteration
1145 * TODO: Update the number of flops and/or use different counts for different code paths.
1147 atomicNrnbIncrement(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist.nri * 12 + nlist.jindex[nri] * 150);
1149 if (haveExcludedPairsBeyondRlist > 0)
1152 "There are perturbed non-bonded pair interactions beyond the pair-list cutoff "
1153 "of %g nm, which is not supported. This can happen because the system is "
1154 "unstable or because intra-molecular interactions at long distances are "
1156 "latter is the case, you can try to increase nstlist or rlist to avoid this."
1157 "The error is likely triggered by the use of couple-intramol=no "
1158 "and the maximal distance in the decoupled molecule exceeding rlist.",
1163 typedef void (*KernelFunction)(const t_nblist& nlist,
1164 const gmx::ArrayRefWithPadding<const gmx::RVec>& coords,
1167 const interaction_const_t& ic,
1168 gmx::ArrayRef<const gmx::RVec> shiftvec,
1169 gmx::ArrayRef<const real> nbfp,
1170 gmx::ArrayRef<const real> nbfp_grid,
1171 gmx::ArrayRef<const real> chargeA,
1172 gmx::ArrayRef<const real> chargeB,
1173 gmx::ArrayRef<const int> typeA,
1174 gmx::ArrayRef<const int> typeB,
1176 gmx::ArrayRef<const real> lambda,
1177 t_nrnb* gmx_restrict nrnb,
1178 gmx::ArrayRefWithPadding<gmx::RVec> threadForceBuffer,
1179 rvec* threadForceShiftBuffer,
1180 gmx::ArrayRef<real> threadVc,
1181 gmx::ArrayRef<real> threadVv,
1182 gmx::ArrayRef<real> threadDvdl);
1184 template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch, bool computeForces>
1185 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
1189 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
1190 return (nb_free_energy_kernel<SimdDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces>);
1192 return (nb_free_energy_kernel<ScalarDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch computeForces>);
1197 return (nb_free_energy_kernel<ScalarDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces>);
1201 template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
1202 static KernelFunction dispatchKernelOnComputeForces(const bool computeForces, const bool useSimd)
1206 return (dispatchKernelOnUseSimd<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, true>(
1211 return (dispatchKernelOnUseSimd<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, false>(
1216 template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
1217 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch,
1218 const bool computeForces,
1221 if (vdwModifierIsPotSwitch)
1223 return (dispatchKernelOnComputeForces<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
1224 computeForces, useSimd));
1228 return (dispatchKernelOnComputeForces<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
1229 computeForces, useSimd));
1233 template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
1234 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
1235 const bool vdwModifierIsPotSwitch,
1236 const bool computeForces,
1239 if (elecInteractionTypeIsEwald)
1241 return (dispatchKernelOnVdwModifier<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
1242 vdwModifierIsPotSwitch, computeForces, useSimd));
1246 return (dispatchKernelOnVdwModifier<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
1247 vdwModifierIsPotSwitch, computeForces, useSimd));
1251 template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer>
1252 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
1253 const bool elecInteractionTypeIsEwald,
1254 const bool vdwModifierIsPotSwitch,
1255 const bool computeForces,
1258 if (vdwInteractionTypeIsEwald)
1260 return (dispatchKernelOnElecInteractionType<softcoreType, scLambdasOrAlphasDiffer, true>(
1261 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd));
1265 return (dispatchKernelOnElecInteractionType<softcoreType, scLambdasOrAlphasDiffer, false>(
1266 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd));
1270 template<KernelSoftcoreType softcoreType>
1271 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
1272 const bool vdwInteractionTypeIsEwald,
1273 const bool elecInteractionTypeIsEwald,
1274 const bool vdwModifierIsPotSwitch,
1275 const bool computeForces,
1278 if (scLambdasOrAlphasDiffer)
1280 return (dispatchKernelOnVdwInteractionType<softcoreType, true>(
1281 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd));
1285 return (dispatchKernelOnVdwInteractionType<softcoreType, false>(
1286 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd));
1290 static KernelFunction dispatchKernel(const bool scLambdasOrAlphasDiffer,
1291 const bool vdwInteractionTypeIsEwald,
1292 const bool elecInteractionTypeIsEwald,
1293 const bool vdwModifierIsPotSwitch,
1294 const bool computeForces,
1296 const interaction_const_t& ic)
1298 const auto& scParams = *ic.softCoreParameters;
1299 if (scParams.softcoreType == SoftcoreType::Beutler)
1301 if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1303 return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::None>(
1304 scLambdasOrAlphasDiffer,
1305 vdwInteractionTypeIsEwald,
1306 elecInteractionTypeIsEwald,
1307 vdwModifierIsPotSwitch,
1311 return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Beutler>(
1312 scLambdasOrAlphasDiffer,
1313 vdwInteractionTypeIsEwald,
1314 elecInteractionTypeIsEwald,
1315 vdwModifierIsPotSwitch,
1321 if (scParams.scaleLinpointCoulGapsys == 0 && scParams.scaleLinpointVdWGapsys == 0)
1323 return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::None>(
1324 scLambdasOrAlphasDiffer,
1325 vdwInteractionTypeIsEwald,
1326 elecInteractionTypeIsEwald,
1327 vdwModifierIsPotSwitch,
1331 return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Gapsys>(
1332 scLambdasOrAlphasDiffer,
1333 vdwInteractionTypeIsEwald,
1334 elecInteractionTypeIsEwald,
1335 vdwModifierIsPotSwitch,
1342 void gmx_nb_free_energy_kernel(const t_nblist& nlist,
1343 const gmx::ArrayRefWithPadding<const gmx::RVec>& coords,
1347 const interaction_const_t& ic,
1348 gmx::ArrayRef<const gmx::RVec> shiftvec,
1349 gmx::ArrayRef<const real> nbfp,
1350 gmx::ArrayRef<const real> nbfp_grid,
1351 gmx::ArrayRef<const real> chargeA,
1352 gmx::ArrayRef<const real> chargeB,
1353 gmx::ArrayRef<const int> typeA,
1354 gmx::ArrayRef<const int> typeB,
1356 gmx::ArrayRef<const real> lambda,
1358 gmx::ArrayRefWithPadding<gmx::RVec> threadForceBuffer,
1359 rvec* threadForceShiftBuffer,
1360 gmx::ArrayRef<real> threadVc,
1361 gmx::ArrayRef<real> threadVv,
1362 gmx::ArrayRef<real> threadDvdl)
1364 GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1365 "Unsupported eeltype with free energy");
1366 GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1368 // Not all SIMD implementations need padding, but we provide padding anyhow so we can assert
1369 GMX_ASSERT(!GMX_SIMD_HAVE_REAL || threadForceBuffer.empty()
1370 || threadForceBuffer.size() > threadForceBuffer.unpaddedArrayRef().ssize(),
1371 "We need actual padding with at least one element for SIMD scatter operations");
1373 const auto& scParams = *ic.softCoreParameters;
1374 const bool vdwInteractionTypeIsEwald = (EVDW_PME(ic.vdwtype));
1375 const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1376 const bool vdwModifierIsPotSwitch = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1377 const bool computeForces = ((flags & GMX_NONBONDED_DO_FORCE) != 0);
1378 bool scLambdasOrAlphasDiffer = true;
1380 if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1382 scLambdasOrAlphasDiffer = false;
1386 if (lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1387 == lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1388 && scParams.alphaCoulomb == scParams.alphaVdw)
1390 scLambdasOrAlphasDiffer = false;
1394 KernelFunction kernelFunc;
1395 kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1396 vdwInteractionTypeIsEwald,
1397 elecInteractionTypeIsEwald,
1398 vdwModifierIsPotSwitch,
1418 threadForceShiftBuffer,