*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "nb_free_energy.h"
+#include "config.h"
+
#include <cmath>
+#include <set>
#include <algorithm>
#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
#include "gromacs/gmxlib/nonbonded/nonbonded.h"
+#include "gromacs/math/arrayrefwithpadding.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
+#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
+#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/mdtypes/nblist.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/simd/simd.h"
+#include "gromacs/simd/simd_math.h"
#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/arrayref.h"
+
+#include "nb_softcore.h"
-void
-gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
- rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- t_forcerec * gmx_restrict fr,
- const t_mdatoms * gmx_restrict mdatoms,
- nb_kernel_data_t * gmx_restrict kernel_data,
- t_nrnb * gmx_restrict nrnb)
+//! Scalar (non-SIMD) data types.
+//
+// Instantiating the templated free-energy kernel with these types processes
+// a single pair interaction per inner-loop iteration (lane width 1).
+struct ScalarDataTypes
+{
+ using RealType = real; //!< The data type to use as real.
+ using IntType = int; //!< The data type to use as int.
+ using BoolType = bool; //!< The data type to use as bool for real value comparison.
+ static constexpr int simdRealWidth = 1; //!< The width of the RealType.
+ static constexpr int simdIntWidth = 1; //!< The width of the IntType.
+};
+
+#if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
+//! SIMD data types.
+//
+// Only compiled when the SIMD build provides both real and 32-bit integer
+// arithmetics. The integer lane width follows the precision of the build:
+// double-precision SIMD integers when GMX_DOUBLE, single-precision otherwise.
+struct SimdDataTypes
{
+ using RealType = gmx::SimdReal; //!< The data type to use as real.
+ using IntType = gmx::SimdInt32; //!< The data type to use as int.
+ using BoolType = gmx::SimdBool; //!< The data type to use as bool for real value comparison.
+ static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH; //!< The width of the RealType.
+# if GMX_SIMD_HAVE_DOUBLE && GMX_DOUBLE
+ static constexpr int simdIntWidth = GMX_SIMD_DINT32_WIDTH; //!< The width of the IntType.
+# else
+ static constexpr int simdIntWidth = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
+# endif
+};
+#endif
+
+/*! \brief Lower limit for square interaction distances in nonbonded kernels.
+ *
+ * This is a minimum on r^2 to avoid overflows when computing r^6.
+ * This will only affect results for soft-cored interaction at distances smaller
+ * than 1e-6 and will limit extremely high foreign energies for overlapping atoms.
+ * Note that we could use a somewhat smaller minimum in double precision.
+ * But because invsqrt in double precision can use single precision, this number
+ * cannot be much smaller, so we use the same number for simplicity.
+ */
+constexpr real c_minDistanceSquared = 1.0e-12_real;
-#define STATE_A 0
-#define STATE_B 1
-#define NSTATES 2
- int i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
- real shX, shY, shZ;
- real tx, ty, tz, Fscal;
- double FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
- double Vcoul[NSTATES], Vvdw[NSTATES]; /* Needs double for sc_power==48 */
- real rinv6, r, rtC, rtV;
- real iqA, iqB;
- real qq[NSTATES], vctot;
- int ntiA, ntiB, tj[NSTATES];
- real Vvdw6, Vvdw12, vvtot;
- real ix, iy, iz, fix, fiy, fiz;
- real dx, dy, dz, rsq, rinv;
- real c6[NSTATES], c12[NSTATES], c6grid;
- real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
- double dvdl_coul, dvdl_vdw;
- real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
- real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
- double rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
- real sigma2[NSTATES], sigma_pow[NSTATES];
- int do_tab, tab_elemsize = 0;
- int n0, n1C, n1V, nnn;
- real Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
- int icoul, ivdw;
- int nri;
- const int * iinr;
- const int * jindex;
- const int * jjnr;
- const int * shift;
- const int * gid;
- const int * typeA;
- const int * typeB;
- int ntype;
- const real * shiftvec;
- real * fshift;
- real tabscale = 0;
- const real * VFtab = nullptr;
- const real * x;
- real * f;
- real facel, krf, crf;
- const real * chargeA;
- const real * chargeB;
- real sigma6_min, sigma6_def, lam_power, sc_r_power;
- real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
- real ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald;
- real ewclj6;
- const real * nbfp, *nbfp_grid;
- real * dvdl;
- real * Vv;
- real * Vc;
- gmx_bool bDoForces, bDoShiftForces, bDoPotential;
- real rcoulomb, rvdw, sh_invrc6;
- gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
- gmx_bool bEwald, bEwaldLJ;
- real rcutoff_max2;
- const real * tab_ewald_F_lj = nullptr;
- const real * tab_ewald_V_lj = nullptr;
- real d, d2, sw, dsw, rinvcorr;
- real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
- real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
- gmx_bool bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
- gmx_bool bComputeVdwInteraction, bComputeElecInteraction;
- const real * ewtab = nullptr;
- int ewitab;
- real ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
-
- const real onetwelfth = 1.0/12.0;
- const real onesixth = 1.0/6.0;
- const real zero = 0.0;
- const real half = 0.5;
- const real one = 1.0;
- const real two = 2.0;
- const real six = 6.0;
- const real fourtyeight = 48.0;
-
- /* Extract pointer to non-bonded interaction constants */
- const interaction_const_t *ic = fr->ic;
-
- x = xx[0];
- f = ff[0];
-
- fshift = fr->fshift[0];
-
- nri = nlist->nri;
- iinr = nlist->iinr;
- jindex = nlist->jindex;
- jjnr = nlist->jjnr;
- icoul = nlist->ielec;
- ivdw = nlist->ivdw;
- shift = nlist->shift;
- gid = nlist->gid;
-
- shiftvec = fr->shift_vec[0];
- chargeA = mdatoms->chargeA;
- chargeB = mdatoms->chargeB;
- facel = fr->ic->epsfac;
- krf = ic->k_rf;
- crf = ic->c_rf;
- Vc = kernel_data->energygrp_elec;
- typeA = mdatoms->typeA;
- typeB = mdatoms->typeB;
- ntype = fr->ntype;
- nbfp = fr->nbfp;
- nbfp_grid = fr->ljpme_c6grid;
- Vv = kernel_data->energygrp_vdw;
- lambda_coul = kernel_data->lambda[efptCOUL];
- lambda_vdw = kernel_data->lambda[efptVDW];
- dvdl = kernel_data->dvdl;
- alpha_coul = fr->sc_alphacoul;
- alpha_vdw = fr->sc_alphavdw;
- lam_power = fr->sc_power;
- sc_r_power = fr->sc_r_power;
- sigma6_def = fr->sc_sigma6_def;
- sigma6_min = fr->sc_sigma6_min;
- bDoForces = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
- bDoShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
- bDoPotential = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
-
- rcoulomb = ic->rcoulomb;
- rvdw = ic->rvdw;
- sh_invrc6 = ic->sh_invrc6;
- sh_lj_ewald = ic->sh_lj_ewald;
- ewclj = ic->ewaldcoeff_lj;
- ewclj2 = ewclj*ewclj;
- ewclj6 = ewclj2*ewclj2*ewclj2;
-
- if (ic->coulomb_modifier == eintmodPOTSWITCH)
+/*! \brief Higher limit for r^-6 used for Lennard-Jones interactions
+ *
+ * This is needed to avoid overflow of LJ energy and force terms for excluded
+ * atoms and foreign energies of hard-core states of overlapping atoms.
+ * Note that in single precision this value leaves room for C12 coefficients up to 3.4e8.
+ */
+constexpr real c_maxRInvSix = 1.0e15_real;
+
+/*! \brief Computes the Ewald real-space correction terms for Coulomb.
+ *
+ * \param rSq   Squared pair distance
+ * \param beta  Ewald splitting coefficient
+ * \param pot   Output: beta * pmePotentialCorrection(beta^2 * rSq)
+ * \param force Output, only written when computeForces:
+ *              -beta^3 * rSq * pmeForceCorrection(beta^2 * rSq)
+ */
+template<bool computeForces, class RealType>
+static inline void
+pmeCoulombCorrectionVF(const RealType rSq, const real beta, RealType* pot, RealType gmx_unused* force)
+{
+ const RealType brsq = rSq * beta * beta;
+ if constexpr (computeForces)
    {
- d = ic->rcoulomb - ic->rcoulomb_switch;
- elec_swV3 = -10.0/(d*d*d);
- elec_swV4 = 15.0/(d*d*d*d);
- elec_swV5 = -6.0/(d*d*d*d*d);
- elec_swF2 = -30.0/(d*d*d);
- elec_swF3 = 60.0/(d*d*d*d);
- elec_swF4 = -30.0/(d*d*d*d*d);
+ *force = -brsq * beta * gmx::pmeForceCorrection(brsq);
    }
- else
+ *pot = beta * gmx::pmePotentialCorrection(brsq);
+}
+
+/*! \brief Computes the LJ-PME (Ewald LJ) correction terms for one pair.
+ *
+ * Pairs masked out by \p mask contribute zero potential and force.
+ * For self pairs (\p bIiEqJnr) the potential uses the analytic r -> 0
+ * limit 0.5 * ewaldLJCoeffSixDivSix instead of the r-dependent expression.
+ */
+template<bool computeForces, class RealType, class BoolType>
+static inline void pmeLJCorrectionVF(const RealType rInv,
+ const RealType rSq,
+ const real ewaldLJCoeffSq,
+ const real ewaldLJCoeffSixDivSix,
+ RealType* pot,
+ RealType gmx_unused* force,
+ const BoolType mask,
+ const BoolType bIiEqJnr)
+{
+ // We mask rInv to get zero force and potential for masked out pair interactions
+ const RealType rInvSq = rInv * rInv;
+ const RealType rInvSix = rInvSq * rInvSq * rInvSq;
+ // Mask rSq to avoid underflow in exp()
+ const RealType coeffSqRSq = ewaldLJCoeffSq * gmx::selectByMask(rSq, mask);
+ const RealType expNegCoeffSqRSq = gmx::exp(-coeffSqRSq);
+ const RealType poly = 1.0_real + coeffSqRSq + 0.5_real * coeffSqRSq * coeffSqRSq;
+ if constexpr (computeForces)
    {
- /* Avoid warnings from stupid compilers (looking at you, Clang!) */
- elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
+ *force = rInvSix - expNegCoeffSqRSq * (rInvSix * poly + ewaldLJCoeffSixDivSix);
+ *force = *force * rInvSq;
    }
+ // The self interaction is the limit for r -> 0 which we need to compute separately
+ *pot = gmx::blend(
+ rInvSix * (1.0_real - expNegCoeffSqRSq * poly), 0.5_real * ewaldLJCoeffSixDivSix, bIiEqJnr);
+}
+
+//! Computes r^(1/6) and 1/r^(1/6)
+template<class RealType>
+static inline void sixthRoot(const RealType r, RealType* sixthRoot, RealType* invSixthRoot)
+{
+ // invsqrt(cbrt(r)) = r^(-1/6); inverting that yields r^(1/6)
+ RealType cbrtRes = gmx::cbrt(r);
+ *invSixthRoot = gmx::invsqrt(cbrtRes);
+ *sixthRoot = gmx::inv(*invSixthRoot);
+}
+
+//! Returns the sixth power of the inverse distance, rInvV^6
+template<class RealType>
+static inline RealType calculateRinv6(const RealType rInvV)
+{
+ RealType rInv6 = rInvV * rInvV;
+ return (rInv6 * rInv6 * rInv6);
+}
- if (ic->vdw_modifier == eintmodPOTSWITCH)
+//! Lennard-Jones dispersion term: c6 * r^-6 (rInv6 = r^-6)
+template<class RealType>
+static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
+{
+ return (c6 * rInv6);
+}
+
+//! Lennard-Jones repulsion term: c12 * r^-12 (rInv6 = r^-6)
+template<class RealType>
+static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
+{
+ return (c12 * rInv6 * rInv6);
+}
+
+/* reaction-field electrostatics */
+//! Reaction-field force term: qq * (1/r - 2*krf*r^2)
+template<class RealType>
+static inline RealType reactionFieldScalarForce(const RealType qq,
+ const RealType rInv,
+ const RealType r,
+ const real krf,
+ const real two)
+{
+ return (qq * (rInv - two * krf * r * r));
+}
+//! Reaction-field potential: qq * (1/r + krf*r^2 - potentialShift)
+template<class RealType>
+static inline RealType reactionFieldPotential(const RealType qq,
+ const RealType rInv,
+ const RealType r,
+ const real krf,
+ const real potentialShift)
+{
+ return (qq * (rInv + krf * r * r - potentialShift));
+}
+
+/* Ewald electrostatics */
+//! Ewald force term: coulomb / r
+template<class RealType>
+static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
+{
+ return (coulomb * rInv);
+}
+//! Ewald potential: coulomb * (1/r - potentialShift)
+template<class RealType>
+static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
+{
+ return (coulomb * (rInv - potentialShift));
+}
+
+/* cutoff LJ */
+//! Plain (cutoff) Lennard-Jones force term: v12 - v6
+template<class RealType>
+static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
+{
+ return (v12 - v6);
+}
+//! Shifted Lennard-Jones potential.
+//
+// The stored LJ parameters carry 6x (c6) and 12x (c12) scaling (see the
+// preload code's note on nbfp), hence the oneTwelfth/oneSixth factors here.
+template<class RealType>
+static inline RealType lennardJonesPotential(const RealType v6,
+ const RealType v12,
+ const RealType c6,
+ const RealType c12,
+ const real repulsionShift,
+ const real dispersionShift,
+ const real oneSixth,
+ const real oneTwelfth)
+{
+ return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
+}
+
+/* Ewald LJ */
+//! LJ-PME grid contribution of the potential shift: c6grid * potentialShift / 6
+template<class RealType>
+static inline RealType ewaldLennardJonesGridSubtract(const RealType c6grid,
+ const real potentialShift,
+ const real oneSixth)
+{
+ return (c6grid * potentialShift * oneSixth);
+}
+
+/* LJ Potential switch */
+//! Applies the potential-switch function to the force term: f*sw - r*V*dsw, masked
+template<class RealType, class BoolType>
+static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
+ const RealType potential,
+ const RealType sw,
+ const RealType r,
+ const RealType dsw,
+ const BoolType mask)
+{
+ /* The mask should select on rV < rVdw */
+ return (gmx::selectByMask(fScalarInp * sw - r * potential * dsw, mask));
+}
+//! Applies the potential-switch function to the potential: V*sw, masked
+template<class RealType, class BoolType>
+static inline RealType potSwitchPotentialMod(const RealType potentialInp, const RealType sw, const BoolType mask)
+{
+ /* The mask should select on rV < rVdw */
+ return (gmx::selectByMask(potentialInp * sw, mask));
+}
+
+
+//! Templated free-energy non-bonded kernel
+template<typename DataTypes, KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch, bool computeForces>
+static void nb_free_energy_kernel(const t_nblist& nlist,
+ const gmx::ArrayRefWithPadding<const gmx::RVec>& coords,
+ const int ntype,
+ const real rlist,
+ const interaction_const_t& ic,
+ gmx::ArrayRef<const gmx::RVec> shiftvec,
+ gmx::ArrayRef<const real> nbfp,
+ gmx::ArrayRef<const real> gmx_unused nbfp_grid,
+ gmx::ArrayRef<const real> chargeA,
+ gmx::ArrayRef<const real> chargeB,
+ gmx::ArrayRef<const int> typeA,
+ gmx::ArrayRef<const int> typeB,
+ int flags,
+ gmx::ArrayRef<const real> lambda,
+ t_nrnb* gmx_restrict nrnb,
+ gmx::ArrayRefWithPadding<gmx::RVec> threadForceBuffer,
+ rvec gmx_unused* threadForceShiftBuffer,
+ gmx::ArrayRef<real> threadVc,
+ gmx::ArrayRef<real> threadVv,
+ gmx::ArrayRef<real> threadDvdl)
+{
+#define STATE_A 0
+#define STATE_B 1
+#define NSTATES 2
+
+ using RealType = typename DataTypes::RealType;
+ using IntType = typename DataTypes::IntType;
+ using BoolType = typename DataTypes::BoolType;
+
+ constexpr real oneTwelfth = 1.0_real / 12.0_real;
+ constexpr real oneSixth = 1.0_real / 6.0_real;
+ constexpr real zero = 0.0_real;
+ constexpr real half = 0.5_real;
+ constexpr real one = 1.0_real;
+ constexpr real two = 2.0_real;
+ constexpr real six = 6.0_real;
+
+ // Extract pair list data
+ const int nri = nlist.nri;
+ gmx::ArrayRef<const int> iinr = nlist.iinr;
+ gmx::ArrayRef<const int> jindex = nlist.jindex;
+ gmx::ArrayRef<const int> jjnr = nlist.jjnr;
+ gmx::ArrayRef<const int> shift = nlist.shift;
+ gmx::ArrayRef<const int> gid = nlist.gid;
+
+ const real lambda_coul = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
+ const real lambda_vdw = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
+
+ // Extract softcore parameters
+ const auto& scParams = *ic.softCoreParameters;
+ const real lam_power = scParams.lambdaPower;
+ const real gmx_unused alpha_coul = scParams.alphaCoulomb;
+ const real gmx_unused alpha_vdw = scParams.alphaVdw;
+ const real gmx_unused sigma6_def = scParams.sigma6WithInvalidSigma;
+ const real gmx_unused sigma6_min = scParams.sigma6Minimum;
+
+ const real gmx_unused gapsysScaleLinpointCoul = scParams.gapsysScaleLinpointCoul;
+ const real gmx_unused gapsysScaleLinpointVdW = scParams.gapsysScaleLinpointVdW;
+ const real gmx_unused gapsysSigma6VdW = scParams.gapsysSigma6VdW;
+
+ const bool gmx_unused doShiftForces = ((flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
+ const bool doPotential = ((flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
+
+ // Extract data from interaction_const_t
+ const real facel = ic.epsfac;
+ const real rCoulomb = ic.rcoulomb;
+ const real krf = ic.reactionFieldCoefficient;
+ const real crf = ic.reactionFieldShift;
+ const real gmx_unused shLjEwald = ic.sh_lj_ewald;
+ const real rVdw = ic.rvdw;
+ const real dispersionShift = ic.dispersion_shift.cpot;
+ const real repulsionShift = ic.repulsion_shift.cpot;
+ const real ewaldBeta = ic.ewaldcoeff_q;
+ real gmx_unused ewaldLJCoeffSq;
+ real gmx_unused ewaldLJCoeffSixDivSix;
+ if constexpr (vdwInteractionTypeIsEwald)
+ {
+ ewaldLJCoeffSq = ic.ewaldcoeff_lj * ic.ewaldcoeff_lj;
+ ewaldLJCoeffSixDivSix = ewaldLJCoeffSq * ewaldLJCoeffSq * ewaldLJCoeffSq / six;
+ }
+
+ // Note that the nbnxm kernels do not support Coulomb potential switching at all
+ GMX_ASSERT(ic.coulomb_modifier != InteractionModifiers::PotSwitch,
+ "Potential switching is not supported for Coulomb with FEP");
+
+ const real rVdwSwitch = ic.rvdw_switch;
+ real gmx_unused vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
+ if constexpr (vdwModifierIsPotSwitch)
{
- d = ic->rvdw - ic->rvdw_switch;
- vdw_swV3 = -10.0/(d*d*d);
- vdw_swV4 = 15.0/(d*d*d*d);
- vdw_swV5 = -6.0/(d*d*d*d*d);
- vdw_swF2 = -30.0/(d*d*d);
- vdw_swF3 = 60.0/(d*d*d*d);
- vdw_swF4 = -30.0/(d*d*d*d*d);
+ const real d = rVdw - rVdwSwitch;
+ vdw_swV3 = -10.0_real / (d * d * d);
+ vdw_swV4 = 15.0_real / (d * d * d * d);
+ vdw_swV5 = -6.0_real / (d * d * d * d * d);
+ vdw_swF2 = -30.0_real / (d * d * d);
+ vdw_swF3 = 60.0_real / (d * d * d * d);
+ vdw_swF4 = -30.0_real / (d * d * d * d * d);
}
else
{
/* Avoid warnings from stupid compilers (looking at you, Clang!) */
- vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
+ vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = zero;
}
- if (fr->cutoff_scheme == ecutsVERLET)
+ NbkernelElecType icoul;
+ if (ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype))
{
- const interaction_const_t *ic = fr->ic;
-
- if (EVDW_PME(ic->vdwtype))
- {
- ivdw = GMX_NBKERNEL_VDW_LJEWALD;
- }
- else
- {
- ivdw = GMX_NBKERNEL_VDW_LENNARDJONES;
- }
-
- if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
- {
- icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
- }
- else if (EEL_PME_EWALD(ic->eeltype))
- {
- icoul = GMX_NBKERNEL_ELEC_EWALD;
- }
- else
- {
- gmx_incons("Unsupported eeltype with Verlet and free-energy");
- }
-
- bExactElecCutoff = TRUE;
- bExactVdwCutoff = TRUE;
+ icoul = NbkernelElecType::ReactionField;
}
else
{
- bExactElecCutoff = (ic->coulomb_modifier != eintmodNONE) || ic->eeltype == eelRF_ZERO;
- bExactVdwCutoff = (ic->vdw_modifier != eintmodNONE);
+ icoul = NbkernelElecType::None;
}
- bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
- rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
- rcutoff_max2 = rcutoff_max2*rcutoff_max2;
+ real rcutoff_max2 = std::max(ic.rcoulomb, ic.rvdw);
+ rcutoff_max2 = rcutoff_max2 * rcutoff_max2;
+ const real gmx_unused rCutoffCoul = ic.rcoulomb;
- bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD);
- bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
-
- if (bEwald || bEwaldLJ)
+ real gmx_unused sh_ewald = 0;
+ if constexpr (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
{
- sh_ewald = ic->sh_ewald;
- ewtab = ic->tabq_coul_FDV0;
- ewtabscale = ic->tabq_scale;
- ewtabhalfspace = half/ewtabscale;
- tab_ewald_F_lj = ic->tabq_vdw_F;
- tab_ewald_V_lj = ic->tabq_vdw_V;
+ sh_ewald = ic.sh_ewald;
}
/* For Ewald/PME interactions we cannot easily apply the soft-core component to
- * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
- * can apply the small trick of subtracting the _reciprocal_ space contribution
- * in this kernel, and instead apply the free energy interaction to the 1/r
- * (standard coulomb) interaction.
+ * reciprocal space. When we use non-switched Ewald interactions, we
+ * assume the soft-coring does not significantly affect the grid contribution
+ * and apply the soft-core only to the full 1/r (- shift) pair contribution.
*
* However, we cannot use this approach for switch-modified since we would then
* effectively end up evaluating a significantly different interaction here compared to the
* things (1/r rather than short-range Ewald). For these settings, we just
* use the traditional short-range Ewald interaction in that case.
*/
- bConvertEwaldToCoulomb = (bEwald && (ic->coulomb_modifier != eintmodPOTSWITCH));
- /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
- * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
- */
- bConvertLJEwaldToLJ6 = (bEwaldLJ && (ic->vdw_modifier != eintmodPOTSWITCH));
+ GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
+ "Can not apply soft-core to switched Ewald potentials");
- /* We currently don't implement exclusion correction, needed with the Verlet cut-off scheme, without conversion */
- if (fr->cutoff_scheme == ecutsVERLET &&
- ((bEwald && !bConvertEwaldToCoulomb) ||
- (bEwaldLJ && !bConvertLJEwaldToLJ6)))
- {
- gmx_incons("Unimplemented non-bonded setup");
- }
-
- /* fix compiler warnings */
- n1C = n1V = 0;
- epsC = epsV = 0;
- eps2C = eps2V = 0;
+ const RealType minDistanceSquared(c_minDistanceSquared);
+ const RealType maxRInvSix(c_maxRInvSix);
+ const RealType gmx_unused floatMin(GMX_FLOAT_MIN);
- dvdl_coul = 0;
- dvdl_vdw = 0;
+ RealType dvdlCoul{ zero };
+ RealType dvdlVdw{ zero };
/* Lambda factor for state A, 1-lambda*/
+ real LFC[NSTATES], LFV[NSTATES];
LFC[STATE_A] = one - lambda_coul;
LFV[STATE_A] = one - lambda_vdw;
LFV[STATE_B] = lambda_vdw;
/*derivative of the lambda factor for state A and B */
- DLF[STATE_A] = -1;
- DLF[STATE_B] = 1;
+ real DLF[NSTATES];
+ DLF[STATE_A] = -one;
+ DLF[STATE_B] = one;
- for (i = 0; i < NSTATES; i++)
+ real gmx_unused lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
+ constexpr real sc_r_power = six;
+ for (int i = 0; i < NSTATES; i++)
{
- lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
- dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
- lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
- dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
+ lFacCoul[i] = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
+ dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
+ lFacVdw[i] = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
+ dlFacVdw[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
}
- /* precalculate */
- sigma2_def = std::cbrt(sigma6_def);
- sigma2_min = std::cbrt(sigma6_min);
- /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
-
- do_tab = static_cast<int>(icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
- ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
- if (do_tab)
+ // We need pointers to real for SIMD access
+ const real* gmx_restrict x = coords.paddedConstArrayRef().data()[0];
+ real* gmx_restrict forceRealPtr;
+ if constexpr (computeForces)
{
- tabscale = kernel_data->table_elec_vdw->scale;
- VFtab = kernel_data->table_elec_vdw->data;
- /* we always use the combined table here */
- tab_elemsize = kernel_data->table_elec_vdw->stride;
+ GMX_ASSERT(nri == 0 || !threadForceBuffer.empty(), "need a valid threadForceBuffer");
+
+ forceRealPtr = threadForceBuffer.paddedArrayRef().data()[0];
+
+ if (doShiftForces)
+ {
+ GMX_ASSERT(threadForceShiftBuffer != nullptr, "need a valid threadForceShiftBuffer");
+ }
}
- for (n = 0; (n < nri); n++)
+ const real rlistSquared = gmx::square(rlist);
+
+ bool haveExcludedPairsBeyondRlist = false;
+
+ for (int n = 0; n < nri; n++)
{
- int npair_within_cutoff;
-
- npair_within_cutoff = 0;
-
- is3 = 3*shift[n];
- shX = shiftvec[is3];
- shY = shiftvec[is3+1];
- shZ = shiftvec[is3+2];
- nj0 = jindex[n];
- nj1 = jindex[n+1];
- ii = iinr[n];
- ii3 = 3*ii;
- ix = shX + x[ii3+0];
- iy = shY + x[ii3+1];
- iz = shZ + x[ii3+2];
- iqA = facel*chargeA[ii];
- iqB = facel*chargeB[ii];
- ntiA = 2*ntype*typeA[ii];
- ntiB = 2*ntype*typeB[ii];
- vctot = 0;
- vvtot = 0;
- fix = 0;
- fiy = 0;
- fiz = 0;
-
- for (k = nj0; (k < nj1); k++)
+ bool havePairsWithinCutoff = false;
+
+ const int is = shift[n];
+ const real shX = shiftvec[is][XX];
+ const real shY = shiftvec[is][YY];
+ const real shZ = shiftvec[is][ZZ];
+ const int nj0 = jindex[n];
+ const int nj1 = jindex[n + 1];
+ const int ii = iinr[n];
+ const int ii3 = 3 * ii;
+ const real ix = shX + x[ii3 + 0];
+ const real iy = shY + x[ii3 + 1];
+ const real iz = shZ + x[ii3 + 2];
+ const real iqA = facel * chargeA[ii];
+ const real iqB = facel * chargeB[ii];
+ const int ntiA = ntype * typeA[ii];
+ const int ntiB = ntype * typeB[ii];
+ RealType vCTot(0);
+ RealType vVTot(0);
+ RealType fIX(0);
+ RealType fIY(0);
+ RealType fIZ(0);
+
+#if GMX_SIMD_HAVE_REAL
+ alignas(GMX_SIMD_ALIGNMENT) int preloadIi[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) int gmx_unused preloadIs[DataTypes::simdRealWidth];
+#else
+ int preloadIi[DataTypes::simdRealWidth];
+ int gmx_unused preloadIs[DataTypes::simdRealWidth];
+#endif
+ for (int i = 0; i < DataTypes::simdRealWidth; i++)
{
- jnr = jjnr[k];
- j3 = 3*jnr;
- dx = ix - x[j3];
- dy = iy - x[j3+1];
- dz = iz - x[j3+2];
- rsq = dx*dx + dy*dy + dz*dz;
-
- if (bExactCutoffAll && rsq >= rcutoff_max2)
+ preloadIi[i] = ii;
+ preloadIs[i] = shift[n];
+ }
+ IntType ii_s = gmx::load<IntType>(preloadIi);
+
+ for (int k = nj0; k < nj1; k += DataTypes::simdRealWidth)
+ {
+ RealType r, rInv;
+
+#if GMX_SIMD_HAVE_REAL
+ alignas(GMX_SIMD_ALIGNMENT) real preloadPairIsValid[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) real preloadPairIncluded[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) int32_t preloadJnr[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) int32_t typeIndices[NSTATES][DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) real preloadQq[NSTATES][DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadSigma6[NSTATES][DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadAlphaVdwEff[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadAlphaCoulEff[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT)
+ real gmx_unused preloadGapsysScaleLinpointVdW[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT)
+ real gmx_unused preloadGapsysScaleLinpointCoul[DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT)
+ real gmx_unused preloadGapsysSigma6VdW[NSTATES][DataTypes::simdRealWidth];
+ alignas(GMX_SIMD_ALIGNMENT) real preloadLjPmeC6Grid[NSTATES][DataTypes::simdRealWidth];
+#else
+ real preloadPairIsValid[DataTypes::simdRealWidth];
+ real preloadPairIncluded[DataTypes::simdRealWidth];
+ int preloadJnr[DataTypes::simdRealWidth];
+ int typeIndices[NSTATES][DataTypes::simdRealWidth];
+ real preloadQq[NSTATES][DataTypes::simdRealWidth];
+ real gmx_unused preloadSigma6[NSTATES][DataTypes::simdRealWidth];
+ real gmx_unused preloadAlphaVdwEff[DataTypes::simdRealWidth];
+ real gmx_unused preloadAlphaCoulEff[DataTypes::simdRealWidth];
+ real gmx_unused preloadGapsysScaleLinpointVdW[DataTypes::simdRealWidth];
+ real gmx_unused preloadGapsysScaleLinpointCoul[DataTypes::simdRealWidth];
+ real gmx_unused preloadGapsysSigma6VdW[NSTATES][DataTypes::simdRealWidth];
+ real preloadLjPmeC6Grid[NSTATES][DataTypes::simdRealWidth];
+#endif
+ for (int j = 0; j < DataTypes::simdRealWidth; j++)
+ {
+ if (k + j < nj1)
+ {
+ preloadPairIsValid[j] = true;
+ /* Check if this pair on the exclusions list.*/
+ preloadPairIncluded[j] = (nlist.excl_fep.empty() || nlist.excl_fep[k + j]);
+ const int jnr = jjnr[k + j];
+ preloadJnr[j] = jnr;
+ typeIndices[STATE_A][j] = ntiA + typeA[jnr];
+ typeIndices[STATE_B][j] = ntiB + typeB[jnr];
+ preloadQq[STATE_A][j] = iqA * chargeA[jnr];
+ preloadQq[STATE_B][j] = iqB * chargeB[jnr];
+
+ for (int i = 0; i < NSTATES; i++)
+ {
+ if constexpr (vdwInteractionTypeIsEwald)
+ {
+ preloadLjPmeC6Grid[i][j] = nbfp_grid[2 * typeIndices[i][j]];
+ }
+ else
+ {
+ preloadLjPmeC6Grid[i][j] = 0;
+ }
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
+ {
+ const real c6 = nbfp[2 * typeIndices[i][j]];
+ const real c12 = nbfp[2 * typeIndices[i][j] + 1];
+ if (c6 > 0 && c12 > 0)
+ {
+ /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
+ preloadSigma6[i][j] = 0.5_real * c12 / c6;
+ if (preloadSigma6[i][j]
+ < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
+ {
+ preloadSigma6[i][j] = sigma6_min;
+ }
+ }
+ else
+ {
+ preloadSigma6[i][j] = sigma6_def;
+ }
+ }
+ if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+ {
+ const real c6 = nbfp[2 * typeIndices[i][j]];
+ const real c12 = nbfp[2 * typeIndices[i][j] + 1];
+ if (c6 > 0 && c12 > 0)
+ {
+ /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
+ preloadGapsysSigma6VdW[i][j] = 0.5_real * c12 / c6;
+ }
+ else
+ {
+ preloadGapsysSigma6VdW[i][j] = gapsysSigma6VdW;
+ }
+ }
+ }
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
+ {
+ /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
+ const real c12A = nbfp[2 * typeIndices[STATE_A][j] + 1];
+ const real c12B = nbfp[2 * typeIndices[STATE_B][j] + 1];
+ if (c12A > 0 && c12B > 0)
+ {
+ preloadAlphaVdwEff[j] = 0;
+ preloadAlphaCoulEff[j] = 0;
+ }
+ else
+ {
+ preloadAlphaVdwEff[j] = alpha_vdw;
+ preloadAlphaCoulEff[j] = alpha_coul;
+ }
+ }
+ if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+ {
+ /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
+ const real c12A = nbfp[2 * typeIndices[STATE_A][j] + 1];
+ const real c12B = nbfp[2 * typeIndices[STATE_B][j] + 1];
+ if (c12A > 0 && c12B > 0)
+ {
+ preloadGapsysScaleLinpointVdW[j] = 0;
+ preloadGapsysScaleLinpointCoul[j] = 0;
+ }
+ else
+ {
+ preloadGapsysScaleLinpointVdW[j] = gapsysScaleLinpointVdW;
+ preloadGapsysScaleLinpointCoul[j] = gapsysScaleLinpointCoul;
+ }
+ }
+ }
+ else
+ {
+ preloadJnr[j] = jjnr[k];
+ preloadPairIsValid[j] = false;
+ preloadPairIncluded[j] = false;
+ preloadAlphaVdwEff[j] = 0;
+ preloadAlphaCoulEff[j] = 0;
+ preloadGapsysScaleLinpointVdW[j] = 0;
+ preloadGapsysScaleLinpointCoul[j] = 0;
+
+ typeIndices[STATE_A][j] = ntiA + typeA[jjnr[k]];
+ typeIndices[STATE_B][j] = ntiB + typeB[jjnr[k]];
+ for (int i = 0; i < NSTATES; i++)
+ {
+ preloadLjPmeC6Grid[i][j] = 0;
+ preloadQq[i][j] = 0;
+ preloadSigma6[i][j] = 0;
+ preloadGapsysSigma6VdW[i][j] = 0;
+ }
+ }
+ }
+
+ RealType jx, jy, jz;
+ gmx::gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), preloadJnr, &jx, &jy, &jz);
+
+ const RealType pairIsValid = gmx::load<RealType>(preloadPairIsValid);
+ const RealType pairIncluded = gmx::load<RealType>(preloadPairIncluded);
+ const BoolType bPairIncluded = (pairIncluded != zero);
+ const BoolType bPairExcluded = (pairIncluded == zero && pairIsValid != zero);
+
+ const RealType dX = ix - jx;
+ const RealType dY = iy - jy;
+ const RealType dZ = iz - jz;
+ RealType rSq = dX * dX + dY * dY + dZ * dZ;
+
+ BoolType withinCutoffMask = (rSq < rcutoff_max2);
+
+ if (!gmx::anyTrue(withinCutoffMask || bPairExcluded))
{
/* We save significant time by skipping all code below.
* Note that with soft-core interactions, the actual cut-off
* check might be different. But since the soft-core distance
* is always larger than r, checking on r here is safe.
+ * Exclusions outside the cutoff can not be skipped as
+ * when using Ewald: the reciprocal-space
+ * Ewald component still needs to be subtracted.
*/
continue;
}
- npair_within_cutoff++;
-
- if (rsq > 0)
+ else
{
- /* Note that unlike in the nbnxn kernels, we do not need
- * to clamp the value of rsq before taking the invsqrt
- * to avoid NaN in the LJ calculation, since here we do
- * not calculate LJ interactions when C6 and C12 are zero.
- */
-
- rinv = gmx::invsqrt(rsq);
- r = rsq*rinv;
+ havePairsWithinCutoff = true;
}
- else
+
+ if (gmx::anyTrue(rlistSquared < rSq && bPairExcluded))
{
- /* The force at r=0 is zero, because of symmetry.
- * But note that the potential is in general non-zero,
- * since the soft-cored r will be non-zero.
- */
- rinv = 0;
- r = 0;
+ haveExcludedPairsBeyondRlist = true;
}
- if (sc_r_power == six)
+ const IntType jnr_s = gmx::load<IntType>(preloadJnr);
+ const BoolType bIiEqJnr = gmx::cvtIB2B(ii_s == jnr_s);
+
+ RealType c6[NSTATES];
+ RealType c12[NSTATES];
+ RealType gmx_unused sigma6[NSTATES];
+ RealType qq[NSTATES];
+ RealType gmx_unused ljPmeC6Grid[NSTATES];
+ RealType gmx_unused alphaVdwEff;
+ RealType gmx_unused alphaCoulEff;
+ RealType gmx_unused gapsysScaleLinpointVdWEff;
+ RealType gmx_unused gapsysScaleLinpointCoulEff;
+ RealType gmx_unused gapsysSigma6VdWEff[NSTATES];
+ for (int i = 0; i < NSTATES; i++)
{
- rpm2 = rsq*rsq; /* r4 */
- rp = rpm2*rsq; /* r6 */
+ gmx::gatherLoadTranspose<2>(nbfp.data(), typeIndices[i], &c6[i], &c12[i]);
+ qq[i] = gmx::load<RealType>(preloadQq[i]);
+ ljPmeC6Grid[i] = gmx::load<RealType>(preloadLjPmeC6Grid[i]);
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
+ {
+ sigma6[i] = gmx::load<RealType>(preloadSigma6[i]);
+ }
+ if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+ {
+ gapsysSigma6VdWEff[i] = gmx::load<RealType>(preloadGapsysSigma6VdW[i]);
+ }
}
- else if (sc_r_power == fourtyeight)
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
{
- rp = rsq*rsq*rsq; /* r6 */
- rp = rp*rp; /* r12 */
- rp = rp*rp; /* r24 */
- rp = rp*rp; /* r48 */
- rpm2 = rp/rsq; /* r46 */
+ alphaVdwEff = gmx::load<RealType>(preloadAlphaVdwEff);
+ alphaCoulEff = gmx::load<RealType>(preloadAlphaCoulEff);
}
- else
+ if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
{
- rp = std::pow(r, sc_r_power); /* not currently supported as input, but can handle it */
- rpm2 = rp/rsq;
+ gapsysScaleLinpointVdWEff = gmx::load<RealType>(preloadGapsysScaleLinpointVdW);
+ gapsysScaleLinpointCoulEff = gmx::load<RealType>(preloadGapsysScaleLinpointCoul);
}
- Fscal = 0;
+ // Avoid overflow of r^-12 at distances near zero
+ rSq = gmx::max(rSq, minDistanceSquared);
+ rInv = gmx::invsqrt(rSq);
+ r = rSq * rInv;
- qq[STATE_A] = iqA*chargeA[jnr];
- qq[STATE_B] = iqB*chargeB[jnr];
+ RealType gmx_unused rp, rpm2;
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
+ {
+ rpm2 = rSq * rSq; /* r4 */
+ rp = rpm2 * rSq; /* r6 */
+ }
+ else
+ {
+ /* The soft-core power p will not affect the results
+ * with not using soft-core, so we use power of 0 which gives
+ * the simplest math and cheapest code.
+ */
+ rpm2 = rInv * rInv;
+ rp = one;
+ }
- tj[STATE_A] = ntiA+2*typeA[jnr];
- tj[STATE_B] = ntiB+2*typeB[jnr];
+ RealType fScal(0);
- if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
+ /* The following block is masked to only calculate values having bPairIncluded. If
+ * bPairIncluded is true then withinCutoffMask must also be true. */
+ if (gmx::anyTrue(withinCutoffMask && bPairIncluded))
{
- c6[STATE_A] = nbfp[tj[STATE_A]];
- c6[STATE_B] = nbfp[tj[STATE_B]];
-
- for (i = 0; i < NSTATES; i++)
+ RealType gmx_unused fScalC[NSTATES], fScalV[NSTATES];
+ RealType vCoul[NSTATES], vVdw[NSTATES];
+ for (int i = 0; i < NSTATES; i++)
{
- c12[i] = nbfp[tj[i]+1];
- if ((c6[i] > 0) && (c12[i] > 0))
+ fScalC[i] = zero;
+ fScalV[i] = zero;
+ vCoul[i] = zero;
+ vVdw[i] = zero;
+
+ RealType gmx_unused rInvC, rInvV, rC, rV, rPInvC, rPInvV;
+
+ /* The following block is masked to require (qq[i] != 0 || c6[i] != 0 || c12[i]
+ * != 0) in addition to bPairIncluded, which in turn requires withinCutoffMask. */
+ BoolType nonZeroState = ((qq[i] != zero || c6[i] != zero || c12[i] != zero)
+ && bPairIncluded && withinCutoffMask);
+ if (gmx::anyTrue(nonZeroState))
{
- /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
- sigma6[i] = half*c12[i]/c6[i];
- sigma2[i] = std::cbrt(sigma6[i]);
- /* should be able to get rid of cbrt call eventually. Will require agreement on
- what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
- if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
{
- sigma6[i] = sigma6_min;
- sigma2[i] = sigma2_min;
- }
- }
- else
- {
- sigma6[i] = sigma6_def;
- sigma2[i] = sigma2_def;
- }
- if (sc_r_power == six)
- {
- sigma_pow[i] = sigma6[i];
- }
- else if (sc_r_power == fourtyeight)
- {
- sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
- sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
- sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
- }
- else
- { /* not really supported as input, but in here for testing the general case*/
- sigma_pow[i] = std::pow(sigma2[i], sc_r_power/2);
- }
- }
+ RealType divisor = (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
+ rPInvC = gmx::inv(divisor);
+ sixthRoot(rPInvC, &rInvC, &rC);
- /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
- if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
- {
- alpha_vdw_eff = 0;
- alpha_coul_eff = 0;
- }
- else
- {
- alpha_vdw_eff = alpha_vdw;
- alpha_coul_eff = alpha_coul;
- }
-
- for (i = 0; i < NSTATES; i++)
- {
- FscalC[i] = 0;
- FscalV[i] = 0;
- Vcoul[i] = 0;
- Vvdw[i] = 0;
-
- /* Only spend time on A or B state if it is non-zero */
- if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
- {
- /* this section has to be inside the loop because of the dependence on sigma_pow */
- rpinvC = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
- rinvC = std::pow(rpinvC, one/sc_r_power);
- rC = one/rinvC;
-
- rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
- rinvV = std::pow(rpinvV, one/sc_r_power);
- rV = one/rinvV;
-
- if (do_tab)
+ if constexpr (scLambdasOrAlphasDiffer)
+ {
+ RealType divisor = (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
+ rPInvV = gmx::inv(divisor);
+ sixthRoot(rPInvV, &rInvV, &rV);
+ }
+ else
+ {
+ /* We can avoid one expensive pow and one / operation */
+ rPInvV = rPInvC;
+ rInvV = rInvC;
+ rV = rC;
+ }
+ }
+ else
{
- rtC = rC*tabscale;
- n0 = rtC;
- epsC = rtC-n0;
- eps2C = epsC*epsC;
- n1C = tab_elemsize*n0;
-
- rtV = rV*tabscale;
- n0 = rtV;
- epsV = rtV-n0;
- eps2V = epsV*epsV;
- n1V = tab_elemsize*n0;
+ rPInvC = one;
+ rInvC = rInv;
+ rC = r;
+
+ rPInvV = one;
+ rInvV = rInv;
+ rV = r;
}
- /* Only process the coulomb interactions if we have charges,
- * and if we either include all entries in the list (no cutoff
+ /* Only process the coulomb interactions if we either
+ * include all entries in the list (no cutoff
* used in the kernel), or if we are within the cutoff.
*/
- bComputeElecInteraction = !bExactElecCutoff ||
- ( bConvertEwaldToCoulomb && r < rcoulomb) ||
- (!bConvertEwaldToCoulomb && rC < rcoulomb);
-
- if ( (qq[i] != 0) && bComputeElecInteraction)
+ BoolType computeElecInteraction;
+ if constexpr (elecInteractionTypeIsEwald)
+ {
+ computeElecInteraction = (r < rCoulomb && qq[i] != zero && bPairIncluded);
+ }
+ else
+ {
+ computeElecInteraction = (rC < rCoulomb && qq[i] != zero && bPairIncluded);
+ }
+ if (gmx::anyTrue(computeElecInteraction))
{
- switch (icoul)
+ if constexpr (elecInteractionTypeIsEwald)
{
- case GMX_NBKERNEL_ELEC_COULOMB:
- /* simple cutoff */
- Vcoul[i] = qq[i]*rinvC;
- FscalC[i] = Vcoul[i];
- /* The shift for the Coulomb potential is stored in
- * the RF parameter c_rf, which is 0 without shift.
- */
- Vcoul[i] -= qq[i]*ic->c_rf;
- break;
-
- case GMX_NBKERNEL_ELEC_REACTIONFIELD:
- /* reaction-field */
- Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
- FscalC[i] = qq[i]*(rinvC - two*krf*rC*rC);
- break;
-
- case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
- /* non-Ewald tabulated coulomb */
- nnn = n1C;
- Y = VFtab[nnn];
- F = VFtab[nnn+1];
- Geps = epsC*VFtab[nnn+2];
- Heps2 = eps2C*VFtab[nnn+3];
- Fp = F+Geps+Heps2;
- VV = Y+epsC*Fp;
- FF = Fp+Geps+two*Heps2;
- Vcoul[i] = qq[i]*VV;
- FscalC[i] = -qq[i]*tabscale*FF*rC;
- break;
-
- case GMX_NBKERNEL_ELEC_EWALD:
- if (bConvertEwaldToCoulomb)
- {
- /* Ewald FEP is done only on the 1/r part */
- Vcoul[i] = qq[i]*(rinvC-sh_ewald);
- FscalC[i] = qq[i]*rinvC;
- }
- else
- {
- ewrt = rC*ewtabscale;
- ewitab = static_cast<int>(ewrt);
- eweps = ewrt-ewitab;
- ewitab = 4*ewitab;
- FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
- rinvcorr = rinvC-sh_ewald;
- Vcoul[i] = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
- FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
- }
- break;
-
- case GMX_NBKERNEL_ELEC_NONE:
- FscalC[i] = zero;
- Vcoul[i] = zero;
- break;
-
- default:
- gmx_incons("Invalid icoul in free energy kernel");
+ vCoul[i] = ewaldPotential(qq[i], rInvC, sh_ewald);
+ if constexpr (computeForces)
+ {
+ fScalC[i] = ewaldScalarForce(qq[i], rInvC);
+ }
+
+ if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+ {
+ ewaldQuadraticPotential<computeForces>(qq[i],
+ facel,
+ rC,
+ rCutoffCoul,
+ LFC[i],
+ DLF[i],
+ gapsysScaleLinpointCoulEff,
+ sh_ewald,
+ &fScalC[i],
+ &vCoul[i],
+ &dvdlCoul,
+ computeElecInteraction);
+ }
}
-
- if (ic->coulomb_modifier == eintmodPOTSWITCH)
+ else
{
- d = rC - ic->rcoulomb_switch;
- d = (d > zero) ? d : zero;
- d2 = d*d;
- sw = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
- dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
-
- FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw;
- Vcoul[i] *= sw;
+ vCoul[i] = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
+ if constexpr (computeForces)
+ {
+ fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
+ }
+
+ if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+ {
+ reactionFieldQuadraticPotential<computeForces>(
+ qq[i],
+ facel,
+ rC,
+ rCutoffCoul,
+ LFC[i],
+ DLF[i],
+ gapsysScaleLinpointCoulEff,
+ krf,
+ crf,
+ &fScalC[i],
+ &vCoul[i],
+ &dvdlCoul,
+ computeElecInteraction);
+ }
+ }
- FscalC[i] = (rC < rcoulomb) ? FscalC[i] : zero;
- Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : zero;
+ vCoul[i] = gmx::selectByMask(vCoul[i], computeElecInteraction);
+ if constexpr (computeForces)
+ {
+ fScalC[i] = gmx::selectByMask(fScalC[i], computeElecInteraction);
}
}
- /* Only process the VDW interactions if we have
- * some non-zero parameters, and if we either
+ /* Only process the VDW interactions if we either
* include all entries in the list (no cutoff used
* in the kernel), or if we are within the cutoff.
*/
- bComputeVdwInteraction = !bExactVdwCutoff ||
- ( bConvertLJEwaldToLJ6 && r < rvdw) ||
- (!bConvertLJEwaldToLJ6 && rV < rvdw);
- if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
+ BoolType computeVdwInteraction;
+ if constexpr (vdwInteractionTypeIsEwald)
+ {
+ computeVdwInteraction =
+ (r < rVdw && (c6[i] != 0 || c12[i] != 0) && bPairIncluded);
+ }
+ else
+ {
+ computeVdwInteraction =
+ (rV < rVdw && (c6[i] != 0 || c12[i] != 0) && bPairIncluded);
+ }
+ if (gmx::anyTrue(computeVdwInteraction))
{
- switch (ivdw)
+ RealType rInv6;
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
+ {
+ rInv6 = rPInvV;
+ }
+ else
+ {
+ rInv6 = calculateRinv6(rInvV);
+ }
+ // Avoid overflow at short distance for masked exclusions and
+ // for foreign energy calculations at a hard core end state.
+ // Note that we should limit r^-6, and thus also r^-12, and
+ // not only r^-12, as that could lead to erroneously low instead
+ // of very high foreign energies.
+ rInv6 = gmx::min(rInv6, maxRInvSix);
+ RealType vVdw6 = calculateVdw6(c6[i], rInv6);
+ RealType vVdw12 = calculateVdw12(c12[i], rInv6);
+
+ vVdw[i] = lennardJonesPotential(
+ vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
+ if constexpr (computeForces)
{
- case GMX_NBKERNEL_VDW_LENNARDJONES:
- /* cutoff LJ */
- if (sc_r_power == six)
- {
- rinv6 = rpinvV;
- }
- else
- {
- rinv6 = rinvV*rinvV;
- rinv6 = rinv6*rinv6*rinv6;
- }
- Vvdw6 = c6[i]*rinv6;
- Vvdw12 = c12[i]*rinv6*rinv6;
-
- Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
- - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
- FscalV[i] = Vvdw12 - Vvdw6;
- break;
-
- case GMX_NBKERNEL_VDW_BUCKINGHAM:
- gmx_fatal(FARGS, "Buckingham free energy not supported.");
- case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
- /* Table LJ */
- nnn = n1V+4;
- /* dispersion */
- Y = VFtab[nnn];
- F = VFtab[nnn+1];
- Geps = epsV*VFtab[nnn+2];
- Heps2 = eps2V*VFtab[nnn+3];
- Fp = F+Geps+Heps2;
- VV = Y+epsV*Fp;
- FF = Fp+Geps+two*Heps2;
- Vvdw[i] += c6[i]*VV;
- FscalV[i] -= c6[i]*tabscale*FF*rV;
-
- /* repulsion */
- Y = VFtab[nnn+4];
- F = VFtab[nnn+5];
- Geps = epsV*VFtab[nnn+6];
- Heps2 = eps2V*VFtab[nnn+7];
- Fp = F+Geps+Heps2;
- VV = Y+epsV*Fp;
- FF = Fp+Geps+two*Heps2;
- Vvdw[i] += c12[i]*VV;
- FscalV[i] -= c12[i]*tabscale*FF*rV;
- break;
-
- case GMX_NBKERNEL_VDW_LJEWALD:
- if (sc_r_power == six)
- {
- rinv6 = rpinvV;
- }
- else
- {
- rinv6 = rinvV*rinvV;
- rinv6 = rinv6*rinv6*rinv6;
- }
- c6grid = nbfp_grid[tj[i]];
-
- if (bConvertLJEwaldToLJ6)
- {
- /* cutoff LJ */
- Vvdw6 = c6[i]*rinv6;
- Vvdw12 = c12[i]*rinv6*rinv6;
-
- Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
- - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
- FscalV[i] = Vvdw12 - Vvdw6;
- }
- else
- {
- /* Normal LJ-PME */
- ewcljrsq = ewclj2*rV*rV;
- exponent = std::exp(-ewcljrsq);
- poly = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);
- vvdw_disp = (c6[i]-c6grid*(one-poly))*rinv6;
- vvdw_rep = c12[i]*rinv6*rinv6;
- FscalV[i] = vvdw_rep - vvdw_disp - c6grid*onesixth*exponent*ewclj6;
- Vvdw[i] = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/six;
- }
- break;
-
- case GMX_NBKERNEL_VDW_NONE:
- Vvdw[i] = zero;
- FscalV[i] = zero;
- break;
-
- default:
- gmx_incons("Invalid ivdw in free energy kernel");
+ fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
}
- if (ic->vdw_modifier == eintmodPOTSWITCH)
+ if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
{
- d = rV - ic->rvdw_switch;
- d = (d > zero) ? d : zero;
- d2 = d*d;
- sw = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
- dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
+ lennardJonesQuadraticPotential<computeForces>(c6[i],
+ c12[i],
+ r,
+ rSq,
+ LFV[i],
+ DLF[i],
+ gapsysSigma6VdWEff[i],
+ gapsysScaleLinpointVdWEff,
+ repulsionShift,
+ dispersionShift,
+ &fScalV[i],
+ &vVdw[i],
+ &dvdlVdw,
+ computeVdwInteraction);
+ }
- FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw;
- Vvdw[i] *= sw;
+ if constexpr (vdwInteractionTypeIsEwald)
+ {
+ /* Subtract the grid potential at the cut-off */
+ vVdw[i] = vVdw[i]
+ + gmx::selectByMask(ewaldLennardJonesGridSubtract(
+ ljPmeC6Grid[i], shLjEwald, oneSixth),
+ computeVdwInteraction);
+ }
+
+ if constexpr (vdwModifierIsPotSwitch)
+ {
+ RealType d = rV - rVdwSwitch;
+ BoolType zeroMask = zero < d;
+ BoolType potSwitchMask = rV < rVdw;
+ d = gmx::selectByMask(d, zeroMask);
+ const RealType d2 = d * d;
+ const RealType sw =
+ one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
+
+ if constexpr (computeForces)
+ {
+ const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
+ fScalV[i] = potSwitchScalarForceMod(
+ fScalV[i], vVdw[i], sw, rV, dsw, potSwitchMask);
+ }
+ vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, potSwitchMask);
+ }
- FscalV[i] = (rV < rvdw) ? FscalV[i] : zero;
- Vvdw[i] = (rV < rvdw) ? Vvdw[i] : zero;
+ vVdw[i] = gmx::selectByMask(vVdw[i], computeVdwInteraction);
+ if constexpr (computeForces)
+ {
+ fScalV[i] = gmx::selectByMask(fScalV[i], computeVdwInteraction);
}
}
- /* FscalC (and FscalV) now contain: dV/drC * rC
- * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
- * Further down we first multiply by r^p-2 and then by
- * the vector r, which in total gives: dV/drC * (r/rC)^1-p
- */
- FscalC[i] *= rpinvC;
- FscalV[i] *= rpinvV;
- }
- }
+ if constexpr (computeForces)
+ {
+ /* fScalC (and fScalV) now contain: dV/drC * rC
+ * Now we multiply by rC^-6, so it will be: dV/drC * rC^-5
+ * Further down we first multiply by r^4 and then by
+ * the vector r, which in total gives: dV/drC * (r/rC)^-5
+ */
+ fScalC[i] = fScalC[i] * rPInvC;
+ fScalV[i] = fScalV[i] * rPInvV;
+ }
+ } // end of block requiring nonZeroState
+ } // end for (int i = 0; i < NSTATES; i++)
- /* Assemble A and B states */
- for (i = 0; i < NSTATES; i++)
+ /* Assemble A and B states. */
+ BoolType assembleStates = (bPairIncluded && withinCutoffMask);
+ if (gmx::anyTrue(assembleStates))
{
- vctot += LFC[i]*Vcoul[i];
- vvtot += LFV[i]*Vvdw[i];
+ for (int i = 0; i < NSTATES; i++)
+ {
+ vCTot = vCTot + LFC[i] * vCoul[i];
+ vVTot = vVTot + LFV[i] * vVdw[i];
- Fscal += LFC[i]*FscalC[i]*rpm2;
- Fscal += LFV[i]*FscalV[i]*rpm2;
+ if constexpr (computeForces)
+ {
+ fScal = fScal + LFC[i] * fScalC[i] * rpm2;
+ fScal = fScal + LFV[i] * fScalV[i] * rpm2;
+ }
- dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
- dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
+ if constexpr (softcoreType == KernelSoftcoreType::Beutler)
+ {
+ dvdlCoul = dvdlCoul + vCoul[i] * DLF[i]
+ + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
+ dvdlVdw = dvdlVdw + vVdw[i] * DLF[i]
+ + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
+ }
+ else
+ {
+ dvdlCoul = dvdlCoul + vCoul[i] * DLF[i];
+ dvdlVdw = dvdlVdw + vVdw[i] * DLF[i];
+ }
+ }
}
- }
- else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
+ } // end of block requiring bPairIncluded && withinCutoffMask
+ /* In the following block bPairIncluded should be false in the masks. */
+ if (icoul == NbkernelElecType::ReactionField)
{
- /* For excluded pairs, which are only in this pair list when
- * using the Verlet scheme, we don't use soft-core.
- * The group scheme also doesn't soft-core for these.
- * As there is no singularity, there is no need for soft-core.
- */
- VV = krf*rsq - crf;
- FF = -two*krf;
+ const BoolType computeReactionField = bPairExcluded;
- if (ii == jnr)
+ if (gmx::anyTrue(computeReactionField))
{
- VV *= half;
- }
+ /* For excluded pairs we don't use soft-core.
+ * As there is no singularity, there is no need for soft-core.
+ */
+ const RealType FF = -two * krf;
+ RealType VV = krf * rSq - crf;
- for (i = 0; i < NSTATES; i++)
- {
- vctot += LFC[i]*qq[i]*VV;
- Fscal += LFC[i]*qq[i]*FF;
- dvdl_coul += DLF[i]*qq[i]*VV;
+ /* If ii == jnr the i particle (ii) has itself (jnr)
+ * in its neighborlist. This corresponds to a self-interaction
+ * that will occur twice. Scale it down by 50% to only include
+ * it once.
+ */
+ VV = VV * gmx::blend(one, half, bIiEqJnr);
+
+ for (int i = 0; i < NSTATES; i++)
+ {
+ vCTot = vCTot + gmx::selectByMask(LFC[i] * qq[i] * VV, computeReactionField);
+ fScal = fScal + gmx::selectByMask(LFC[i] * qq[i] * FF, computeReactionField);
+ dvdlCoul = dvdlCoul + gmx::selectByMask(DLF[i] * qq[i] * VV, computeReactionField);
+ }
}
}
- if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
+ const BoolType computeElecEwaldInteraction = (bPairExcluded || r < rCoulomb);
+ if (elecInteractionTypeIsEwald && gmx::anyTrue(computeElecEwaldInteraction))
{
/* See comment in the preamble. When using Ewald interactions
* (unless we use a switch modifier) we subtract the reciprocal-space
* the softcore to the entire electrostatic interaction,
* including the reciprocal-space component.
*/
- real v_lr, f_lr;
+ RealType v_lr, f_lr;
- ewrt = r*ewtabscale;
- ewitab = static_cast<int>(ewrt);
- eweps = ewrt-ewitab;
- ewitab = 4*ewitab;
- f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1];
- v_lr = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
- f_lr *= rinv;
+ pmeCoulombCorrectionVF<computeForces>(rSq, ewaldBeta, &v_lr, &f_lr);
+ if constexpr (computeForces)
+ {
+ f_lr = f_lr * rInv * rInv;
+ }
/* Note that any possible Ewald shift has already been applied in
* the normal interaction part above.
*/
- if (ii == jnr)
- {
- /* If we get here, the i particle (ii) has itself (jnr)
- * in its neighborlist. This can only happen with the Verlet
- * scheme, and corresponds to a self-interaction that will
- * occur twice. Scale it down by 50% to only include it once.
- */
- v_lr *= half;
- }
+ /* If ii == jnr the i particle (ii) has itself (jnr)
+ * in its neighborlist. This corresponds to a self-interaction
+ * that will occur twice. Scale it down by 50% to only include
+ * it once.
+ */
+ v_lr = v_lr * gmx::blend(one, half, bIiEqJnr);
- for (i = 0; i < NSTATES; i++)
+ for (int i = 0; i < NSTATES; i++)
{
- vctot -= LFC[i]*qq[i]*v_lr;
- Fscal -= LFC[i]*qq[i]*f_lr;
- dvdl_coul -= (DLF[i]*qq[i])*v_lr;
+ vCTot = vCTot - gmx::selectByMask(LFC[i] * qq[i] * v_lr, computeElecEwaldInteraction);
+ if constexpr (computeForces)
+ {
+ fScal = fScal - gmx::selectByMask(LFC[i] * qq[i] * f_lr, computeElecEwaldInteraction);
+ }
+ dvdlCoul = dvdlCoul
+ - gmx::selectByMask(DLF[i] * qq[i] * v_lr, computeElecEwaldInteraction);
}
}
- if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
+ const BoolType computeVdwEwaldInteraction = (bPairExcluded || r < rVdw);
+ if (vdwInteractionTypeIsEwald && gmx::anyTrue(computeVdwEwaldInteraction))
{
/* See comment in the preamble. When using LJ-Ewald interactions
* (unless we use a switch modifier) we subtract the reciprocal-space
* the softcore to the entire VdW interaction,
* including the reciprocal-space component.
*/
- /* We could also use the analytical form here
- * iso a table, but that can cause issues for
- * r close to 0 for non-interacting pairs.
- */
- real rs, frac, f_lr;
- int ri;
-
- rs = rsq*rinv*ewtabscale;
- ri = static_cast<int>(rs);
- frac = rs - ri;
- f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
- /* TODO: Currently the Ewald LJ table does not contain
- * the factor 1/6, we should add this.
- */
- FF = f_lr*rinv/six;
- VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
- if (ii == jnr)
- {
- /* If we get here, the i particle (ii) has itself (jnr)
- * in its neighborlist. This can only happen with the Verlet
- * scheme, and corresponds to a self-interaction that will
- * occur twice. Scale it down by 50% to only include it once.
- */
- VV *= half;
- }
+ RealType v_lr, f_lr;
+ pmeLJCorrectionVF<computeForces>(
+ rInv, rSq, ewaldLJCoeffSq, ewaldLJCoeffSixDivSix, &v_lr, &f_lr, computeVdwEwaldInteraction, bIiEqJnr);
+ v_lr = v_lr * oneSixth;
- for (i = 0; i < NSTATES; i++)
+ for (int i = 0; i < NSTATES; i++)
{
- c6grid = nbfp_grid[tj[i]];
- vvtot += LFV[i]*c6grid*VV;
- Fscal += LFV[i]*c6grid*FF;
- dvdl_vdw += (DLF[i]*c6grid)*VV;
+ vVTot = vVTot + gmx::selectByMask(LFV[i] * ljPmeC6Grid[i] * v_lr, computeVdwEwaldInteraction);
+ if constexpr (computeForces)
+ {
+ fScal = fScal + gmx::selectByMask(LFV[i] * ljPmeC6Grid[i] * f_lr, computeVdwEwaldInteraction);
+ }
+ dvdlVdw = dvdlVdw + gmx::selectByMask(DLF[i] * ljPmeC6Grid[i] * v_lr, computeVdwEwaldInteraction);
}
}
- if (bDoForces)
+ if (computeForces && gmx::anyTrue(fScal != zero))
{
- tx = Fscal*dx;
- ty = Fscal*dy;
- tz = Fscal*dz;
- fix = fix + tx;
- fiy = fiy + ty;
- fiz = fiz + tz;
- /* OpenMP atomics are expensive, but this kernels is also
- * expensive, so we can take this hit, instead of using
- * thread-local output buffers and extra reduction.
- *
- * All the OpenMP regions in this file are trivial and should
- * not throw, so no need for try/catch.
- */
-#pragma omp atomic
- f[j3] -= tx;
-#pragma omp atomic
- f[j3+1] -= ty;
-#pragma omp atomic
- f[j3+2] -= tz;
+ const RealType tX = fScal * dX;
+ const RealType tY = fScal * dY;
+ const RealType tZ = fScal * dZ;
+ fIX = fIX + tX;
+ fIY = fIY + tY;
+ fIZ = fIZ + tZ;
+
+ gmx::transposeScatterDecrU<3>(forceRealPtr, preloadJnr, tX, tY, tZ);
}
- }
+ } // end for (int k = nj0; k < nj1; k += DataTypes::simdRealWidth)
- /* The atomics below are expensive with many OpenMP threads.
- * Here unperturbed i-particles will usually only have a few
- * (perturbed) j-particles in the list. Thus with a buffered list
- * we can skip a significant number of i-reductions with a check.
- */
- if (npair_within_cutoff > 0)
+ if (havePairsWithinCutoff)
{
- if (bDoForces)
- {
-#pragma omp atomic
- f[ii3] += fix;
-#pragma omp atomic
- f[ii3+1] += fiy;
-#pragma omp atomic
- f[ii3+2] += fiz;
- }
- if (bDoShiftForces)
+ if constexpr (computeForces)
{
-#pragma omp atomic
- fshift[is3] += fix;
-#pragma omp atomic
- fshift[is3+1] += fiy;
-#pragma omp atomic
- fshift[is3+2] += fiz;
+ gmx::transposeScatterIncrU<3>(forceRealPtr, preloadIi, fIX, fIY, fIZ);
+
+ if (doShiftForces)
+ {
+ gmx::transposeScatterIncrU<3>(
+ reinterpret_cast<real*>(threadForceShiftBuffer), preloadIs, fIX, fIY, fIZ);
+ }
}
- if (bDoPotential)
+ if (doPotential)
{
- ggid = gid[n];
-#pragma omp atomic
- Vc[ggid] += vctot;
-#pragma omp atomic
- Vv[ggid] += vvtot;
+ int ggid = gid[n];
+ threadVc[ggid] += gmx::reduce(vCTot);
+ threadVv[ggid] += gmx::reduce(vVTot);
}
}
- }
+ } // end for (int n = 0; n < nri; n++)
-#pragma omp atomic
- dvdl[efptCOUL] += dvdl_coul;
- #pragma omp atomic
- dvdl[efptVDW] += dvdl_vdw;
+ if (gmx::anyTrue(dvdlCoul != zero))
+ {
+ threadDvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += gmx::reduce(dvdlCoul);
+ }
+ if (gmx::anyTrue(dvdlVdw != zero))
+ {
+ threadDvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += gmx::reduce(dvdlVdw);
+ }
/* Estimate flops, average for free energy stuff:
* 12 flops per outer iteration
* 150 flops per inner iteration
+ * TODO: Update the number of flops and/or use different counts for different code paths.
*/
-#pragma omp atomic
- inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
+ atomicNrnbIncrement(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist.nri * 12 + nlist.jindex[nri] * 150);
+
+ if (haveExcludedPairsBeyondRlist > 0)
+ {
+ gmx_fatal(FARGS,
+ "There are perturbed non-bonded pair interactions beyond the pair-list cutoff "
+ "of %g nm, which is not supported. This can happen because the system is "
+ "unstable or because intra-molecular interactions at long distances are "
+ "excluded. If the "
+ "latter is the case, you can try to increase nstlist or rlist to avoid this."
+ "The error is likely triggered by the use of couple-intramol=no "
+ "and the maximal distance in the decoupled molecule exceeding rlist.",
+ rlist);
+ }
+}
+
+/*! \brief Pointer type matching the signature shared by all instantiations of
+ * the templated free-energy kernel, so run-time dispatch can hand back a
+ * uniform function pointer.
+ *
+ * Modernized from \c typedef to a \c using alias (the file already relies on
+ * C++17 features such as \c if \c constexpr).
+ */
+using KernelFunction = void (*)(const t_nblist&                                  nlist,
+                                const gmx::ArrayRefWithPadding<const gmx::RVec>& coords,
+                                const int                                        ntype,
+                                const real                                       rlist,
+                                const interaction_const_t&                       ic,
+                                gmx::ArrayRef<const gmx::RVec>                   shiftvec,
+                                gmx::ArrayRef<const real>                        nbfp,
+                                gmx::ArrayRef<const real>                        nbfp_grid,
+                                gmx::ArrayRef<const real>                        chargeA,
+                                gmx::ArrayRef<const real>                        chargeB,
+                                gmx::ArrayRef<const int>                         typeA,
+                                gmx::ArrayRef<const int>                         typeB,
+                                int                                              flags,
+                                gmx::ArrayRef<const real>                        lambda,
+                                t_nrnb* gmx_restrict                             nrnb,
+                                gmx::ArrayRefWithPadding<gmx::RVec>              threadForceBuffer,
+                                rvec*                                            threadForceShiftBuffer,
+                                gmx::ArrayRef<real>                              threadVc,
+                                gmx::ArrayRef<real>                              threadVv,
+                                gmx::ArrayRef<real>                              threadDvdl);
+
+//! \brief Innermost dispatch step: selects the SIMD or scalar kernel instantiation.
+//!
+//! Returns the SIMD kernel only when it was requested at run time (\p useSimd)
+//! AND this build provides SIMD reals, SIMD int32 arithmetic and SIMD kernels;
+//! in every other case the scalar instantiation is returned.
+template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch, bool computeForces>
+static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
+{
+    if (useSimd)
+    {
+#if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
+        return (nb_free_energy_kernel<SimdDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces>);
+#else
+        // SIMD was requested but this build cannot provide it; silently fall back to scalar.
+        return (nb_free_energy_kernel<ScalarDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces>);
+#endif
+    }
+    else
+    {
+        return (nb_free_energy_kernel<ScalarDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces>);
+    }
+}
+
+//! \brief Lowers the run-time \p computeForces flag into a compile-time template argument.
+template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
+static KernelFunction dispatchKernelOnComputeForces(const bool computeForces, const bool useSimd)
+{
+    return computeForces
+                   ? dispatchKernelOnUseSimd<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, true>(
+                             useSimd)
+                   : dispatchKernelOnUseSimd<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, false>(
+                             useSimd);
+}
+
+//! \brief Lowers the run-time VdW potential-switch flag into a compile-time template argument.
+template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
+static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch,
+                                                  const bool computeForces,
+                                                  const bool useSimd)
+{
+    return vdwModifierIsPotSwitch
+                   ? dispatchKernelOnComputeForces<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
+                             computeForces, useSimd)
+                   : dispatchKernelOnComputeForces<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
+                             computeForces, useSimd);
+}
+
+//! \brief Lowers the run-time "electrostatics is Ewald" flag into a compile-time template argument.
+template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
+static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
+                                                          const bool vdwModifierIsPotSwitch,
+                                                          const bool computeForces,
+                                                          const bool useSimd)
+{
+    return elecInteractionTypeIsEwald
+                   ? dispatchKernelOnVdwModifier<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
+                             vdwModifierIsPotSwitch, computeForces, useSimd)
+                   : dispatchKernelOnVdwModifier<softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
+                             vdwModifierIsPotSwitch, computeForces, useSimd);
+}
+
+//! \brief Lowers the run-time "VdW is (LJ-)Ewald" flag into a compile-time template argument.
+template<KernelSoftcoreType softcoreType, bool scLambdasOrAlphasDiffer>
+static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
+                                                         const bool elecInteractionTypeIsEwald,
+                                                         const bool vdwModifierIsPotSwitch,
+                                                         const bool computeForces,
+                                                         const bool useSimd)
+{
+    return vdwInteractionTypeIsEwald
+                   ? dispatchKernelOnElecInteractionType<softcoreType, scLambdasOrAlphasDiffer, true>(
+                             elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd)
+                   : dispatchKernelOnElecInteractionType<softcoreType, scLambdasOrAlphasDiffer, false>(
+                             elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd);
+}
+
+//! \brief Lowers the run-time "soft-core lambdas/alphas differ" flag into a compile-time template argument.
+template<KernelSoftcoreType softcoreType>
+static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
+                                                                  const bool vdwInteractionTypeIsEwald,
+                                                                  const bool elecInteractionTypeIsEwald,
+                                                                  const bool vdwModifierIsPotSwitch,
+                                                                  const bool computeForces,
+                                                                  const bool useSimd)
+{
+    return scLambdasOrAlphasDiffer
+                   ? dispatchKernelOnVdwInteractionType<softcoreType, true>(
+                             vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd)
+                   : dispatchKernelOnVdwInteractionType<softcoreType, false>(
+                             vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces, useSimd);
+}
+
+//! \brief Top of the dispatch chain: picks the kernel soft-core flavor from the
+//! interaction constants, then forwards the remaining run-time flags so each is
+//! turned into a compile-time template argument.
+static KernelFunction dispatchKernel(const bool scLambdasOrAlphasDiffer,
+                                     const bool vdwInteractionTypeIsEwald,
+                                     const bool elecInteractionTypeIsEwald,
+                                     const bool vdwModifierIsPotSwitch,
+                                     const bool computeForces,
+                                     const bool useSimd,
+                                     const interaction_const_t& ic)
+{
+    const auto& scParams   = *ic.softCoreParameters;
+    const bool  useBeutler = (scParams.softcoreType == SoftcoreType::Beutler);
+
+    /* With all relevant soft-core strength parameters zero the potentials are
+     * effectively hard-core, so the cheaper plain kernel can be used. */
+    const bool softcoreIsInactive =
+            useBeutler ? (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
+                       : (scParams.gapsysScaleLinpointCoul == 0 && scParams.gapsysScaleLinpointVdW == 0);
+
+    if (softcoreIsInactive)
+    {
+        return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::None>(
+                scLambdasOrAlphasDiffer,
+                vdwInteractionTypeIsEwald,
+                elecInteractionTypeIsEwald,
+                vdwModifierIsPotSwitch,
+                computeForces,
+                useSimd));
+    }
+    else if (useBeutler)
+    {
+        return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Beutler>(
+                scLambdasOrAlphasDiffer,
+                vdwInteractionTypeIsEwald,
+                elecInteractionTypeIsEwald,
+                vdwModifierIsPotSwitch,
+                computeForces,
+                useSimd));
+    }
+    else
+    {
+        return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Gapsys>(
+                scLambdasOrAlphasDiffer,
+                vdwInteractionTypeIsEwald,
+                elecInteractionTypeIsEwald,
+                vdwModifierIsPotSwitch,
+                computeForces,
+                useSimd));
+    }
+}
+
+
+/*! \brief Entry point for the free-energy non-bonded kernel.
+ *
+ * Validates the interaction settings, selects the kernel instantiation
+ * matching the runtime settings, and runs it on the given pair list.
+ */
+void gmx_nb_free_energy_kernel(const t_nblist&                                  nlist,
+                               const gmx::ArrayRefWithPadding<const gmx::RVec>& coords,
+                               const bool                                       useSimd,
+                               const int                                        ntype,
+                               const real                                       rlist,
+                               const interaction_const_t&                       ic,
+                               gmx::ArrayRef<const gmx::RVec>                   shiftvec,
+                               gmx::ArrayRef<const real>                        nbfp,
+                               gmx::ArrayRef<const real>                        nbfp_grid,
+                               gmx::ArrayRef<const real>                        chargeA,
+                               gmx::ArrayRef<const real>                        chargeB,
+                               gmx::ArrayRef<const int>                         typeA,
+                               gmx::ArrayRef<const int>                         typeB,
+                               int                                              flags,
+                               gmx::ArrayRef<const real>                        lambda,
+                               t_nrnb*                                          nrnb,
+                               gmx::ArrayRefWithPadding<gmx::RVec>              threadForceBuffer,
+                               rvec*                                            threadForceShiftBuffer,
+                               gmx::ArrayRef<real>                              threadVc,
+                               gmx::ArrayRef<real>                              threadVv,
+                               gmx::ArrayRef<real>                              threadDvdl)
+{
+    GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
+               "Unsupported eeltype with free energy");
+    GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
+
+    // Not all SIMD implementations need padding, but we provide padding anyhow so we can assert
+    GMX_ASSERT(!GMX_SIMD_HAVE_REAL || threadForceBuffer.empty()
+                       || threadForceBuffer.size() > threadForceBuffer.unpaddedArrayRef().ssize(),
+               "We need actual padding with at least one element for SIMD scatter operations");
+
+    const auto& scParams = *ic.softCoreParameters;
+
+    // Runtime settings that pick the compile-time kernel instantiation
+    const bool vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
+    const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
+    const bool vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
+    const bool computeForces              = ((flags & GMX_NONBONDED_DO_FORCE) != 0);
+
+    /* Coulomb and vdW softcore only need separate treatment when some
+     * alpha is non-zero and either the lambdas or the alphas differ. */
+    const real lambdaCoul    = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
+    const real lambdaVdw     = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
+    const bool alphasAreZero = (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0);
+    const bool scLambdasOrAlphasDiffer =
+            !alphasAreZero && !(lambdaCoul == lambdaVdw && scParams.alphaCoulomb == scParams.alphaVdw);
+
+    const KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
+                                                     vdwInteractionTypeIsEwald,
+                                                     elecInteractionTypeIsEwald,
+                                                     vdwModifierIsPotSwitch,
+                                                     computeForces,
+                                                     useSimd,
+                                                     ic);
+    kernelFunc(nlist,
+               coords,
+               ntype,
+               rlist,
+               ic,
+               shiftvec,
+               nbfp,
+               nbfp_grid,
+               chargeA,
+               chargeB,
+               typeA,
+               typeB,
+               flags,
+               lambda,
+               nrnb,
+               threadForceBuffer,
+               threadForceShiftBuffer,
+               threadVc,
+               threadVv,
+               threadDvdl);
}