2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "nb_free_energy.h"
46 #include "gromacs/gmxlib/nrnb.h"
47 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
48 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
49 #include "gromacs/math/functions.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/mdtypes/forceoutput.h"
52 #include "gromacs/mdtypes/forcerec.h"
53 #include "gromacs/mdtypes/interaction_const.h"
54 #include "gromacs/mdtypes/md_enums.h"
55 #include "gromacs/mdtypes/mdatom.h"
56 #include "gromacs/simd/simd.h"
57 #include "gromacs/utility/fatalerror.h"
60 //! Scalar (non-SIMD) data types.
61 struct ScalarDataTypes
63 using RealType = real; //!< The data type to use as real.
64 using IntType = int; //!< The data type to use as int.
65 static constexpr int simdRealWidth = 1; //!< The width of the RealType.
66 static constexpr int simdIntWidth = 1; //!< The width of the IntType.
69 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
73 using RealType = gmx::SimdReal; //!< The data type to use as real.
74 using IntType = gmx::SimdInt32; //!< The data type to use as int.
75 static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH; //!< The width of the RealType.
76 static constexpr int simdIntWidth = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template<class RealType>
82 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
84 *invPthRoot = gmx::invsqrt(std::cbrt(r));
85 *pthRoot = 1 / (*invPthRoot);
88 template<class RealType>
89 static inline RealType calculateRinv6(const RealType rinvV)
91 RealType rinv6 = rinvV * rinvV;
92 return (rinv6 * rinv6 * rinv6);
95 template<class RealType>
96 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
101 template<class RealType>
102 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
104 return (c12 * rinv6 * rinv6);
107 /* reaction-field electrostatics */
108 template<class RealType>
109 static inline RealType reactionFieldScalarForce(const RealType qq,
115 return (qq * (rinv - two * krf * r * r));
117 template<class RealType>
118 static inline RealType reactionFieldPotential(const RealType qq,
122 const real potentialShift)
124 return (qq * (rinv + krf * r * r - potentialShift));
127 /* Ewald electrostatics */
128 template<class RealType>
129 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
131 return (coulomb * rinv);
133 template<class RealType>
134 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
136 return (coulomb * (rinv - potentialShift));
140 template<class RealType>
141 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 template<class RealType>
146 static inline RealType lennardJonesPotential(const RealType v6,
150 const real repulsionShift,
151 const real dispersionShift,
153 const real onetwelfth)
155 return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
159 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
161 return (c6grid * potentialShift * onesixth);
164 /* LJ Potential switch */
165 template<class RealType>
166 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
167 const RealType potential,
176 real fScalar = fScalarInp * sw - r * potential * dsw;
181 template<class RealType>
182 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
190 real potential = potentialInp * sw;
197 //! Templated free-energy non-bonded kernel
198 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
199 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
200 rvec* gmx_restrict xx,
201 gmx::ForceWithShiftForces* forceWithShiftForces,
202 const t_forcerec* gmx_restrict fr,
203 const t_mdatoms* gmx_restrict mdatoms,
204 nb_kernel_data_t* gmx_restrict kernel_data,
205 t_nrnb* gmx_restrict nrnb)
211 using RealType = typename DataTypes::RealType;
212 using IntType = typename DataTypes::IntType;
214 /* FIXME: How should these be handled with SIMD? */
215 constexpr real onetwelfth = 1.0 / 12.0;
216 constexpr real onesixth = 1.0 / 6.0;
217 constexpr real zero = 0.0;
218 constexpr real half = 0.5;
219 constexpr real one = 1.0;
220 constexpr real two = 2.0;
221 constexpr real six = 6.0;
223 /* Extract pointer to non-bonded interaction constants */
224 const interaction_const_t* ic = fr->ic;
226 // Extract pair list data
227 const int nri = nlist->nri;
228 const int* iinr = nlist->iinr;
229 const int* jindex = nlist->jindex;
230 const int* jjnr = nlist->jjnr;
231 const int* shift = nlist->shift;
232 const int* gid = nlist->gid;
234 const real* shiftvec = fr->shift_vec[0];
235 const real* chargeA = mdatoms->chargeA;
236 const real* chargeB = mdatoms->chargeB;
237 real* Vc = kernel_data->energygrp_elec;
238 const int* typeA = mdatoms->typeA;
239 const int* typeB = mdatoms->typeB;
240 const int ntype = fr->ntype;
241 const real* nbfp = fr->nbfp.data();
242 const real* nbfp_grid = fr->ljpme_c6grid;
243 real* Vv = kernel_data->energygrp_vdw;
244 const real lambda_coul = kernel_data->lambda[efptCOUL];
245 const real lambda_vdw = kernel_data->lambda[efptVDW];
246 real* dvdl = kernel_data->dvdl;
247 const real alpha_coul = fr->sc_alphacoul;
248 const real alpha_vdw = fr->sc_alphavdw;
249 const real lam_power = fr->sc_power;
250 const real sigma6_def = fr->sc_sigma6_def;
251 const real sigma6_min = fr->sc_sigma6_min;
252 const bool doForces = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
253 const bool doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
254 const bool doPotential = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
256 // Extract data from interaction_const_t
257 const real facel = ic->epsfac;
258 const real rcoulomb = ic->rcoulomb;
259 const real krf = ic->k_rf;
260 const real crf = ic->c_rf;
261 const real sh_lj_ewald = ic->sh_lj_ewald;
262 const real rvdw = ic->rvdw;
263 const real dispersionShift = ic->dispersion_shift.cpot;
264 const real repulsionShift = ic->repulsion_shift.cpot;
266 // Note that the nbnxm kernels do not support Coulomb potential switching at all
267 GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
268 "Potential switching is not supported for Coulomb with FEP");
270 real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
271 if (vdwModifierIsPotSwitch)
273 const real d = ic->rvdw - ic->rvdw_switch;
274 vdw_swV3 = -10.0 / (d * d * d);
275 vdw_swV4 = 15.0 / (d * d * d * d);
276 vdw_swV5 = -6.0 / (d * d * d * d * d);
277 vdw_swF2 = -30.0 / (d * d * d);
278 vdw_swF3 = 60.0 / (d * d * d * d);
279 vdw_swF4 = -30.0 / (d * d * d * d * d);
283 /* Avoid warnings from stupid compilers (looking at you, Clang!) */
284 vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
288 if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
290 icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
294 icoul = GMX_NBKERNEL_ELEC_NONE;
297 real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
298 rcutoff_max2 = rcutoff_max2 * rcutoff_max2;
300 const real* tab_ewald_F_lj = nullptr;
301 const real* tab_ewald_V_lj = nullptr;
302 const real* ewtab = nullptr;
304 real ewtabhalfspace = 0;
306 if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
308 const auto& tables = *ic->coulombEwaldTables;
309 sh_ewald = ic->sh_ewald;
310 ewtab = tables.tableFDV0.data();
311 ewtabscale = tables.scale;
312 ewtabhalfspace = half / ewtabscale;
313 tab_ewald_F_lj = tables.tableF.data();
314 tab_ewald_V_lj = tables.tableV.data();
317 /* For Ewald/PME interactions we cannot easily apply the soft-core component to
318 * reciprocal space. When we use non-switched Ewald interactions, we
319 * assume the soft-coring does not significantly affect the grid contribution
320 * and apply the soft-core only to the full 1/r (- shift) pair contribution.
322 * However, we cannot use this approach for switch-modified since we would then
323 * effectively end up evaluating a significantly different interaction here compared to the
324 * normal (non-free-energy) kernels, either by applying a cutoff at a different
325 * position than what the user requested, or by switching different
326 * things (1/r rather than short-range Ewald). For these settings, we just
327 * use the traditional short-range Ewald interaction in that case.
329 GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
330 "Can not apply soft-core to switched Ewald potentials");
335 /* Lambda factor for state A, 1-lambda*/
336 real LFC[NSTATES], LFV[NSTATES];
337 LFC[STATE_A] = one - lambda_coul;
338 LFV[STATE_A] = one - lambda_vdw;
340 /* Lambda factor for state B, lambda*/
341 LFC[STATE_B] = lambda_coul;
342 LFV[STATE_B] = lambda_vdw;
344 /*derivative of the lambda factor for state A and B */
349 real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
350 constexpr real sc_r_power = 6.0_real;
351 for (int i = 0; i < NSTATES; i++)
353 lfac_coul[i] = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
354 dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
355 lfac_vdw[i] = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
356 dlfac_vdw[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
359 // TODO: We should get rid of using pointers to real
360 const real* x = xx[0];
361 real* gmx_restrict f = &(forceWithShiftForces->force()[0][0]);
362 real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
364 for (int n = 0; n < nri; n++)
366 int npair_within_cutoff = 0;
368 const int is3 = 3 * shift[n];
369 const real shX = shiftvec[is3];
370 const real shY = shiftvec[is3 + 1];
371 const real shZ = shiftvec[is3 + 2];
372 const int nj0 = jindex[n];
373 const int nj1 = jindex[n + 1];
374 const int ii = iinr[n];
375 const int ii3 = 3 * ii;
376 const real ix = shX + x[ii3 + 0];
377 const real iy = shY + x[ii3 + 1];
378 const real iz = shZ + x[ii3 + 2];
379 const real iqA = facel * chargeA[ii];
380 const real iqB = facel * chargeB[ii];
381 const int ntiA = 2 * ntype * typeA[ii];
382 const int ntiB = 2 * ntype * typeB[ii];
389 for (int k = nj0; k < nj1; k++)
392 const int jnr = jjnr[k];
393 const int j3 = 3 * jnr;
394 RealType c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
395 RealType r, rinv, rp, rpm2;
396 RealType alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
397 const RealType dx = ix - x[j3];
398 const RealType dy = iy - x[j3 + 1];
399 const RealType dz = iz - x[j3 + 2];
400 const RealType rsq = dx * dx + dy * dy + dz * dz;
401 RealType FscalC[NSTATES], FscalV[NSTATES];
403 if (rsq >= rcutoff_max2)
405 /* We save significant time by skipping all code below.
406 * Note that with soft-core interactions, the actual cut-off
407 * check might be different. But since the soft-core distance
408 * is always larger than r, checking on r here is safe.
412 npair_within_cutoff++;
416 /* Note that unlike in the nbnxn kernels, we do not need
417 * to clamp the value of rsq before taking the invsqrt
418 * to avoid NaN in the LJ calculation, since here we do
419 * not calculate LJ interactions when C6 and C12 are zero.
422 rinv = gmx::invsqrt(rsq);
427 /* The force at r=0 is zero, because of symmetry.
428 * But note that the potential is in general non-zero,
429 * since the soft-cored r will be non-zero.
437 rpm2 = rsq * rsq; /* r4 */
438 rp = rpm2 * rsq; /* r6 */
442 /* The soft-core power p will not affect the results
443 * with not using soft-core, so we use power of 0 which gives
444 * the simplest math and cheapest code.
452 qq[STATE_A] = iqA * chargeA[jnr];
453 qq[STATE_B] = iqB * chargeB[jnr];
455 tj[STATE_A] = ntiA + 2 * typeA[jnr];
456 tj[STATE_B] = ntiB + 2 * typeB[jnr];
458 if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
460 c6[STATE_A] = nbfp[tj[STATE_A]];
461 c6[STATE_B] = nbfp[tj[STATE_B]];
463 for (int i = 0; i < NSTATES; i++)
465 c12[i] = nbfp[tj[i] + 1];
468 if ((c6[i] > 0) && (c12[i] > 0))
470 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
471 sigma6[i] = half * c12[i] / c6[i];
472 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
474 sigma6[i] = sigma6_min;
479 sigma6[i] = sigma6_def;
486 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
487 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
494 alpha_vdw_eff = alpha_vdw;
495 alpha_coul_eff = alpha_coul;
499 for (int i = 0; i < NSTATES; i++)
506 RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
508 /* Only spend time on A or B state if it is non-zero */
509 if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
511 /* this section has to be inside the loop because of the dependence on sigma6 */
514 rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
515 pthRoot(rpinvC, &rinvC, &rC);
516 if (scLambdasOrAlphasDiffer)
518 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
519 pthRoot(rpinvV, &rinvV, &rV);
523 /* We can avoid one expensive pow and one / operation */
540 /* Only process the coulomb interactions if we have charges,
541 * and if we either include all entries in the list (no cutoff
542 * used in the kernel), or if we are within the cutoff.
544 bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
545 || (!elecInteractionTypeIsEwald && rC < rcoulomb);
547 if ((qq[i] != 0) && computeElecInteraction)
549 if (elecInteractionTypeIsEwald)
551 Vcoul[i] = ewaldPotential(qq[i], rinvC, sh_ewald);
552 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
556 Vcoul[i] = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
557 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
561 /* Only process the VDW interactions if we have
562 * some non-zero parameters, and if we either
563 * include all entries in the list (no cutoff used
564 * in the kernel), or if we are within the cutoff.
566 bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
567 || (!vdwInteractionTypeIsEwald && rV < rvdw);
568 if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
577 rinv6 = calculateRinv6(rinvV);
579 RealType Vvdw6 = calculateVdw6(c6[i], rinv6);
580 RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
582 Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
583 dispersionShift, onesixth, onetwelfth);
584 FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
586 if (vdwInteractionTypeIsEwald)
588 /* Subtract the grid potential at the cut-off */
589 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
590 sh_lj_ewald, onesixth);
593 if (vdwModifierIsPotSwitch)
595 RealType d = rV - ic->rvdw_switch;
596 d = (d > zero) ? d : zero;
597 const RealType d2 = d * d;
599 one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
600 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
602 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
604 Vvdw[i] = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
608 /* FscalC (and FscalV) now contain: dV/drC * rC
609 * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
610 * Further down we first multiply by r^p-2 and then by
611 * the vector r, which in total gives: dV/drC * (r/rC)^1-p
618 /* Assemble A and B states */
619 for (int i = 0; i < NSTATES; i++)
621 vctot += LFC[i] * Vcoul[i];
622 vvtot += LFV[i] * Vvdw[i];
624 Fscal += LFC[i] * FscalC[i] * rpm2;
625 Fscal += LFV[i] * FscalV[i] * rpm2;
629 dvdl_coul += Vcoul[i] * DLF[i]
630 + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
631 dvdl_vdw += Vvdw[i] * DLF[i]
632 + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
636 dvdl_coul += Vcoul[i] * DLF[i];
637 dvdl_vdw += Vvdw[i] * DLF[i];
641 else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
643 /* For excluded pairs, which are only in this pair list when
644 * using the Verlet scheme, we don't use soft-core.
645 * As there is no singularity, there is no need for soft-core.
647 const real FF = -two * krf;
648 RealType VV = krf * rsq - crf;
655 for (int i = 0; i < NSTATES; i++)
657 vctot += LFC[i] * qq[i] * VV;
658 Fscal += LFC[i] * qq[i] * FF;
659 dvdl_coul += DLF[i] * qq[i] * VV;
663 if (elecInteractionTypeIsEwald && r < rcoulomb)
665 /* See comment in the preamble. When using Ewald interactions
666 * (unless we use a switch modifier) we subtract the reciprocal-space
667 * Ewald component here which made it possible to apply the free
668 * energy interaction to 1/r (vanilla coulomb short-range part)
669 * above. This gets us closer to the ideal case of applying
670 * the softcore to the entire electrostatic interaction,
671 * including the reciprocal-space component.
675 const RealType ewrt = r * ewtabscale;
676 IntType ewitab = static_cast<IntType>(ewrt);
677 const RealType eweps = ewrt - ewitab;
679 f_lr = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
680 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
683 /* Note that any possible Ewald shift has already been applied in
684 * the normal interaction part above.
689 /* If we get here, the i particle (ii) has itself (jnr)
690 * in its neighborlist. This can only happen with the Verlet
691 * scheme, and corresponds to a self-interaction that will
692 * occur twice. Scale it down by 50% to only include it once.
697 for (int i = 0; i < NSTATES; i++)
699 vctot -= LFC[i] * qq[i] * v_lr;
700 Fscal -= LFC[i] * qq[i] * f_lr;
701 dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
705 if (vdwInteractionTypeIsEwald && r < rvdw)
707 /* See comment in the preamble. When using LJ-Ewald interactions
708 * (unless we use a switch modifier) we subtract the reciprocal-space
709 * Ewald component here which made it possible to apply the free
710 * energy interaction to r^-6 (vanilla LJ6 short-range part)
711 * above. This gets us closer to the ideal case of applying
712 * the softcore to the entire VdW interaction,
713 * including the reciprocal-space component.
715 /* We could also use the analytical form here
716 * iso a table, but that can cause issues for
717 * r close to 0 for non-interacting pairs.
720 const RealType rs = rsq * rinv * ewtabscale;
721 const IntType ri = static_cast<IntType>(rs);
722 const RealType frac = rs - ri;
723 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
724 /* TODO: Currently the Ewald LJ table does not contain
725 * the factor 1/6, we should add this.
727 const RealType FF = f_lr * rinv / six;
729 (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
733 /* If we get here, the i particle (ii) has itself (jnr)
734 * in its neighborlist. This can only happen with the Verlet
735 * scheme, and corresponds to a self-interaction that will
736 * occur twice. Scale it down by 50% to only include it once.
741 for (int i = 0; i < NSTATES; i++)
743 const real c6grid = nbfp_grid[tj[i]];
744 vvtot += LFV[i] * c6grid * VV;
745 Fscal += LFV[i] * c6grid * FF;
746 dvdl_vdw += (DLF[i] * c6grid) * VV;
752 const real tx = Fscal * dx;
753 const real ty = Fscal * dy;
754 const real tz = Fscal * dz;
758 /* OpenMP atomics are expensive, but this kernels is also
759 * expensive, so we can take this hit, instead of using
760 * thread-local output buffers and extra reduction.
762 * All the OpenMP regions in this file are trivial and should
763 * not throw, so no need for try/catch.
774 /* The atomics below are expensive with many OpenMP threads.
775 * Here unperturbed i-particles will usually only have a few
776 * (perturbed) j-particles in the list. Thus with a buffered list
777 * we can skip a significant number of i-reductions with a check.
779 if (npair_within_cutoff > 0)
795 fshift[is3 + 1] += fiy;
797 fshift[is3 + 2] += fiz;
811 dvdl[efptCOUL] += dvdl_coul;
813 dvdl[efptVDW] += dvdl_vdw;
815 /* Estimate flops, average for free energy stuff:
816 * 12 flops per outer iteration
817 * 150 flops per inner iteration
820 inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
823 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
824 rvec* gmx_restrict xx,
825 gmx::ForceWithShiftForces* forceWithShiftForces,
826 const t_forcerec* gmx_restrict fr,
827 const t_mdatoms* gmx_restrict mdatoms,
828 nb_kernel_data_t* gmx_restrict kernel_data,
829 t_nrnb* gmx_restrict nrnb);
831 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
832 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
836 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
837 /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
838 return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
839 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
841 return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
842 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
847 return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
848 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
852 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
853 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
855 if (vdwModifierIsPotSwitch)
857 return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
858 elecInteractionTypeIsEwald, true>(useSimd));
862 return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
863 elecInteractionTypeIsEwald, false>(useSimd));
867 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
868 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
869 const bool vdwModifierIsPotSwitch,
872 if (elecInteractionTypeIsEwald)
874 return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
875 vdwModifierIsPotSwitch, useSimd));
879 return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
880 vdwModifierIsPotSwitch, useSimd));
884 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
885 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
886 const bool elecInteractionTypeIsEwald,
887 const bool vdwModifierIsPotSwitch,
890 if (vdwInteractionTypeIsEwald)
892 return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
893 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
897 return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
898 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
902 template<bool useSoftCore>
903 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
904 const bool vdwInteractionTypeIsEwald,
905 const bool elecInteractionTypeIsEwald,
906 const bool vdwModifierIsPotSwitch,
909 if (scLambdasOrAlphasDiffer)
911 return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
912 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
916 return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
917 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
921 static KernelFunction dispatchKernel(const bool scLambdasOrAlphasDiffer,
922 const bool vdwInteractionTypeIsEwald,
923 const bool elecInteractionTypeIsEwald,
924 const bool vdwModifierIsPotSwitch,
926 const t_forcerec* fr)
928 if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
930 return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
931 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
932 vdwModifierIsPotSwitch, useSimd));
936 return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
937 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
938 vdwModifierIsPotSwitch, useSimd));
943 void gmx_nb_free_energy_kernel(const t_nblist* nlist,
945 gmx::ForceWithShiftForces* ff,
946 const t_forcerec* fr,
947 const t_mdatoms* mdatoms,
948 nb_kernel_data_t* kernel_data,
951 GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
952 "Unsupported eeltype with free energy");
954 const bool vdwInteractionTypeIsEwald = (EVDW_PME(fr->ic->vdwtype));
955 const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
956 const bool vdwModifierIsPotSwitch = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
957 bool scLambdasOrAlphasDiffer = true;
958 const bool useSimd = fr->use_simd_kernels;
960 if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
962 scLambdasOrAlphasDiffer = false;
964 else if (fr->sc_r_power == 6.0_real)
966 if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
968 scLambdasOrAlphasDiffer = false;
973 GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
976 KernelFunction kernelFunc;
977 kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
978 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, fr);
979 kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);