Refactor md_enums
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
51 #include "gromacs/math/functions.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdtypes/forceoutput.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/interaction_const.h"
56 #include "gromacs/mdtypes/md_enums.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/arrayref.h"
61
62
63 //! Scalar (non-SIMD) data types.
64 struct ScalarDataTypes
65 {
66     using RealType                     = real; //!< The data type to use as real.
67     using IntType                      = int;  //!< The data type to use as int.
68     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
69     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
70 };
71
72 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
73 //! SIMD data types.
74 struct SimdDataTypes
75 {
76     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
77     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
78     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
79     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 };
81 #endif
82
83 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
84 template<class RealType>
85 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
86 {
87     *invPthRoot = gmx::invsqrt(std::cbrt(r));
88     *pthRoot    = 1 / (*invPthRoot);
89 }
90
91 template<class RealType>
92 static inline RealType calculateRinv6(const RealType rInvV)
93 {
94     RealType rInv6 = rInvV * rInvV;
95     return (rInv6 * rInv6 * rInv6);
96 }
97
98 template<class RealType>
99 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
100 {
101     return (c6 * rInv6);
102 }
103
104 template<class RealType>
105 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
106 {
107     return (c12 * rInv6 * rInv6);
108 }
109
110 /* reaction-field electrostatics */
111 template<class RealType>
112 static inline RealType reactionFieldScalarForce(const RealType qq,
113                                                 const RealType rInv,
114                                                 const RealType r,
115                                                 const real     krf,
116                                                 const real     two)
117 {
118     return (qq * (rInv - two * krf * r * r));
119 }
120 template<class RealType>
121 static inline RealType reactionFieldPotential(const RealType qq,
122                                               const RealType rInv,
123                                               const RealType r,
124                                               const real     krf,
125                                               const real     potentialShift)
126 {
127     return (qq * (rInv + krf * r * r - potentialShift));
128 }
129
130 /* Ewald electrostatics */
131 template<class RealType>
132 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
133 {
134     return (coulomb * rInv);
135 }
136 template<class RealType>
137 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
138 {
139     return (coulomb * (rInv - potentialShift));
140 }
141
142 /* cutoff LJ */
143 template<class RealType>
144 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 {
146     return (v12 - v6);
147 }
148 template<class RealType>
149 static inline RealType lennardJonesPotential(const RealType v6,
150                                              const RealType v12,
151                                              const RealType c6,
152                                              const RealType c12,
153                                              const real     repulsionShift,
154                                              const real     dispersionShift,
155                                              const real     oneSixth,
156                                              const real     oneTwelfth)
157 {
158     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
159 }
160
161 /* Ewald LJ */
162 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
163 {
164     return (c6grid * potentialShift * oneSixth);
165 }
166
167 /* LJ Potential switch */
168 template<class RealType>
169 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
170                                                const RealType potential,
171                                                const RealType sw,
172                                                const RealType r,
173                                                const RealType rVdw,
174                                                const RealType dsw,
175                                                const real     zero)
176 {
177     if (r < rVdw)
178     {
179         real fScalar = fScalarInp * sw - r * potential * dsw;
180         return (fScalar);
181     }
182     return (zero);
183 }
184 template<class RealType>
185 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
186                                              const RealType sw,
187                                              const RealType r,
188                                              const RealType rVdw,
189                                              const real     zero)
190 {
191     if (r < rVdw)
192     {
193         real potential = potentialInp * sw;
194         return (potential);
195     }
196     return (zero);
197 }
198
199
200 //! Templated free-energy non-bonded kernel
201 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
202 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
203                                   rvec* gmx_restrict         xx,
204                                   gmx::ForceWithShiftForces* forceWithShiftForces,
205                                   const t_forcerec* gmx_restrict fr,
206                                   const t_mdatoms* gmx_restrict mdatoms,
207                                   nb_kernel_data_t* gmx_restrict kernel_data,
208                                   t_nrnb* gmx_restrict nrnb)
209 {
210 #define STATE_A 0
211 #define STATE_B 1
212 #define NSTATES 2
213
214     using RealType = typename DataTypes::RealType;
215     using IntType  = typename DataTypes::IntType;
216
217     /* FIXME: How should these be handled with SIMD? */
218     constexpr real oneTwelfth = 1.0 / 12.0;
219     constexpr real oneSixth   = 1.0 / 6.0;
220     constexpr real zero       = 0.0;
221     constexpr real half       = 0.5;
222     constexpr real one        = 1.0;
223     constexpr real two        = 2.0;
224     constexpr real six        = 6.0;
225
226     /* Extract pointer to non-bonded interaction constants */
227     const interaction_const_t* ic = fr->ic;
228
229     // Extract pair list data
230     const int                nri    = nlist->nri;
231     gmx::ArrayRef<const int> iinr   = nlist->iinr;
232     gmx::ArrayRef<const int> jindex = nlist->jindex;
233     gmx::ArrayRef<const int> jjnr   = nlist->jjnr;
234     gmx::ArrayRef<const int> shift  = nlist->shift;
235     gmx::ArrayRef<const int> gid    = nlist->gid;
236
237     const real*               shiftvec  = fr->shift_vec[0];
238     const real*               chargeA   = mdatoms->chargeA;
239     const real*               chargeB   = mdatoms->chargeB;
240     real*                     Vc        = kernel_data->energygrp_elec;
241     const int*                typeA     = mdatoms->typeA;
242     const int*                typeB     = mdatoms->typeB;
243     const int                 ntype     = fr->ntype;
244     gmx::ArrayRef<const real> nbfp      = fr->nbfp;
245     gmx::ArrayRef<const real> nbfp_grid = fr->ljpme_c6grid;
246
247     real*      Vv = kernel_data->energygrp_vdw;
248     const real lambda_coul =
249             kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
250     const real lambda_vdw =
251             kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
252     gmx::ArrayRef<real> dvdl          = kernel_data->dvdl;
253     const auto&         scParams      = *ic->softCoreParameters;
254     const real          alpha_coul    = scParams.alphaCoulomb;
255     const real          alpha_vdw     = scParams.alphaVdw;
256     const real          lam_power     = scParams.lambdaPower;
257     const real          sigma6_def    = scParams.sigma6WithInvalidSigma;
258     const real          sigma6_min    = scParams.sigma6Minimum;
259     const bool          doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
260     const bool          doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
261     const bool          doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
262
263     // Extract data from interaction_const_t
264     const real facel           = ic->epsfac;
265     const real rCoulomb        = ic->rcoulomb;
266     const real krf             = ic->reactionFieldCoefficient;
267     const real crf             = ic->reactionFieldShift;
268     const real shLjEwald       = ic->sh_lj_ewald;
269     const real rVdw            = ic->rvdw;
270     const real dispersionShift = ic->dispersion_shift.cpot;
271     const real repulsionShift  = ic->repulsion_shift.cpot;
272
273     // Note that the nbnxm kernels do not support Coulomb potential switching at all
274     GMX_ASSERT(ic->coulomb_modifier != InteractionModifiers::PotSwitch,
275                "Potential switching is not supported for Coulomb with FEP");
276
277     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
278     if (vdwModifierIsPotSwitch)
279     {
280         const real d = ic->rvdw - ic->rvdw_switch;
281         vdw_swV3     = -10.0 / (d * d * d);
282         vdw_swV4     = 15.0 / (d * d * d * d);
283         vdw_swV5     = -6.0 / (d * d * d * d * d);
284         vdw_swF2     = -30.0 / (d * d * d);
285         vdw_swF3     = 60.0 / (d * d * d * d);
286         vdw_swF4     = -30.0 / (d * d * d * d * d);
287     }
288     else
289     {
290         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
291         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
292     }
293
294     NbkernelElecType icoul;
295     if (ic->eeltype == CoulombInteractionType::Cut || EEL_RF(ic->eeltype))
296     {
297         icoul = NbkernelElecType::ReactionField;
298     }
299     else
300     {
301         icoul = NbkernelElecType::None;
302     }
303
304     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
305     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
306
307     const real* tab_ewald_F_lj           = nullptr;
308     const real* tab_ewald_V_lj           = nullptr;
309     const real* ewtab                    = nullptr;
310     real        coulombTableScale        = 0;
311     real        coulombTableScaleInvHalf = 0;
312     real        vdwTableScale            = 0;
313     real        vdwTableScaleInvHalf     = 0;
314     real        sh_ewald                 = 0;
315     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
316     {
317         sh_ewald = ic->sh_ewald;
318     }
319     if (elecInteractionTypeIsEwald)
320     {
321         const auto& coulombTables = *ic->coulombEwaldTables;
322         ewtab                     = coulombTables.tableFDV0.data();
323         coulombTableScale         = coulombTables.scale;
324         coulombTableScaleInvHalf  = half / coulombTableScale;
325     }
326     if (vdwInteractionTypeIsEwald)
327     {
328         const auto& vdwTables = *ic->vdwEwaldTables;
329         tab_ewald_F_lj        = vdwTables.tableF.data();
330         tab_ewald_V_lj        = vdwTables.tableV.data();
331         vdwTableScale         = vdwTables.scale;
332         vdwTableScaleInvHalf  = half / vdwTableScale;
333     }
334
335     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
336      * reciprocal space. When we use non-switched Ewald interactions, we
337      * assume the soft-coring does not significantly affect the grid contribution
338      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
339      *
340      * However, we cannot use this approach for switch-modified since we would then
341      * effectively end up evaluating a significantly different interaction here compared to the
342      * normal (non-free-energy) kernels, either by applying a cutoff at a different
343      * position than what the user requested, or by switching different
344      * things (1/r rather than short-range Ewald). For these settings, we just
345      * use the traditional short-range Ewald interaction in that case.
346      */
347     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
348                        "Can not apply soft-core to switched Ewald potentials");
349
350     real dvdlCoul = 0;
351     real dvdlVdw  = 0;
352
353     /* Lambda factor for state A, 1-lambda*/
354     real LFC[NSTATES], LFV[NSTATES];
355     LFC[STATE_A] = one - lambda_coul;
356     LFV[STATE_A] = one - lambda_vdw;
357
358     /* Lambda factor for state B, lambda*/
359     LFC[STATE_B] = lambda_coul;
360     LFV[STATE_B] = lambda_vdw;
361
362     /*derivative of the lambda factor for state A and B */
363     real DLF[NSTATES];
364     DLF[STATE_A] = -1;
365     DLF[STATE_B] = 1;
366
367     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
368     constexpr real sc_r_power = 6.0_real;
369     for (int i = 0; i < NSTATES; i++)
370     {
371         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
372         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
373         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
374         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
375     }
376
377     // TODO: We should get rid of using pointers to real
378     const real* x             = xx[0];
379     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
380     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
381
382     const real rlistSquared = gmx::square(fr->rlist);
383
384     int numExcludedPairsBeyondRlist = 0;
385
386     for (int n = 0; n < nri; n++)
387     {
388         int npair_within_cutoff = 0;
389
390         const int  is3   = 3 * shift[n];
391         const real shX   = shiftvec[is3];
392         const real shY   = shiftvec[is3 + 1];
393         const real shZ   = shiftvec[is3 + 2];
394         const int  nj0   = jindex[n];
395         const int  nj1   = jindex[n + 1];
396         const int  ii    = iinr[n];
397         const int  ii3   = 3 * ii;
398         const real ix    = shX + x[ii3 + 0];
399         const real iy    = shY + x[ii3 + 1];
400         const real iz    = shZ + x[ii3 + 2];
401         const real iqA   = facel * chargeA[ii];
402         const real iqB   = facel * chargeB[ii];
403         const int  ntiA  = 2 * ntype * typeA[ii];
404         const int  ntiB  = 2 * ntype * typeB[ii];
405         real       vCTot = 0;
406         real       vVTot = 0;
407         real       fIX   = 0;
408         real       fIY   = 0;
409         real       fIZ   = 0;
410
411         for (int k = nj0; k < nj1; k++)
412         {
413             int            tj[NSTATES];
414             const int      jnr = jjnr[k];
415             const int      j3  = 3 * jnr;
416             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
417             RealType       r, rInv, rp, rpm2;
418             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
419             const RealType dX  = ix - x[j3];
420             const RealType dY  = iy - x[j3 + 1];
421             const RealType dZ  = iz - x[j3 + 2];
422             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
423             RealType       fScalC[NSTATES], fScalV[NSTATES];
424             /* Check if this pair on the exlusions list.*/
425             const bool bPairIncluded = nlist->excl_fep.empty() || nlist->excl_fep[k];
426
427             if (rSq >= rcutoff_max2 && bPairIncluded)
428             {
429                 /* We save significant time by skipping all code below.
430                  * Note that with soft-core interactions, the actual cut-off
431                  * check might be different. But since the soft-core distance
432                  * is always larger than r, checking on r here is safe.
433                  * Exclusions outside the cutoff can not be skipped as
434                  * when using Ewald: the reciprocal-space
435                  * Ewald component still needs to be subtracted.
436                  */
437
438                 continue;
439             }
440             npair_within_cutoff++;
441
442             if (rSq > rlistSquared)
443             {
444                 numExcludedPairsBeyondRlist++;
445             }
446
447             if (rSq > 0)
448             {
449                 /* Note that unlike in the nbnxn kernels, we do not need
450                  * to clamp the value of rSq before taking the invsqrt
451                  * to avoid NaN in the LJ calculation, since here we do
452                  * not calculate LJ interactions when C6 and C12 are zero.
453                  */
454
455                 rInv = gmx::invsqrt(rSq);
456                 r    = rSq * rInv;
457             }
458             else
459             {
460                 /* The force at r=0 is zero, because of symmetry.
461                  * But note that the potential is in general non-zero,
462                  * since the soft-cored r will be non-zero.
463                  */
464                 rInv = 0;
465                 r    = 0;
466             }
467
468             if (useSoftCore)
469             {
470                 rpm2 = rSq * rSq;  /* r4 */
471                 rp   = rpm2 * rSq; /* r6 */
472             }
473             else
474             {
475                 /* The soft-core power p will not affect the results
476                  * with not using soft-core, so we use power of 0 which gives
477                  * the simplest math and cheapest code.
478                  */
479                 rpm2 = rInv * rInv;
480                 rp   = 1;
481             }
482
483             RealType fScal = 0;
484
485             qq[STATE_A] = iqA * chargeA[jnr];
486             qq[STATE_B] = iqB * chargeB[jnr];
487
488             tj[STATE_A] = ntiA + 2 * typeA[jnr];
489             tj[STATE_B] = ntiB + 2 * typeB[jnr];
490
491             if (bPairIncluded)
492             {
493                 c6[STATE_A] = nbfp[tj[STATE_A]];
494                 c6[STATE_B] = nbfp[tj[STATE_B]];
495
496                 for (int i = 0; i < NSTATES; i++)
497                 {
498                     c12[i] = nbfp[tj[i] + 1];
499                     if (useSoftCore)
500                     {
501                         if ((c6[i] > 0) && (c12[i] > 0))
502                         {
503                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
504                             sigma6[i] = half * c12[i] / c6[i];
505                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
506                             {
507                                 sigma6[i] = sigma6_min;
508                             }
509                         }
510                         else
511                         {
512                             sigma6[i] = sigma6_def;
513                         }
514                     }
515                 }
516
517                 if (useSoftCore)
518                 {
519                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
520                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
521                     {
522                         alphaVdwEff  = 0;
523                         alphaCoulEff = 0;
524                     }
525                     else
526                     {
527                         alphaVdwEff  = alpha_vdw;
528                         alphaCoulEff = alpha_coul;
529                     }
530                 }
531
532                 for (int i = 0; i < NSTATES; i++)
533                 {
534                     fScalC[i] = 0;
535                     fScalV[i] = 0;
536                     vCoul[i]  = 0;
537                     vVdw[i]   = 0;
538
539                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
540
541                     /* Only spend time on A or B state if it is non-zero */
542                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
543                     {
544                         /* this section has to be inside the loop because of the dependence on sigma6 */
545                         if (useSoftCore)
546                         {
547                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
548                             pthRoot(rPInvC, &rInvC, &rC);
549                             if (scLambdasOrAlphasDiffer)
550                             {
551                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
552                                 pthRoot(rPInvV, &rInvV, &rV);
553                             }
554                             else
555                             {
556                                 /* We can avoid one expensive pow and one / operation */
557                                 rPInvV = rPInvC;
558                                 rInvV  = rInvC;
559                                 rV     = rC;
560                             }
561                         }
562                         else
563                         {
564                             rPInvC = 1;
565                             rInvC  = rInv;
566                             rC     = r;
567
568                             rPInvV = 1;
569                             rInvV  = rInv;
570                             rV     = r;
571                         }
572
573                         /* Only process the coulomb interactions if we have charges,
574                          * and if we either include all entries in the list (no cutoff
575                          * used in the kernel), or if we are within the cutoff.
576                          */
577                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
578                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
579
580                         if ((qq[i] != 0) && computeElecInteraction)
581                         {
582                             if (elecInteractionTypeIsEwald)
583                             {
584                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
585                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
586                             }
587                             else
588                             {
589                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
590                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
591                             }
592                         }
593
594                         /* Only process the VDW interactions if we have
595                          * some non-zero parameters, and if we either
596                          * include all entries in the list (no cutoff used
597                          * in the kernel), or if we are within the cutoff.
598                          */
599                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
600                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
601                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
602                         {
603                             RealType rInv6;
604                             if (useSoftCore)
605                             {
606                                 rInv6 = rPInvV;
607                             }
608                             else
609                             {
610                                 rInv6 = calculateRinv6(rInvV);
611                             }
612                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
613                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
614
615                             vVdw[i] = lennardJonesPotential(
616                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
617                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
618
619                             if (vdwInteractionTypeIsEwald)
620                             {
621                                 /* Subtract the grid potential at the cut-off */
622                                 vVdw[i] += ewaldLennardJonesGridSubtract(
623                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
624                             }
625
626                             if (vdwModifierIsPotSwitch)
627                             {
628                                 RealType d        = rV - ic->rvdw_switch;
629                                 d                 = (d > zero) ? d : zero;
630                                 const RealType d2 = d * d;
631                                 const RealType sw =
632                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
633                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
634
635                                 fScalV[i] = potSwitchScalarForceMod(
636                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
637                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
638                             }
639                         }
640
641                         /* fScalC (and fScalV) now contain: dV/drC * rC
642                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
643                          * Further down we first multiply by r^p-2 and then by
644                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
645                          */
646                         fScalC[i] *= rPInvC;
647                         fScalV[i] *= rPInvV;
648                     }
649                 } // end for (int i = 0; i < NSTATES; i++)
650
651                 /* Assemble A and B states */
652                 for (int i = 0; i < NSTATES; i++)
653                 {
654                     vCTot += LFC[i] * vCoul[i];
655                     vVTot += LFV[i] * vVdw[i];
656
657                     fScal += LFC[i] * fScalC[i] * rpm2;
658                     fScal += LFV[i] * fScalV[i] * rpm2;
659
660                     if (useSoftCore)
661                     {
662                         dvdlCoul += vCoul[i] * DLF[i]
663                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
664                         dvdlVdw += vVdw[i] * DLF[i]
665                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
666                     }
667                     else
668                     {
669                         dvdlCoul += vCoul[i] * DLF[i];
670                         dvdlVdw += vVdw[i] * DLF[i];
671                     }
672                 }
673             } // end if (bPairIncluded)
674             else if (icoul == NbkernelElecType::ReactionField)
675             {
676                 /* For excluded pairs, which are only in this pair list when
677                  * using the Verlet scheme, we don't use soft-core.
678                  * As there is no singularity, there is no need for soft-core.
679                  */
680                 const real FF = -two * krf;
681                 RealType   VV = krf * rSq - crf;
682
683                 if (ii == jnr)
684                 {
685                     VV *= half;
686                 }
687
688                 for (int i = 0; i < NSTATES; i++)
689                 {
690                     vCTot += LFC[i] * qq[i] * VV;
691                     fScal += LFC[i] * qq[i] * FF;
692                     dvdlCoul += DLF[i] * qq[i] * VV;
693                 }
694             }
695
696             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
697             {
698                 /* See comment in the preamble. When using Ewald interactions
699                  * (unless we use a switch modifier) we subtract the reciprocal-space
700                  * Ewald component here which made it possible to apply the free
701                  * energy interaction to 1/r (vanilla coulomb short-range part)
702                  * above. This gets us closer to the ideal case of applying
703                  * the softcore to the entire electrostatic interaction,
704                  * including the reciprocal-space component.
705                  */
706                 real v_lr, f_lr;
707
708                 const RealType ewrt   = r * coulombTableScale;
709                 IntType        ewitab = static_cast<IntType>(ewrt);
710                 const RealType eweps  = ewrt - ewitab;
711                 ewitab                = 4 * ewitab;
712                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
713                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
714                 f_lr *= rInv;
715
716                 /* Note that any possible Ewald shift has already been applied in
717                  * the normal interaction part above.
718                  */
719
720                 if (ii == jnr)
721                 {
722                     /* If we get here, the i particle (ii) has itself (jnr)
723                      * in its neighborlist. This can only happen with the Verlet
724                      * scheme, and corresponds to a self-interaction that will
725                      * occur twice. Scale it down by 50% to only include it once.
726                      */
727                     v_lr *= half;
728                 }
729
730                 for (int i = 0; i < NSTATES; i++)
731                 {
732                     vCTot -= LFC[i] * qq[i] * v_lr;
733                     fScal -= LFC[i] * qq[i] * f_lr;
734                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
735                 }
736             }
737
738             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
739             {
740                 /* See comment in the preamble. When using LJ-Ewald interactions
741                  * (unless we use a switch modifier) we subtract the reciprocal-space
742                  * Ewald component here which made it possible to apply the free
743                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
744                  * above. This gets us closer to the ideal case of applying
745                  * the softcore to the entire VdW interaction,
746                  * including the reciprocal-space component.
747                  */
748                 /* We could also use the analytical form here
749                  * iso a table, but that can cause issues for
750                  * r close to 0 for non-interacting pairs.
751                  */
752
753                 const RealType rs   = rSq * rInv * vdwTableScale;
754                 const IntType  ri   = static_cast<IntType>(rs);
755                 const RealType frac = rs - ri;
756                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
757                 /* TODO: Currently the Ewald LJ table does not contain
758                  * the factor 1/6, we should add this.
759                  */
760                 const RealType FF = f_lr * rInv / six;
761                 RealType       VV =
762                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
763                         / six;
764
765                 if (ii == jnr)
766                 {
767                     /* If we get here, the i particle (ii) has itself (jnr)
768                      * in its neighborlist. This can only happen with the Verlet
769                      * scheme, and corresponds to a self-interaction that will
770                      * occur twice. Scale it down by 50% to only include it once.
771                      */
772                     VV *= half;
773                 }
774
775                 for (int i = 0; i < NSTATES; i++)
776                 {
777                     const real c6grid = nbfp_grid[tj[i]];
778                     vVTot += LFV[i] * c6grid * VV;
779                     fScal += LFV[i] * c6grid * FF;
780                     dvdlVdw += (DLF[i] * c6grid) * VV;
781                 }
782             }
783
784             if (doForces)
785             {
786                 const real tX = fScal * dX;
787                 const real tY = fScal * dY;
788                 const real tZ = fScal * dZ;
789                 fIX           = fIX + tX;
790                 fIY           = fIY + tY;
791                 fIZ           = fIZ + tZ;
792                 /* OpenMP atomics are expensive, but this kernels is also
793                  * expensive, so we can take this hit, instead of using
794                  * thread-local output buffers and extra reduction.
795                  *
796                  * All the OpenMP regions in this file are trivial and should
797                  * not throw, so no need for try/catch.
798                  */
799 #pragma omp atomic
800                 f[j3] -= tX;
801 #pragma omp atomic
802                 f[j3 + 1] -= tY;
803 #pragma omp atomic
804                 f[j3 + 2] -= tZ;
805             }
806         } // end for (int k = nj0; k < nj1; k++)
807
808         /* The atomics below are expensive with many OpenMP threads.
809          * Here unperturbed i-particles will usually only have a few
810          * (perturbed) j-particles in the list. Thus with a buffered list
811          * we can skip a significant number of i-reductions with a check.
812          */
813         if (npair_within_cutoff > 0)
814         {
815             if (doForces)
816             {
817 #pragma omp atomic
818                 f[ii3] += fIX;
819 #pragma omp atomic
820                 f[ii3 + 1] += fIY;
821 #pragma omp atomic
822                 f[ii3 + 2] += fIZ;
823             }
824             if (doShiftForces)
825             {
826 #pragma omp atomic
827                 fshift[is3] += fIX;
828 #pragma omp atomic
829                 fshift[is3 + 1] += fIY;
830 #pragma omp atomic
831                 fshift[is3 + 2] += fIZ;
832             }
833             if (doPotential)
834             {
835                 int ggid = gid[n];
836 #pragma omp atomic
837                 Vc[ggid] += vCTot;
838 #pragma omp atomic
839                 Vv[ggid] += vVTot;
840             }
841         }
842     } // end for (int n = 0; n < nri; n++)
843
844 #pragma omp atomic
845     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdlCoul;
846 #pragma omp atomic
847     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdlVdw;
848
849     /* Estimate flops, average for free energy stuff:
850      * 12  flops per outer iteration
851      * 150 flops per inner iteration
852      */
853 #pragma omp atomic
854     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
855
856     if (numExcludedPairsBeyondRlist > 0)
857     {
858         gmx_fatal(FARGS,
859                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
860                   "of %g nm, which is not supported. This can happen because the system is "
861                   "unstable or because intra-molecular interactions at long distances are "
862                   "excluded. If the "
863                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
864                   "The error is likely triggered by the use of couple-intramol=no "
865                   "and the maximal distance in the decoupled molecule exceeding rlist.",
866                   numExcludedPairsBeyondRlist,
867                   fr->rlist);
868     }
869 }
870
871 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
872                                rvec* gmx_restrict         xx,
873                                gmx::ForceWithShiftForces* forceWithShiftForces,
874                                const t_forcerec* gmx_restrict fr,
875                                const t_mdatoms* gmx_restrict mdatoms,
876                                nb_kernel_data_t* gmx_restrict kernel_data,
877                                t_nrnb* gmx_restrict nrnb);
878
879 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
880 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
881 {
882     if (useSimd)
883     {
884 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
885         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
886         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
887 #else
888         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
889 #endif
890     }
891     else
892     {
893         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
894     }
895 }
896
897 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
898 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
899 {
900     if (vdwModifierIsPotSwitch)
901     {
902         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
903                 useSimd));
904     }
905     else
906     {
907         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
908                 useSimd));
909     }
910 }
911
912 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
913 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
914                                                           const bool vdwModifierIsPotSwitch,
915                                                           const bool useSimd)
916 {
917     if (elecInteractionTypeIsEwald)
918     {
919         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
920                 vdwModifierIsPotSwitch, useSimd));
921     }
922     else
923     {
924         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
925                 vdwModifierIsPotSwitch, useSimd));
926     }
927 }
928
929 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
930 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
931                                                          const bool elecInteractionTypeIsEwald,
932                                                          const bool vdwModifierIsPotSwitch,
933                                                          const bool useSimd)
934 {
935     if (vdwInteractionTypeIsEwald)
936     {
937         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
938                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
939     }
940     else
941     {
942         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
943                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
944     }
945 }
946
947 template<bool useSoftCore>
948 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
949                                                                   const bool vdwInteractionTypeIsEwald,
950                                                                   const bool elecInteractionTypeIsEwald,
951                                                                   const bool vdwModifierIsPotSwitch,
952                                                                   const bool useSimd)
953 {
954     if (scLambdasOrAlphasDiffer)
955     {
956         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
957                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
958     }
959     else
960     {
961         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
962                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
963     }
964 }
965
966 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
967                                      const bool                 vdwInteractionTypeIsEwald,
968                                      const bool                 elecInteractionTypeIsEwald,
969                                      const bool                 vdwModifierIsPotSwitch,
970                                      const bool                 useSimd,
971                                      const interaction_const_t& ic)
972 {
973     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
974     {
975         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
976                                                                    vdwInteractionTypeIsEwald,
977                                                                    elecInteractionTypeIsEwald,
978                                                                    vdwModifierIsPotSwitch,
979                                                                    useSimd));
980     }
981     else
982     {
983         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
984                                                                   vdwInteractionTypeIsEwald,
985                                                                   elecInteractionTypeIsEwald,
986                                                                   vdwModifierIsPotSwitch,
987                                                                   useSimd));
988     }
989 }
990
991
992 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
993                                rvec*                      xx,
994                                gmx::ForceWithShiftForces* ff,
995                                const t_forcerec*          fr,
996                                const t_mdatoms*           mdatoms,
997                                nb_kernel_data_t*          kernel_data,
998                                t_nrnb*                    nrnb)
999 {
1000     const interaction_const_t& ic = *fr->ic;
1001     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1002                "Unsupported eeltype with free energy");
1003     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1004
1005     const auto& scParams                   = *ic.softCoreParameters;
1006     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1007     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1008     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1009     bool        scLambdasOrAlphasDiffer    = true;
1010     const bool  useSimd                    = fr->use_simd_kernels;
1011
1012     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1013     {
1014         scLambdasOrAlphasDiffer = false;
1015     }
1016     else
1017     {
1018         if (kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1019                     == kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1020             && scParams.alphaCoulomb == scParams.alphaVdw)
1021         {
1022             scLambdasOrAlphasDiffer = false;
1023         }
1024     }
1025
1026     KernelFunction kernelFunc;
1027     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1028                                 vdwInteractionTypeIsEwald,
1029                                 elecInteractionTypeIsEwald,
1030                                 vdwModifierIsPotSwitch,
1031                                 useSimd,
1032                                 ic);
1033     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1034 }