Remove nb_kernel_data_t
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
50 #include "gromacs/math/functions.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/mdtypes/forceoutput.h"
53 #include "gromacs/mdtypes/forcerec.h"
54 #include "gromacs/mdtypes/interaction_const.h"
55 #include "gromacs/mdtypes/md_enums.h"
56 #include "gromacs/mdtypes/mdatom.h"
57 #include "gromacs/mdtypes/nblist.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/arrayref.h"
61
62
63 //! Scalar (non-SIMD) data types.
64 struct ScalarDataTypes
65 {
66     using RealType                     = real; //!< The data type to use as real.
67     using IntType                      = int;  //!< The data type to use as int.
68     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
69     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
70 };
71
72 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
73 //! SIMD data types.
74 struct SimdDataTypes
75 {
76     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
77     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
78     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
79     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 };
81 #endif
82
83 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
84 template<class RealType>
85 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
86 {
87     *invPthRoot = gmx::invsqrt(std::cbrt(r));
88     *pthRoot    = 1 / (*invPthRoot);
89 }
90
91 template<class RealType>
92 static inline RealType calculateRinv6(const RealType rInvV)
93 {
94     RealType rInv6 = rInvV * rInvV;
95     return (rInv6 * rInv6 * rInv6);
96 }
97
98 template<class RealType>
99 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
100 {
101     return (c6 * rInv6);
102 }
103
104 template<class RealType>
105 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
106 {
107     return (c12 * rInv6 * rInv6);
108 }
109
110 /* reaction-field electrostatics */
111 template<class RealType>
112 static inline RealType reactionFieldScalarForce(const RealType qq,
113                                                 const RealType rInv,
114                                                 const RealType r,
115                                                 const real     krf,
116                                                 const real     two)
117 {
118     return (qq * (rInv - two * krf * r * r));
119 }
120 template<class RealType>
121 static inline RealType reactionFieldPotential(const RealType qq,
122                                               const RealType rInv,
123                                               const RealType r,
124                                               const real     krf,
125                                               const real     potentialShift)
126 {
127     return (qq * (rInv + krf * r * r - potentialShift));
128 }
129
130 /* Ewald electrostatics */
131 template<class RealType>
132 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
133 {
134     return (coulomb * rInv);
135 }
136 template<class RealType>
137 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
138 {
139     return (coulomb * (rInv - potentialShift));
140 }
141
142 /* cutoff LJ */
143 template<class RealType>
144 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 {
146     return (v12 - v6);
147 }
148 template<class RealType>
149 static inline RealType lennardJonesPotential(const RealType v6,
150                                              const RealType v12,
151                                              const RealType c6,
152                                              const RealType c12,
153                                              const real     repulsionShift,
154                                              const real     dispersionShift,
155                                              const real     oneSixth,
156                                              const real     oneTwelfth)
157 {
158     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
159 }
160
161 /* Ewald LJ */
162 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
163 {
164     return (c6grid * potentialShift * oneSixth);
165 }
166
167 /* LJ Potential switch */
168 template<class RealType>
169 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
170                                                const RealType potential,
171                                                const RealType sw,
172                                                const RealType r,
173                                                const RealType rVdw,
174                                                const RealType dsw,
175                                                const real     zero)
176 {
177     if (r < rVdw)
178     {
179         real fScalar = fScalarInp * sw - r * potential * dsw;
180         return (fScalar);
181     }
182     return (zero);
183 }
184 template<class RealType>
185 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
186                                              const RealType sw,
187                                              const RealType r,
188                                              const RealType rVdw,
189                                              const real     zero)
190 {
191     if (r < rVdw)
192     {
193         real potential = potentialInp * sw;
194         return (potential);
195     }
196     return (zero);
197 }
198
199
200 //! Templated free-energy non-bonded kernel
201 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
202 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
203                                   rvec* gmx_restrict         xx,
204                                   gmx::ForceWithShiftForces* forceWithShiftForces,
205                                   const t_forcerec* gmx_restrict fr,
206                                   const t_mdatoms* gmx_restrict mdatoms,
207                                   int                           flags,
208                                   gmx::ArrayRef<const real>     lambda,
209                                   gmx::ArrayRef<real>           dvdl,
210                                   gmx::ArrayRef<real>           energygrp_elec,
211                                   gmx::ArrayRef<real>           energygrp_vdw,
212                                   t_nrnb* gmx_restrict nrnb)
213 {
214 #define STATE_A 0
215 #define STATE_B 1
216 #define NSTATES 2
217
218     using RealType = typename DataTypes::RealType;
219     using IntType  = typename DataTypes::IntType;
220
221     /* FIXME: How should these be handled with SIMD? */
222     constexpr real oneTwelfth = 1.0 / 12.0;
223     constexpr real oneSixth   = 1.0 / 6.0;
224     constexpr real zero       = 0.0;
225     constexpr real half       = 0.5;
226     constexpr real one        = 1.0;
227     constexpr real two        = 2.0;
228     constexpr real six        = 6.0;
229
230     /* Extract pointer to non-bonded interaction constants */
231     const interaction_const_t* ic = fr->ic.get();
232
233     // Extract pair list data
234     const int                nri    = nlist->nri;
235     gmx::ArrayRef<const int> iinr   = nlist->iinr;
236     gmx::ArrayRef<const int> jindex = nlist->jindex;
237     gmx::ArrayRef<const int> jjnr   = nlist->jjnr;
238     gmx::ArrayRef<const int> shift  = nlist->shift;
239     gmx::ArrayRef<const int> gid    = nlist->gid;
240
241     const real*               shiftvec  = fr->shift_vec[0];
242     const real*               chargeA   = mdatoms->chargeA;
243     const real*               chargeB   = mdatoms->chargeB;
244     const int*                typeA     = mdatoms->typeA;
245     const int*                typeB     = mdatoms->typeB;
246     const int                 ntype     = fr->ntype;
247     gmx::ArrayRef<const real> nbfp      = fr->nbfp;
248     gmx::ArrayRef<const real> nbfp_grid = fr->ljpme_c6grid;
249
250     const real  lambda_coul   = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
251     const real  lambda_vdw    = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
252     const auto& scParams      = *ic->softCoreParameters;
253     const real  alpha_coul    = scParams.alphaCoulomb;
254     const real  alpha_vdw     = scParams.alphaVdw;
255     const real  lam_power     = scParams.lambdaPower;
256     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
257     const real  sigma6_min    = scParams.sigma6Minimum;
258     const bool  doForces      = ((flags & GMX_NONBONDED_DO_FORCE) != 0);
259     const bool  doShiftForces = ((flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
260     const bool  doPotential   = ((flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
261
262     // Extract data from interaction_const_t
263     const real facel           = ic->epsfac;
264     const real rCoulomb        = ic->rcoulomb;
265     const real krf             = ic->reactionFieldCoefficient;
266     const real crf             = ic->reactionFieldShift;
267     const real shLjEwald       = ic->sh_lj_ewald;
268     const real rVdw            = ic->rvdw;
269     const real dispersionShift = ic->dispersion_shift.cpot;
270     const real repulsionShift  = ic->repulsion_shift.cpot;
271
272     // Note that the nbnxm kernels do not support Coulomb potential switching at all
273     GMX_ASSERT(ic->coulomb_modifier != InteractionModifiers::PotSwitch,
274                "Potential switching is not supported for Coulomb with FEP");
275
276     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
277     if (vdwModifierIsPotSwitch)
278     {
279         const real d = ic->rvdw - ic->rvdw_switch;
280         vdw_swV3     = -10.0 / (d * d * d);
281         vdw_swV4     = 15.0 / (d * d * d * d);
282         vdw_swV5     = -6.0 / (d * d * d * d * d);
283         vdw_swF2     = -30.0 / (d * d * d);
284         vdw_swF3     = 60.0 / (d * d * d * d);
285         vdw_swF4     = -30.0 / (d * d * d * d * d);
286     }
287     else
288     {
289         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
290         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
291     }
292
293     NbkernelElecType icoul;
294     if (ic->eeltype == CoulombInteractionType::Cut || EEL_RF(ic->eeltype))
295     {
296         icoul = NbkernelElecType::ReactionField;
297     }
298     else
299     {
300         icoul = NbkernelElecType::None;
301     }
302
303     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
304     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
305
306     const real* tab_ewald_F_lj           = nullptr;
307     const real* tab_ewald_V_lj           = nullptr;
308     const real* ewtab                    = nullptr;
309     real        coulombTableScale        = 0;
310     real        coulombTableScaleInvHalf = 0;
311     real        vdwTableScale            = 0;
312     real        vdwTableScaleInvHalf     = 0;
313     real        sh_ewald                 = 0;
314     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
315     {
316         sh_ewald = ic->sh_ewald;
317     }
318     if (elecInteractionTypeIsEwald)
319     {
320         const auto& coulombTables = *ic->coulombEwaldTables;
321         ewtab                     = coulombTables.tableFDV0.data();
322         coulombTableScale         = coulombTables.scale;
323         coulombTableScaleInvHalf  = half / coulombTableScale;
324     }
325     if (vdwInteractionTypeIsEwald)
326     {
327         const auto& vdwTables = *ic->vdwEwaldTables;
328         tab_ewald_F_lj        = vdwTables.tableF.data();
329         tab_ewald_V_lj        = vdwTables.tableV.data();
330         vdwTableScale         = vdwTables.scale;
331         vdwTableScaleInvHalf  = half / vdwTableScale;
332     }
333
334     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
335      * reciprocal space. When we use non-switched Ewald interactions, we
336      * assume the soft-coring does not significantly affect the grid contribution
337      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
338      *
339      * However, we cannot use this approach for switch-modified since we would then
340      * effectively end up evaluating a significantly different interaction here compared to the
341      * normal (non-free-energy) kernels, either by applying a cutoff at a different
342      * position than what the user requested, or by switching different
343      * things (1/r rather than short-range Ewald). For these settings, we just
344      * use the traditional short-range Ewald interaction in that case.
345      */
346     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
347                        "Can not apply soft-core to switched Ewald potentials");
348
349     real dvdlCoul = 0;
350     real dvdlVdw  = 0;
351
352     /* Lambda factor for state A, 1-lambda*/
353     real LFC[NSTATES], LFV[NSTATES];
354     LFC[STATE_A] = one - lambda_coul;
355     LFV[STATE_A] = one - lambda_vdw;
356
357     /* Lambda factor for state B, lambda*/
358     LFC[STATE_B] = lambda_coul;
359     LFV[STATE_B] = lambda_vdw;
360
361     /*derivative of the lambda factor for state A and B */
362     real DLF[NSTATES];
363     DLF[STATE_A] = -1;
364     DLF[STATE_B] = 1;
365
366     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
367     constexpr real sc_r_power = 6.0_real;
368     for (int i = 0; i < NSTATES; i++)
369     {
370         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
371         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
372         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
373         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
374     }
375
376     // TODO: We should get rid of using pointers to real
377     const real* x             = xx[0];
378     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
379     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
380
381     const real rlistSquared = gmx::square(fr->rlist);
382
383     int numExcludedPairsBeyondRlist = 0;
384
385     for (int n = 0; n < nri; n++)
386     {
387         int npair_within_cutoff = 0;
388
389         const int  is3   = 3 * shift[n];
390         const real shX   = shiftvec[is3];
391         const real shY   = shiftvec[is3 + 1];
392         const real shZ   = shiftvec[is3 + 2];
393         const int  nj0   = jindex[n];
394         const int  nj1   = jindex[n + 1];
395         const int  ii    = iinr[n];
396         const int  ii3   = 3 * ii;
397         const real ix    = shX + x[ii3 + 0];
398         const real iy    = shY + x[ii3 + 1];
399         const real iz    = shZ + x[ii3 + 2];
400         const real iqA   = facel * chargeA[ii];
401         const real iqB   = facel * chargeB[ii];
402         const int  ntiA  = 2 * ntype * typeA[ii];
403         const int  ntiB  = 2 * ntype * typeB[ii];
404         real       vCTot = 0;
405         real       vVTot = 0;
406         real       fIX   = 0;
407         real       fIY   = 0;
408         real       fIZ   = 0;
409
410         for (int k = nj0; k < nj1; k++)
411         {
412             int            tj[NSTATES];
413             const int      jnr = jjnr[k];
414             const int      j3  = 3 * jnr;
415             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
416             RealType       r, rInv, rp, rpm2;
417             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
418             const RealType dX  = ix - x[j3];
419             const RealType dY  = iy - x[j3 + 1];
420             const RealType dZ  = iz - x[j3 + 2];
421             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
422             RealType       fScalC[NSTATES], fScalV[NSTATES];
423             /* Check if this pair on the exlusions list.*/
424             const bool bPairIncluded = nlist->excl_fep.empty() || nlist->excl_fep[k];
425
426             if (rSq >= rcutoff_max2 && bPairIncluded)
427             {
428                 /* We save significant time by skipping all code below.
429                  * Note that with soft-core interactions, the actual cut-off
430                  * check might be different. But since the soft-core distance
431                  * is always larger than r, checking on r here is safe.
432                  * Exclusions outside the cutoff can not be skipped as
433                  * when using Ewald: the reciprocal-space
434                  * Ewald component still needs to be subtracted.
435                  */
436
437                 continue;
438             }
439             npair_within_cutoff++;
440
441             if (rSq > rlistSquared)
442             {
443                 numExcludedPairsBeyondRlist++;
444             }
445
446             if (rSq > 0)
447             {
448                 /* Note that unlike in the nbnxn kernels, we do not need
449                  * to clamp the value of rSq before taking the invsqrt
450                  * to avoid NaN in the LJ calculation, since here we do
451                  * not calculate LJ interactions when C6 and C12 are zero.
452                  */
453
454                 rInv = gmx::invsqrt(rSq);
455                 r    = rSq * rInv;
456             }
457             else
458             {
459                 /* The force at r=0 is zero, because of symmetry.
460                  * But note that the potential is in general non-zero,
461                  * since the soft-cored r will be non-zero.
462                  */
463                 rInv = 0;
464                 r    = 0;
465             }
466
467             if (useSoftCore)
468             {
469                 rpm2 = rSq * rSq;  /* r4 */
470                 rp   = rpm2 * rSq; /* r6 */
471             }
472             else
473             {
474                 /* The soft-core power p will not affect the results
475                  * with not using soft-core, so we use power of 0 which gives
476                  * the simplest math and cheapest code.
477                  */
478                 rpm2 = rInv * rInv;
479                 rp   = 1;
480             }
481
482             RealType fScal = 0;
483
484             qq[STATE_A] = iqA * chargeA[jnr];
485             qq[STATE_B] = iqB * chargeB[jnr];
486
487             tj[STATE_A] = ntiA + 2 * typeA[jnr];
488             tj[STATE_B] = ntiB + 2 * typeB[jnr];
489
490             if (bPairIncluded)
491             {
492                 c6[STATE_A] = nbfp[tj[STATE_A]];
493                 c6[STATE_B] = nbfp[tj[STATE_B]];
494
495                 for (int i = 0; i < NSTATES; i++)
496                 {
497                     c12[i] = nbfp[tj[i] + 1];
498                     if (useSoftCore)
499                     {
500                         if ((c6[i] > 0) && (c12[i] > 0))
501                         {
502                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
503                             sigma6[i] = half * c12[i] / c6[i];
504                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
505                             {
506                                 sigma6[i] = sigma6_min;
507                             }
508                         }
509                         else
510                         {
511                             sigma6[i] = sigma6_def;
512                         }
513                     }
514                 }
515
516                 if (useSoftCore)
517                 {
518                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
519                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
520                     {
521                         alphaVdwEff  = 0;
522                         alphaCoulEff = 0;
523                     }
524                     else
525                     {
526                         alphaVdwEff  = alpha_vdw;
527                         alphaCoulEff = alpha_coul;
528                     }
529                 }
530
531                 for (int i = 0; i < NSTATES; i++)
532                 {
533                     fScalC[i] = 0;
534                     fScalV[i] = 0;
535                     vCoul[i]  = 0;
536                     vVdw[i]   = 0;
537
538                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
539
540                     /* Only spend time on A or B state if it is non-zero */
541                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
542                     {
543                         /* this section has to be inside the loop because of the dependence on sigma6 */
544                         if (useSoftCore)
545                         {
546                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
547                             pthRoot(rPInvC, &rInvC, &rC);
548                             if (scLambdasOrAlphasDiffer)
549                             {
550                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
551                                 pthRoot(rPInvV, &rInvV, &rV);
552                             }
553                             else
554                             {
555                                 /* We can avoid one expensive pow and one / operation */
556                                 rPInvV = rPInvC;
557                                 rInvV  = rInvC;
558                                 rV     = rC;
559                             }
560                         }
561                         else
562                         {
563                             rPInvC = 1;
564                             rInvC  = rInv;
565                             rC     = r;
566
567                             rPInvV = 1;
568                             rInvV  = rInv;
569                             rV     = r;
570                         }
571
572                         /* Only process the coulomb interactions if we have charges,
573                          * and if we either include all entries in the list (no cutoff
574                          * used in the kernel), or if we are within the cutoff.
575                          */
576                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
577                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
578
579                         if ((qq[i] != 0) && computeElecInteraction)
580                         {
581                             if (elecInteractionTypeIsEwald)
582                             {
583                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
584                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
585                             }
586                             else
587                             {
588                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
589                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
590                             }
591                         }
592
593                         /* Only process the VDW interactions if we have
594                          * some non-zero parameters, and if we either
595                          * include all entries in the list (no cutoff used
596                          * in the kernel), or if we are within the cutoff.
597                          */
598                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
599                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
600                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
601                         {
602                             RealType rInv6;
603                             if (useSoftCore)
604                             {
605                                 rInv6 = rPInvV;
606                             }
607                             else
608                             {
609                                 rInv6 = calculateRinv6(rInvV);
610                             }
611                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
612                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
613
614                             vVdw[i] = lennardJonesPotential(
615                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
616                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
617
618                             if (vdwInteractionTypeIsEwald)
619                             {
620                                 /* Subtract the grid potential at the cut-off */
621                                 vVdw[i] += ewaldLennardJonesGridSubtract(
622                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
623                             }
624
625                             if (vdwModifierIsPotSwitch)
626                             {
627                                 RealType d        = rV - ic->rvdw_switch;
628                                 d                 = (d > zero) ? d : zero;
629                                 const RealType d2 = d * d;
630                                 const RealType sw =
631                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
632                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
633
634                                 fScalV[i] = potSwitchScalarForceMod(
635                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
636                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
637                             }
638                         }
639
640                         /* fScalC (and fScalV) now contain: dV/drC * rC
641                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
642                          * Further down we first multiply by r^p-2 and then by
643                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
644                          */
645                         fScalC[i] *= rPInvC;
646                         fScalV[i] *= rPInvV;
647                     }
648                 } // end for (int i = 0; i < NSTATES; i++)
649
650                 /* Assemble A and B states */
651                 for (int i = 0; i < NSTATES; i++)
652                 {
653                     vCTot += LFC[i] * vCoul[i];
654                     vVTot += LFV[i] * vVdw[i];
655
656                     fScal += LFC[i] * fScalC[i] * rpm2;
657                     fScal += LFV[i] * fScalV[i] * rpm2;
658
659                     if (useSoftCore)
660                     {
661                         dvdlCoul += vCoul[i] * DLF[i]
662                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
663                         dvdlVdw += vVdw[i] * DLF[i]
664                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
665                     }
666                     else
667                     {
668                         dvdlCoul += vCoul[i] * DLF[i];
669                         dvdlVdw += vVdw[i] * DLF[i];
670                     }
671                 }
672             } // end if (bPairIncluded)
673             else if (icoul == NbkernelElecType::ReactionField)
674             {
675                 /* For excluded pairs, which are only in this pair list when
676                  * using the Verlet scheme, we don't use soft-core.
677                  * As there is no singularity, there is no need for soft-core.
678                  */
679                 const real FF = -two * krf;
680                 RealType   VV = krf * rSq - crf;
681
682                 if (ii == jnr)
683                 {
684                     VV *= half;
685                 }
686
687                 for (int i = 0; i < NSTATES; i++)
688                 {
689                     vCTot += LFC[i] * qq[i] * VV;
690                     fScal += LFC[i] * qq[i] * FF;
691                     dvdlCoul += DLF[i] * qq[i] * VV;
692                 }
693             }
694
695             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
696             {
697                 /* See comment in the preamble. When using Ewald interactions
698                  * (unless we use a switch modifier) we subtract the reciprocal-space
699                  * Ewald component here which made it possible to apply the free
700                  * energy interaction to 1/r (vanilla coulomb short-range part)
701                  * above. This gets us closer to the ideal case of applying
702                  * the softcore to the entire electrostatic interaction,
703                  * including the reciprocal-space component.
704                  */
705                 real v_lr, f_lr;
706
707                 const RealType ewrt   = r * coulombTableScale;
708                 IntType        ewitab = static_cast<IntType>(ewrt);
709                 const RealType eweps  = ewrt - ewitab;
710                 ewitab                = 4 * ewitab;
711                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
712                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
713                 f_lr *= rInv;
714
715                 /* Note that any possible Ewald shift has already been applied in
716                  * the normal interaction part above.
717                  */
718
719                 if (ii == jnr)
720                 {
721                     /* If we get here, the i particle (ii) has itself (jnr)
722                      * in its neighborlist. This can only happen with the Verlet
723                      * scheme, and corresponds to a self-interaction that will
724                      * occur twice. Scale it down by 50% to only include it once.
725                      */
726                     v_lr *= half;
727                 }
728
729                 for (int i = 0; i < NSTATES; i++)
730                 {
731                     vCTot -= LFC[i] * qq[i] * v_lr;
732                     fScal -= LFC[i] * qq[i] * f_lr;
733                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
734                 }
735             }
736
737             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
738             {
739                 /* See comment in the preamble. When using LJ-Ewald interactions
740                  * (unless we use a switch modifier) we subtract the reciprocal-space
741                  * Ewald component here which made it possible to apply the free
742                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
743                  * above. This gets us closer to the ideal case of applying
744                  * the softcore to the entire VdW interaction,
745                  * including the reciprocal-space component.
746                  */
747                 /* We could also use the analytical form here
748                  * iso a table, but that can cause issues for
749                  * r close to 0 for non-interacting pairs.
750                  */
751
752                 const RealType rs   = rSq * rInv * vdwTableScale;
753                 const IntType  ri   = static_cast<IntType>(rs);
754                 const RealType frac = rs - ri;
755                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
756                 /* TODO: Currently the Ewald LJ table does not contain
757                  * the factor 1/6, we should add this.
758                  */
759                 const RealType FF = f_lr * rInv / six;
760                 RealType       VV =
761                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
762                         / six;
763
764                 if (ii == jnr)
765                 {
766                     /* If we get here, the i particle (ii) has itself (jnr)
767                      * in its neighborlist. This can only happen with the Verlet
768                      * scheme, and corresponds to a self-interaction that will
769                      * occur twice. Scale it down by 50% to only include it once.
770                      */
771                     VV *= half;
772                 }
773
774                 for (int i = 0; i < NSTATES; i++)
775                 {
776                     const real c6grid = nbfp_grid[tj[i]];
777                     vVTot += LFV[i] * c6grid * VV;
778                     fScal += LFV[i] * c6grid * FF;
779                     dvdlVdw += (DLF[i] * c6grid) * VV;
780                 }
781             }
782
783             if (doForces)
784             {
785                 const real tX = fScal * dX;
786                 const real tY = fScal * dY;
787                 const real tZ = fScal * dZ;
788                 fIX           = fIX + tX;
789                 fIY           = fIY + tY;
790                 fIZ           = fIZ + tZ;
791                 /* OpenMP atomics are expensive, but this kernels is also
792                  * expensive, so we can take this hit, instead of using
793                  * thread-local output buffers and extra reduction.
794                  *
795                  * All the OpenMP regions in this file are trivial and should
796                  * not throw, so no need for try/catch.
797                  */
798 #pragma omp atomic
799                 f[j3] -= tX;
800 #pragma omp atomic
801                 f[j3 + 1] -= tY;
802 #pragma omp atomic
803                 f[j3 + 2] -= tZ;
804             }
805         } // end for (int k = nj0; k < nj1; k++)
806
807         /* The atomics below are expensive with many OpenMP threads.
808          * Here unperturbed i-particles will usually only have a few
809          * (perturbed) j-particles in the list. Thus with a buffered list
810          * we can skip a significant number of i-reductions with a check.
811          */
812         if (npair_within_cutoff > 0)
813         {
814             if (doForces)
815             {
816 #pragma omp atomic
817                 f[ii3] += fIX;
818 #pragma omp atomic
819                 f[ii3 + 1] += fIY;
820 #pragma omp atomic
821                 f[ii3 + 2] += fIZ;
822             }
823             if (doShiftForces)
824             {
825 #pragma omp atomic
826                 fshift[is3] += fIX;
827 #pragma omp atomic
828                 fshift[is3 + 1] += fIY;
829 #pragma omp atomic
830                 fshift[is3 + 2] += fIZ;
831             }
832             if (doPotential)
833             {
834                 int ggid = gid[n];
835 #pragma omp atomic
836                 energygrp_elec[ggid] += vCTot;
837 #pragma omp atomic
838                 energygrp_vdw[ggid] += vVTot;
839             }
840         }
841     } // end for (int n = 0; n < nri; n++)
842
843 #pragma omp atomic
844     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdlCoul;
845 #pragma omp atomic
846     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdlVdw;
847
848     /* Estimate flops, average for free energy stuff:
849      * 12  flops per outer iteration
850      * 150 flops per inner iteration
851      */
852     atomicNrnbIncrement(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
853
854     if (numExcludedPairsBeyondRlist > 0)
855     {
856         gmx_fatal(FARGS,
857                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
858                   "of %g nm, which is not supported. This can happen because the system is "
859                   "unstable or because intra-molecular interactions at long distances are "
860                   "excluded. If the "
861                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
862                   "The error is likely triggered by the use of couple-intramol=no "
863                   "and the maximal distance in the decoupled molecule exceeding rlist.",
864                   numExcludedPairsBeyondRlist,
865                   fr->rlist);
866     }
867 }
868
869 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
870                                rvec* gmx_restrict         xx,
871                                gmx::ForceWithShiftForces* forceWithShiftForces,
872                                const t_forcerec* gmx_restrict fr,
873                                const t_mdatoms* gmx_restrict mdatoms,
874                                int                           flags,
875                                gmx::ArrayRef<const real>     lambda,
876                                gmx::ArrayRef<real>           dvdl,
877                                gmx::ArrayRef<real>           energygrp_elec,
878                                gmx::ArrayRef<real>           energygrp_vdw,
879                                t_nrnb* gmx_restrict nrnb);
880
881 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
882 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
883 {
884     if (useSimd)
885     {
886 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
887         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
888         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
889 #else
890         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
891 #endif
892     }
893     else
894     {
895         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
896     }
897 }
898
899 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
900 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
901 {
902     if (vdwModifierIsPotSwitch)
903     {
904         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
905                 useSimd));
906     }
907     else
908     {
909         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
910                 useSimd));
911     }
912 }
913
914 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
915 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
916                                                           const bool vdwModifierIsPotSwitch,
917                                                           const bool useSimd)
918 {
919     if (elecInteractionTypeIsEwald)
920     {
921         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
922                 vdwModifierIsPotSwitch, useSimd));
923     }
924     else
925     {
926         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
927                 vdwModifierIsPotSwitch, useSimd));
928     }
929 }
930
931 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
932 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
933                                                          const bool elecInteractionTypeIsEwald,
934                                                          const bool vdwModifierIsPotSwitch,
935                                                          const bool useSimd)
936 {
937     if (vdwInteractionTypeIsEwald)
938     {
939         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
940                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
941     }
942     else
943     {
944         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
945                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
946     }
947 }
948
949 template<bool useSoftCore>
950 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
951                                                                   const bool vdwInteractionTypeIsEwald,
952                                                                   const bool elecInteractionTypeIsEwald,
953                                                                   const bool vdwModifierIsPotSwitch,
954                                                                   const bool useSimd)
955 {
956     if (scLambdasOrAlphasDiffer)
957     {
958         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
959                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
960     }
961     else
962     {
963         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
964                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
965     }
966 }
967
968 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
969                                      const bool                 vdwInteractionTypeIsEwald,
970                                      const bool                 elecInteractionTypeIsEwald,
971                                      const bool                 vdwModifierIsPotSwitch,
972                                      const bool                 useSimd,
973                                      const interaction_const_t& ic)
974 {
975     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
976     {
977         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
978                                                                    vdwInteractionTypeIsEwald,
979                                                                    elecInteractionTypeIsEwald,
980                                                                    vdwModifierIsPotSwitch,
981                                                                    useSimd));
982     }
983     else
984     {
985         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
986                                                                   vdwInteractionTypeIsEwald,
987                                                                   elecInteractionTypeIsEwald,
988                                                                   vdwModifierIsPotSwitch,
989                                                                   useSimd));
990     }
991 }
992
993
994 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
995                                rvec*                      xx,
996                                gmx::ForceWithShiftForces* ff,
997                                const t_forcerec*          fr,
998                                const t_mdatoms*           mdatoms,
999                                int                        flags,
1000                                gmx::ArrayRef<const real>  lambda,
1001                                gmx::ArrayRef<real>        dvdl,
1002                                gmx::ArrayRef<real>        energygrp_elec,
1003                                gmx::ArrayRef<real>        energygrp_vdw,
1004                                t_nrnb*                    nrnb)
1005 {
1006     const interaction_const_t& ic = *fr->ic;
1007     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1008                "Unsupported eeltype with free energy");
1009     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1010
1011     const auto& scParams                   = *ic.softCoreParameters;
1012     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1013     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1014     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1015     bool        scLambdasOrAlphasDiffer    = true;
1016     const bool  useSimd                    = fr->use_simd_kernels;
1017
1018     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1019     {
1020         scLambdasOrAlphasDiffer = false;
1021     }
1022     else
1023     {
1024         if (lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1025                     == lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1026             && scParams.alphaCoulomb == scParams.alphaVdw)
1027         {
1028             scLambdasOrAlphasDiffer = false;
1029         }
1030     }
1031
1032     KernelFunction kernelFunc;
1033     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1034                                 vdwInteractionTypeIsEwald,
1035                                 elecInteractionTypeIsEwald,
1036                                 vdwModifierIsPotSwitch,
1037                                 useSimd,
1038                                 ic);
1039     kernelFunc(nlist, xx, ff, fr, mdatoms, flags, lambda, dvdl, energygrp_elec, energygrp_vdw, nrnb);
1040 }