d50377fa674200dea996af6906ae7d050107ba1a
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
51 #include "gromacs/math/functions.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdtypes/forceoutput.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/interaction_const.h"
56 #include "gromacs/mdtypes/md_enums.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/mdtypes/nblist.h"
59 #include "gromacs/simd/simd.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/arrayref.h"
62
63
64 //! Scalar (non-SIMD) data types.
65 struct ScalarDataTypes
66 {
67     using RealType                     = real; //!< The data type to use as real.
68     using IntType                      = int;  //!< The data type to use as int.
69     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
70     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
71 };
72
73 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
74 //! SIMD data types.
75 struct SimdDataTypes
76 {
77     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
78     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
79     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
80     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
81 };
82 #endif
83
84 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
85 template<class RealType>
86 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
87 {
88     *invPthRoot = gmx::invsqrt(std::cbrt(r));
89     *pthRoot    = 1 / (*invPthRoot);
90 }
91
92 template<class RealType>
93 static inline RealType calculateRinv6(const RealType rInvV)
94 {
95     RealType rInv6 = rInvV * rInvV;
96     return (rInv6 * rInv6 * rInv6);
97 }
98
99 template<class RealType>
100 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
101 {
102     return (c6 * rInv6);
103 }
104
105 template<class RealType>
106 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
107 {
108     return (c12 * rInv6 * rInv6);
109 }
110
111 /* reaction-field electrostatics */
112 template<class RealType>
113 static inline RealType reactionFieldScalarForce(const RealType qq,
114                                                 const RealType rInv,
115                                                 const RealType r,
116                                                 const real     krf,
117                                                 const real     two)
118 {
119     return (qq * (rInv - two * krf * r * r));
120 }
121 template<class RealType>
122 static inline RealType reactionFieldPotential(const RealType qq,
123                                               const RealType rInv,
124                                               const RealType r,
125                                               const real     krf,
126                                               const real     potentialShift)
127 {
128     return (qq * (rInv + krf * r * r - potentialShift));
129 }
130
131 /* Ewald electrostatics */
132 template<class RealType>
133 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
134 {
135     return (coulomb * rInv);
136 }
137 template<class RealType>
138 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
139 {
140     return (coulomb * (rInv - potentialShift));
141 }
142
143 /* cutoff LJ */
144 template<class RealType>
145 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
146 {
147     return (v12 - v6);
148 }
149 template<class RealType>
150 static inline RealType lennardJonesPotential(const RealType v6,
151                                              const RealType v12,
152                                              const RealType c6,
153                                              const RealType c12,
154                                              const real     repulsionShift,
155                                              const real     dispersionShift,
156                                              const real     oneSixth,
157                                              const real     oneTwelfth)
158 {
159     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
160 }
161
162 /* Ewald LJ */
163 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
164 {
165     return (c6grid * potentialShift * oneSixth);
166 }
167
168 /* LJ Potential switch */
169 template<class RealType>
170 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
171                                                const RealType potential,
172                                                const RealType sw,
173                                                const RealType r,
174                                                const RealType rVdw,
175                                                const RealType dsw,
176                                                const real     zero)
177 {
178     if (r < rVdw)
179     {
180         real fScalar = fScalarInp * sw - r * potential * dsw;
181         return (fScalar);
182     }
183     return (zero);
184 }
185 template<class RealType>
186 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
187                                              const RealType sw,
188                                              const RealType r,
189                                              const RealType rVdw,
190                                              const real     zero)
191 {
192     if (r < rVdw)
193     {
194         real potential = potentialInp * sw;
195         return (potential);
196     }
197     return (zero);
198 }
199
200
201 //! Templated free-energy non-bonded kernel
202 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
203 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
204                                   rvec* gmx_restrict         xx,
205                                   gmx::ForceWithShiftForces* forceWithShiftForces,
206                                   const t_forcerec* gmx_restrict fr,
207                                   const t_mdatoms* gmx_restrict mdatoms,
208                                   nb_kernel_data_t* gmx_restrict kernel_data,
209                                   gmx::ArrayRef<real>            energygrp_elec,
210                                   gmx::ArrayRef<real>            energygrp_vdw,
211                                   t_nrnb* gmx_restrict nrnb)
212 {
213 #define STATE_A 0
214 #define STATE_B 1
215 #define NSTATES 2
216
217     using RealType = typename DataTypes::RealType;
218     using IntType  = typename DataTypes::IntType;
219
220     /* FIXME: How should these be handled with SIMD? */
221     constexpr real oneTwelfth = 1.0 / 12.0;
222     constexpr real oneSixth   = 1.0 / 6.0;
223     constexpr real zero       = 0.0;
224     constexpr real half       = 0.5;
225     constexpr real one        = 1.0;
226     constexpr real two        = 2.0;
227     constexpr real six        = 6.0;
228
229     /* Extract pointer to non-bonded interaction constants */
230     const interaction_const_t* ic = fr->ic.get();
231
232     // Extract pair list data
233     const int                nri    = nlist->nri;
234     gmx::ArrayRef<const int> iinr   = nlist->iinr;
235     gmx::ArrayRef<const int> jindex = nlist->jindex;
236     gmx::ArrayRef<const int> jjnr   = nlist->jjnr;
237     gmx::ArrayRef<const int> shift  = nlist->shift;
238     gmx::ArrayRef<const int> gid    = nlist->gid;
239
240     const real*               shiftvec  = fr->shift_vec[0];
241     const real*               chargeA   = mdatoms->chargeA;
242     const real*               chargeB   = mdatoms->chargeB;
243     const int*                typeA     = mdatoms->typeA;
244     const int*                typeB     = mdatoms->typeB;
245     const int                 ntype     = fr->ntype;
246     gmx::ArrayRef<const real> nbfp      = fr->nbfp;
247     gmx::ArrayRef<const real> nbfp_grid = fr->ljpme_c6grid;
248
249     const real lambda_coul =
250             kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
251     const real lambda_vdw =
252             kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
253     gmx::ArrayRef<real> dvdl          = kernel_data->dvdl;
254     const auto&         scParams      = *ic->softCoreParameters;
255     const real          alpha_coul    = scParams.alphaCoulomb;
256     const real          alpha_vdw     = scParams.alphaVdw;
257     const real          lam_power     = scParams.lambdaPower;
258     const real          sigma6_def    = scParams.sigma6WithInvalidSigma;
259     const real          sigma6_min    = scParams.sigma6Minimum;
260     const bool          doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
261     const bool          doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
262     const bool          doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
263
264     // Extract data from interaction_const_t
265     const real facel           = ic->epsfac;
266     const real rCoulomb        = ic->rcoulomb;
267     const real krf             = ic->reactionFieldCoefficient;
268     const real crf             = ic->reactionFieldShift;
269     const real shLjEwald       = ic->sh_lj_ewald;
270     const real rVdw            = ic->rvdw;
271     const real dispersionShift = ic->dispersion_shift.cpot;
272     const real repulsionShift  = ic->repulsion_shift.cpot;
273
274     // Note that the nbnxm kernels do not support Coulomb potential switching at all
275     GMX_ASSERT(ic->coulomb_modifier != InteractionModifiers::PotSwitch,
276                "Potential switching is not supported for Coulomb with FEP");
277
278     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
279     if (vdwModifierIsPotSwitch)
280     {
281         const real d = ic->rvdw - ic->rvdw_switch;
282         vdw_swV3     = -10.0 / (d * d * d);
283         vdw_swV4     = 15.0 / (d * d * d * d);
284         vdw_swV5     = -6.0 / (d * d * d * d * d);
285         vdw_swF2     = -30.0 / (d * d * d);
286         vdw_swF3     = 60.0 / (d * d * d * d);
287         vdw_swF4     = -30.0 / (d * d * d * d * d);
288     }
289     else
290     {
291         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
292         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
293     }
294
295     NbkernelElecType icoul;
296     if (ic->eeltype == CoulombInteractionType::Cut || EEL_RF(ic->eeltype))
297     {
298         icoul = NbkernelElecType::ReactionField;
299     }
300     else
301     {
302         icoul = NbkernelElecType::None;
303     }
304
305     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
306     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
307
308     const real* tab_ewald_F_lj           = nullptr;
309     const real* tab_ewald_V_lj           = nullptr;
310     const real* ewtab                    = nullptr;
311     real        coulombTableScale        = 0;
312     real        coulombTableScaleInvHalf = 0;
313     real        vdwTableScale            = 0;
314     real        vdwTableScaleInvHalf     = 0;
315     real        sh_ewald                 = 0;
316     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
317     {
318         sh_ewald = ic->sh_ewald;
319     }
320     if (elecInteractionTypeIsEwald)
321     {
322         const auto& coulombTables = *ic->coulombEwaldTables;
323         ewtab                     = coulombTables.tableFDV0.data();
324         coulombTableScale         = coulombTables.scale;
325         coulombTableScaleInvHalf  = half / coulombTableScale;
326     }
327     if (vdwInteractionTypeIsEwald)
328     {
329         const auto& vdwTables = *ic->vdwEwaldTables;
330         tab_ewald_F_lj        = vdwTables.tableF.data();
331         tab_ewald_V_lj        = vdwTables.tableV.data();
332         vdwTableScale         = vdwTables.scale;
333         vdwTableScaleInvHalf  = half / vdwTableScale;
334     }
335
336     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
337      * reciprocal space. When we use non-switched Ewald interactions, we
338      * assume the soft-coring does not significantly affect the grid contribution
339      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
340      *
341      * However, we cannot use this approach for switch-modified since we would then
342      * effectively end up evaluating a significantly different interaction here compared to the
343      * normal (non-free-energy) kernels, either by applying a cutoff at a different
344      * position than what the user requested, or by switching different
345      * things (1/r rather than short-range Ewald). For these settings, we just
346      * use the traditional short-range Ewald interaction in that case.
347      */
348     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
349                        "Can not apply soft-core to switched Ewald potentials");
350
351     real dvdlCoul = 0;
352     real dvdlVdw  = 0;
353
354     /* Lambda factor for state A, 1-lambda*/
355     real LFC[NSTATES], LFV[NSTATES];
356     LFC[STATE_A] = one - lambda_coul;
357     LFV[STATE_A] = one - lambda_vdw;
358
359     /* Lambda factor for state B, lambda*/
360     LFC[STATE_B] = lambda_coul;
361     LFV[STATE_B] = lambda_vdw;
362
363     /*derivative of the lambda factor for state A and B */
364     real DLF[NSTATES];
365     DLF[STATE_A] = -1;
366     DLF[STATE_B] = 1;
367
368     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
369     constexpr real sc_r_power = 6.0_real;
370     for (int i = 0; i < NSTATES; i++)
371     {
372         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
373         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
374         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
375         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
376     }
377
378     // TODO: We should get rid of using pointers to real
379     const real* x             = xx[0];
380     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
381     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
382
383     const real rlistSquared = gmx::square(fr->rlist);
384
385     int numExcludedPairsBeyondRlist = 0;
386
387     for (int n = 0; n < nri; n++)
388     {
389         int npair_within_cutoff = 0;
390
391         const int  is3   = 3 * shift[n];
392         const real shX   = shiftvec[is3];
393         const real shY   = shiftvec[is3 + 1];
394         const real shZ   = shiftvec[is3 + 2];
395         const int  nj0   = jindex[n];
396         const int  nj1   = jindex[n + 1];
397         const int  ii    = iinr[n];
398         const int  ii3   = 3 * ii;
399         const real ix    = shX + x[ii3 + 0];
400         const real iy    = shY + x[ii3 + 1];
401         const real iz    = shZ + x[ii3 + 2];
402         const real iqA   = facel * chargeA[ii];
403         const real iqB   = facel * chargeB[ii];
404         const int  ntiA  = 2 * ntype * typeA[ii];
405         const int  ntiB  = 2 * ntype * typeB[ii];
406         real       vCTot = 0;
407         real       vVTot = 0;
408         real       fIX   = 0;
409         real       fIY   = 0;
410         real       fIZ   = 0;
411
412         for (int k = nj0; k < nj1; k++)
413         {
414             int            tj[NSTATES];
415             const int      jnr = jjnr[k];
416             const int      j3  = 3 * jnr;
417             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
418             RealType       r, rInv, rp, rpm2;
419             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
420             const RealType dX  = ix - x[j3];
421             const RealType dY  = iy - x[j3 + 1];
422             const RealType dZ  = iz - x[j3 + 2];
423             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
424             RealType       fScalC[NSTATES], fScalV[NSTATES];
425             /* Check if this pair on the exlusions list.*/
426             const bool bPairIncluded = nlist->excl_fep.empty() || nlist->excl_fep[k];
427
428             if (rSq >= rcutoff_max2 && bPairIncluded)
429             {
430                 /* We save significant time by skipping all code below.
431                  * Note that with soft-core interactions, the actual cut-off
432                  * check might be different. But since the soft-core distance
433                  * is always larger than r, checking on r here is safe.
434                  * Exclusions outside the cutoff can not be skipped as
435                  * when using Ewald: the reciprocal-space
436                  * Ewald component still needs to be subtracted.
437                  */
438
439                 continue;
440             }
441             npair_within_cutoff++;
442
443             if (rSq > rlistSquared)
444             {
445                 numExcludedPairsBeyondRlist++;
446             }
447
448             if (rSq > 0)
449             {
450                 /* Note that unlike in the nbnxn kernels, we do not need
451                  * to clamp the value of rSq before taking the invsqrt
452                  * to avoid NaN in the LJ calculation, since here we do
453                  * not calculate LJ interactions when C6 and C12 are zero.
454                  */
455
456                 rInv = gmx::invsqrt(rSq);
457                 r    = rSq * rInv;
458             }
459             else
460             {
461                 /* The force at r=0 is zero, because of symmetry.
462                  * But note that the potential is in general non-zero,
463                  * since the soft-cored r will be non-zero.
464                  */
465                 rInv = 0;
466                 r    = 0;
467             }
468
469             if (useSoftCore)
470             {
471                 rpm2 = rSq * rSq;  /* r4 */
472                 rp   = rpm2 * rSq; /* r6 */
473             }
474             else
475             {
476                 /* The soft-core power p will not affect the results
477                  * with not using soft-core, so we use power of 0 which gives
478                  * the simplest math and cheapest code.
479                  */
480                 rpm2 = rInv * rInv;
481                 rp   = 1;
482             }
483
484             RealType fScal = 0;
485
486             qq[STATE_A] = iqA * chargeA[jnr];
487             qq[STATE_B] = iqB * chargeB[jnr];
488
489             tj[STATE_A] = ntiA + 2 * typeA[jnr];
490             tj[STATE_B] = ntiB + 2 * typeB[jnr];
491
492             if (bPairIncluded)
493             {
494                 c6[STATE_A] = nbfp[tj[STATE_A]];
495                 c6[STATE_B] = nbfp[tj[STATE_B]];
496
497                 for (int i = 0; i < NSTATES; i++)
498                 {
499                     c12[i] = nbfp[tj[i] + 1];
500                     if (useSoftCore)
501                     {
502                         if ((c6[i] > 0) && (c12[i] > 0))
503                         {
504                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
505                             sigma6[i] = half * c12[i] / c6[i];
506                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
507                             {
508                                 sigma6[i] = sigma6_min;
509                             }
510                         }
511                         else
512                         {
513                             sigma6[i] = sigma6_def;
514                         }
515                     }
516                 }
517
518                 if (useSoftCore)
519                 {
520                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
521                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
522                     {
523                         alphaVdwEff  = 0;
524                         alphaCoulEff = 0;
525                     }
526                     else
527                     {
528                         alphaVdwEff  = alpha_vdw;
529                         alphaCoulEff = alpha_coul;
530                     }
531                 }
532
533                 for (int i = 0; i < NSTATES; i++)
534                 {
535                     fScalC[i] = 0;
536                     fScalV[i] = 0;
537                     vCoul[i]  = 0;
538                     vVdw[i]   = 0;
539
540                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
541
542                     /* Only spend time on A or B state if it is non-zero */
543                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
544                     {
545                         /* this section has to be inside the loop because of the dependence on sigma6 */
546                         if (useSoftCore)
547                         {
548                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
549                             pthRoot(rPInvC, &rInvC, &rC);
550                             if (scLambdasOrAlphasDiffer)
551                             {
552                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
553                                 pthRoot(rPInvV, &rInvV, &rV);
554                             }
555                             else
556                             {
557                                 /* We can avoid one expensive pow and one / operation */
558                                 rPInvV = rPInvC;
559                                 rInvV  = rInvC;
560                                 rV     = rC;
561                             }
562                         }
563                         else
564                         {
565                             rPInvC = 1;
566                             rInvC  = rInv;
567                             rC     = r;
568
569                             rPInvV = 1;
570                             rInvV  = rInv;
571                             rV     = r;
572                         }
573
574                         /* Only process the coulomb interactions if we have charges,
575                          * and if we either include all entries in the list (no cutoff
576                          * used in the kernel), or if we are within the cutoff.
577                          */
578                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
579                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
580
581                         if ((qq[i] != 0) && computeElecInteraction)
582                         {
583                             if (elecInteractionTypeIsEwald)
584                             {
585                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
586                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
587                             }
588                             else
589                             {
590                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
591                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
592                             }
593                         }
594
595                         /* Only process the VDW interactions if we have
596                          * some non-zero parameters, and if we either
597                          * include all entries in the list (no cutoff used
598                          * in the kernel), or if we are within the cutoff.
599                          */
600                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
601                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
602                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
603                         {
604                             RealType rInv6;
605                             if (useSoftCore)
606                             {
607                                 rInv6 = rPInvV;
608                             }
609                             else
610                             {
611                                 rInv6 = calculateRinv6(rInvV);
612                             }
613                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
614                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
615
616                             vVdw[i] = lennardJonesPotential(
617                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
618                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
619
620                             if (vdwInteractionTypeIsEwald)
621                             {
622                                 /* Subtract the grid potential at the cut-off */
623                                 vVdw[i] += ewaldLennardJonesGridSubtract(
624                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
625                             }
626
627                             if (vdwModifierIsPotSwitch)
628                             {
629                                 RealType d        = rV - ic->rvdw_switch;
630                                 d                 = (d > zero) ? d : zero;
631                                 const RealType d2 = d * d;
632                                 const RealType sw =
633                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
634                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
635
636                                 fScalV[i] = potSwitchScalarForceMod(
637                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
638                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
639                             }
640                         }
641
642                         /* fScalC (and fScalV) now contain: dV/drC * rC
643                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
644                          * Further down we first multiply by r^p-2 and then by
645                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
646                          */
647                         fScalC[i] *= rPInvC;
648                         fScalV[i] *= rPInvV;
649                     }
650                 } // end for (int i = 0; i < NSTATES; i++)
651
652                 /* Assemble A and B states */
653                 for (int i = 0; i < NSTATES; i++)
654                 {
655                     vCTot += LFC[i] * vCoul[i];
656                     vVTot += LFV[i] * vVdw[i];
657
658                     fScal += LFC[i] * fScalC[i] * rpm2;
659                     fScal += LFV[i] * fScalV[i] * rpm2;
660
661                     if (useSoftCore)
662                     {
663                         dvdlCoul += vCoul[i] * DLF[i]
664                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
665                         dvdlVdw += vVdw[i] * DLF[i]
666                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
667                     }
668                     else
669                     {
670                         dvdlCoul += vCoul[i] * DLF[i];
671                         dvdlVdw += vVdw[i] * DLF[i];
672                     }
673                 }
674             } // end if (bPairIncluded)
675             else if (icoul == NbkernelElecType::ReactionField)
676             {
677                 /* For excluded pairs, which are only in this pair list when
678                  * using the Verlet scheme, we don't use soft-core.
679                  * As there is no singularity, there is no need for soft-core.
680                  */
681                 const real FF = -two * krf;
682                 RealType   VV = krf * rSq - crf;
683
684                 if (ii == jnr)
685                 {
686                     VV *= half;
687                 }
688
689                 for (int i = 0; i < NSTATES; i++)
690                 {
691                     vCTot += LFC[i] * qq[i] * VV;
692                     fScal += LFC[i] * qq[i] * FF;
693                     dvdlCoul += DLF[i] * qq[i] * VV;
694                 }
695             }
696
697             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
698             {
699                 /* See comment in the preamble. When using Ewald interactions
700                  * (unless we use a switch modifier) we subtract the reciprocal-space
701                  * Ewald component here which made it possible to apply the free
702                  * energy interaction to 1/r (vanilla coulomb short-range part)
703                  * above. This gets us closer to the ideal case of applying
704                  * the softcore to the entire electrostatic interaction,
705                  * including the reciprocal-space component.
706                  */
707                 real v_lr, f_lr;
708
709                 const RealType ewrt   = r * coulombTableScale;
710                 IntType        ewitab = static_cast<IntType>(ewrt);
711                 const RealType eweps  = ewrt - ewitab;
712                 ewitab                = 4 * ewitab;
713                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
714                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
715                 f_lr *= rInv;
716
717                 /* Note that any possible Ewald shift has already been applied in
718                  * the normal interaction part above.
719                  */
720
721                 if (ii == jnr)
722                 {
723                     /* If we get here, the i particle (ii) has itself (jnr)
724                      * in its neighborlist. This can only happen with the Verlet
725                      * scheme, and corresponds to a self-interaction that will
726                      * occur twice. Scale it down by 50% to only include it once.
727                      */
728                     v_lr *= half;
729                 }
730
731                 for (int i = 0; i < NSTATES; i++)
732                 {
733                     vCTot -= LFC[i] * qq[i] * v_lr;
734                     fScal -= LFC[i] * qq[i] * f_lr;
735                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
736                 }
737             }
738
739             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
740             {
741                 /* See comment in the preamble. When using LJ-Ewald interactions
742                  * (unless we use a switch modifier) we subtract the reciprocal-space
743                  * Ewald component here which made it possible to apply the free
744                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
745                  * above. This gets us closer to the ideal case of applying
746                  * the softcore to the entire VdW interaction,
747                  * including the reciprocal-space component.
748                  */
749                 /* We could also use the analytical form here
750                  * iso a table, but that can cause issues for
751                  * r close to 0 for non-interacting pairs.
752                  */
753
754                 const RealType rs   = rSq * rInv * vdwTableScale;
755                 const IntType  ri   = static_cast<IntType>(rs);
756                 const RealType frac = rs - ri;
757                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
758                 /* TODO: Currently the Ewald LJ table does not contain
759                  * the factor 1/6, we should add this.
760                  */
761                 const RealType FF = f_lr * rInv / six;
762                 RealType       VV =
763                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
764                         / six;
765
766                 if (ii == jnr)
767                 {
768                     /* If we get here, the i particle (ii) has itself (jnr)
769                      * in its neighborlist. This can only happen with the Verlet
770                      * scheme, and corresponds to a self-interaction that will
771                      * occur twice. Scale it down by 50% to only include it once.
772                      */
773                     VV *= half;
774                 }
775
776                 for (int i = 0; i < NSTATES; i++)
777                 {
778                     const real c6grid = nbfp_grid[tj[i]];
779                     vVTot += LFV[i] * c6grid * VV;
780                     fScal += LFV[i] * c6grid * FF;
781                     dvdlVdw += (DLF[i] * c6grid) * VV;
782                 }
783             }
784
785             if (doForces)
786             {
787                 const real tX = fScal * dX;
788                 const real tY = fScal * dY;
789                 const real tZ = fScal * dZ;
790                 fIX           = fIX + tX;
791                 fIY           = fIY + tY;
792                 fIZ           = fIZ + tZ;
793                 /* OpenMP atomics are expensive, but this kernels is also
794                  * expensive, so we can take this hit, instead of using
795                  * thread-local output buffers and extra reduction.
796                  *
797                  * All the OpenMP regions in this file are trivial and should
798                  * not throw, so no need for try/catch.
799                  */
800 #pragma omp atomic
801                 f[j3] -= tX;
802 #pragma omp atomic
803                 f[j3 + 1] -= tY;
804 #pragma omp atomic
805                 f[j3 + 2] -= tZ;
806             }
807         } // end for (int k = nj0; k < nj1; k++)
808
809         /* The atomics below are expensive with many OpenMP threads.
810          * Here unperturbed i-particles will usually only have a few
811          * (perturbed) j-particles in the list. Thus with a buffered list
812          * we can skip a significant number of i-reductions with a check.
813          */
814         if (npair_within_cutoff > 0)
815         {
816             if (doForces)
817             {
818 #pragma omp atomic
819                 f[ii3] += fIX;
820 #pragma omp atomic
821                 f[ii3 + 1] += fIY;
822 #pragma omp atomic
823                 f[ii3 + 2] += fIZ;
824             }
825             if (doShiftForces)
826             {
827 #pragma omp atomic
828                 fshift[is3] += fIX;
829 #pragma omp atomic
830                 fshift[is3 + 1] += fIY;
831 #pragma omp atomic
832                 fshift[is3 + 2] += fIZ;
833             }
834             if (doPotential)
835             {
836                 int ggid = gid[n];
837 #pragma omp atomic
838                 energygrp_elec[ggid] += vCTot;
839 #pragma omp atomic
840                 energygrp_vdw[ggid] += vVTot;
841             }
842         }
843     } // end for (int n = 0; n < nri; n++)
844
845 #pragma omp atomic
846     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdlCoul;
847 #pragma omp atomic
848     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdlVdw;
849
850     /* Estimate flops, average for free energy stuff:
851      * 12  flops per outer iteration
852      * 150 flops per inner iteration
853      */
854     atomicNrnbIncrement(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
855
856     if (numExcludedPairsBeyondRlist > 0)
857     {
858         gmx_fatal(FARGS,
859                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
860                   "of %g nm, which is not supported. This can happen because the system is "
861                   "unstable or because intra-molecular interactions at long distances are "
862                   "excluded. If the "
863                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
864                   "The error is likely triggered by the use of couple-intramol=no "
865                   "and the maximal distance in the decoupled molecule exceeding rlist.",
866                   numExcludedPairsBeyondRlist,
867                   fr->rlist);
868     }
869 }
870
871 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
872                                rvec* gmx_restrict         xx,
873                                gmx::ForceWithShiftForces* forceWithShiftForces,
874                                const t_forcerec* gmx_restrict fr,
875                                const t_mdatoms* gmx_restrict mdatoms,
876                                nb_kernel_data_t* gmx_restrict kernel_data,
877                                gmx::ArrayRef<real>            energygrp_elec,
878                                gmx::ArrayRef<real>            energygrp_vdw,
879                                t_nrnb* gmx_restrict nrnb);
880
881 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
882 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
883 {
884     if (useSimd)
885     {
886 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
887         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
888         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
889 #else
890         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
891 #endif
892     }
893     else
894     {
895         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
896     }
897 }
898
899 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
900 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
901 {
902     if (vdwModifierIsPotSwitch)
903     {
904         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
905                 useSimd));
906     }
907     else
908     {
909         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
910                 useSimd));
911     }
912 }
913
914 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
915 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
916                                                           const bool vdwModifierIsPotSwitch,
917                                                           const bool useSimd)
918 {
919     if (elecInteractionTypeIsEwald)
920     {
921         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
922                 vdwModifierIsPotSwitch, useSimd));
923     }
924     else
925     {
926         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
927                 vdwModifierIsPotSwitch, useSimd));
928     }
929 }
930
931 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
932 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
933                                                          const bool elecInteractionTypeIsEwald,
934                                                          const bool vdwModifierIsPotSwitch,
935                                                          const bool useSimd)
936 {
937     if (vdwInteractionTypeIsEwald)
938     {
939         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
940                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
941     }
942     else
943     {
944         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
945                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
946     }
947 }
948
949 template<bool useSoftCore>
950 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
951                                                                   const bool vdwInteractionTypeIsEwald,
952                                                                   const bool elecInteractionTypeIsEwald,
953                                                                   const bool vdwModifierIsPotSwitch,
954                                                                   const bool useSimd)
955 {
956     if (scLambdasOrAlphasDiffer)
957     {
958         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
959                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
960     }
961     else
962     {
963         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
964                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
965     }
966 }
967
968 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
969                                      const bool                 vdwInteractionTypeIsEwald,
970                                      const bool                 elecInteractionTypeIsEwald,
971                                      const bool                 vdwModifierIsPotSwitch,
972                                      const bool                 useSimd,
973                                      const interaction_const_t& ic)
974 {
975     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
976     {
977         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
978                                                                    vdwInteractionTypeIsEwald,
979                                                                    elecInteractionTypeIsEwald,
980                                                                    vdwModifierIsPotSwitch,
981                                                                    useSimd));
982     }
983     else
984     {
985         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
986                                                                   vdwInteractionTypeIsEwald,
987                                                                   elecInteractionTypeIsEwald,
988                                                                   vdwModifierIsPotSwitch,
989                                                                   useSimd));
990     }
991 }
992
993
994 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
995                                rvec*                      xx,
996                                gmx::ForceWithShiftForces* ff,
997                                const t_forcerec*          fr,
998                                const t_mdatoms*           mdatoms,
999                                nb_kernel_data_t*          kernel_data,
1000                                gmx::ArrayRef<real>        energygrp_elec,
1001                                gmx::ArrayRef<real>        energygrp_vdw,
1002                                t_nrnb*                    nrnb)
1003 {
1004     const interaction_const_t& ic = *fr->ic;
1005     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1006                "Unsupported eeltype with free energy");
1007     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1008
1009     const auto& scParams                   = *ic.softCoreParameters;
1010     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1011     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1012     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1013     bool        scLambdasOrAlphasDiffer    = true;
1014     const bool  useSimd                    = fr->use_simd_kernels;
1015
1016     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1017     {
1018         scLambdasOrAlphasDiffer = false;
1019     }
1020     else
1021     {
1022         if (kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1023                     == kernel_data->lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1024             && scParams.alphaCoulomb == scParams.alphaVdw)
1025         {
1026             scLambdasOrAlphasDiffer = false;
1027         }
1028     }
1029
1030     KernelFunction kernelFunc;
1031     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1032                                 vdwInteractionTypeIsEwald,
1033                                 elecInteractionTypeIsEwald,
1034                                 vdwModifierIsPotSwitch,
1035                                 useSimd,
1036                                 ic);
1037     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, energygrp_elec, energygrp_vdw, nrnb);
1038 }