Use ArrayRefs instead of mdatoms in gmx_nb_free_energy_kernel signature
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
50 #include "gromacs/math/functions.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/mdtypes/forceoutput.h"
53 #include "gromacs/mdtypes/forcerec.h"
54 #include "gromacs/mdtypes/interaction_const.h"
55 #include "gromacs/mdtypes/md_enums.h"
56 #include "gromacs/mdtypes/mdatom.h"
57 #include "gromacs/mdtypes/nblist.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/arrayref.h"
61
62
63 //! Scalar (non-SIMD) data types.
64 struct ScalarDataTypes
65 {
66     using RealType                     = real; //!< The data type to use as real.
67     using IntType                      = int;  //!< The data type to use as int.
68     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
69     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
70 };
71
72 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
73 //! SIMD data types.
74 struct SimdDataTypes
75 {
76     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
77     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
78     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
79     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 };
81 #endif
82
83 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
84 template<class RealType>
85 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
86 {
87     *invPthRoot = gmx::invsqrt(std::cbrt(r));
88     *pthRoot    = 1 / (*invPthRoot);
89 }
90
91 template<class RealType>
92 static inline RealType calculateRinv6(const RealType rInvV)
93 {
94     RealType rInv6 = rInvV * rInvV;
95     return (rInv6 * rInv6 * rInv6);
96 }
97
98 template<class RealType>
99 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
100 {
101     return (c6 * rInv6);
102 }
103
104 template<class RealType>
105 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
106 {
107     return (c12 * rInv6 * rInv6);
108 }
109
110 /* reaction-field electrostatics */
111 template<class RealType>
112 static inline RealType reactionFieldScalarForce(const RealType qq,
113                                                 const RealType rInv,
114                                                 const RealType r,
115                                                 const real     krf,
116                                                 const real     two)
117 {
118     return (qq * (rInv - two * krf * r * r));
119 }
120 template<class RealType>
121 static inline RealType reactionFieldPotential(const RealType qq,
122                                               const RealType rInv,
123                                               const RealType r,
124                                               const real     krf,
125                                               const real     potentialShift)
126 {
127     return (qq * (rInv + krf * r * r - potentialShift));
128 }
129
130 /* Ewald electrostatics */
131 template<class RealType>
132 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
133 {
134     return (coulomb * rInv);
135 }
136 template<class RealType>
137 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
138 {
139     return (coulomb * (rInv - potentialShift));
140 }
141
142 /* cutoff LJ */
143 template<class RealType>
144 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 {
146     return (v12 - v6);
147 }
148 template<class RealType>
149 static inline RealType lennardJonesPotential(const RealType v6,
150                                              const RealType v12,
151                                              const RealType c6,
152                                              const RealType c12,
153                                              const real     repulsionShift,
154                                              const real     dispersionShift,
155                                              const real     oneSixth,
156                                              const real     oneTwelfth)
157 {
158     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
159 }
160
161 /* Ewald LJ */
162 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
163 {
164     return (c6grid * potentialShift * oneSixth);
165 }
166
167 /* LJ Potential switch */
168 template<class RealType>
169 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
170                                                const RealType potential,
171                                                const RealType sw,
172                                                const RealType r,
173                                                const RealType rVdw,
174                                                const RealType dsw,
175                                                const real     zero)
176 {
177     if (r < rVdw)
178     {
179         real fScalar = fScalarInp * sw - r * potential * dsw;
180         return (fScalar);
181     }
182     return (zero);
183 }
184 template<class RealType>
185 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
186                                              const RealType sw,
187                                              const RealType r,
188                                              const RealType rVdw,
189                                              const real     zero)
190 {
191     if (r < rVdw)
192     {
193         real potential = potentialInp * sw;
194         return (potential);
195     }
196     return (zero);
197 }
198
199
200 //! Templated free-energy non-bonded kernel
201 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
202 static void nb_free_energy_kernel(const t_nblist&                nlist,
203                                   gmx::ArrayRef<const gmx::RVec> coords,
204                                   gmx::ForceWithShiftForces*     forceWithShiftForces,
205                                   const t_forcerec&              fr,
206                                   gmx::ArrayRef<const real>      chargeA,
207                                   gmx::ArrayRef<const real>      chargeB,
208                                   gmx::ArrayRef<const int>       typeA,
209                                   gmx::ArrayRef<const int>       typeB,
210                                   int                            flags,
211                                   gmx::ArrayRef<const real>      lambda,
212                                   gmx::ArrayRef<real>            dvdl,
213                                   gmx::ArrayRef<real>            energygrp_elec,
214                                   gmx::ArrayRef<real>            energygrp_vdw,
215                                   t_nrnb* gmx_restrict nrnb)
216 {
217 #define STATE_A 0
218 #define STATE_B 1
219 #define NSTATES 2
220
221     using RealType = typename DataTypes::RealType;
222     using IntType  = typename DataTypes::IntType;
223
224     /* FIXME: How should these be handled with SIMD? */
225     constexpr real oneTwelfth = 1.0 / 12.0;
226     constexpr real oneSixth   = 1.0 / 6.0;
227     constexpr real zero       = 0.0;
228     constexpr real half       = 0.5;
229     constexpr real one        = 1.0;
230     constexpr real two        = 2.0;
231     constexpr real six        = 6.0;
232
233     /* Extract pointer to non-bonded interaction constants */
234     const interaction_const_t* ic = fr.ic.get();
235
236     // Extract pair list data
237     const int                nri    = nlist.nri;
238     gmx::ArrayRef<const int> iinr   = nlist.iinr;
239     gmx::ArrayRef<const int> jindex = nlist.jindex;
240     gmx::ArrayRef<const int> jjnr   = nlist.jjnr;
241     gmx::ArrayRef<const int> shift  = nlist.shift;
242     gmx::ArrayRef<const int> gid    = nlist.gid;
243
244     const real*               shiftvec  = fr.shift_vec[0];
245     const int                 ntype     = fr.ntype;
246     gmx::ArrayRef<const real> nbfp      = fr.nbfp;
247     gmx::ArrayRef<const real> nbfp_grid = fr.ljpme_c6grid;
248
249     const real  lambda_coul   = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
250     const real  lambda_vdw    = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
251     const auto& scParams      = *ic->softCoreParameters;
252     const real  alpha_coul    = scParams.alphaCoulomb;
253     const real  alpha_vdw     = scParams.alphaVdw;
254     const real  lam_power     = scParams.lambdaPower;
255     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
256     const real  sigma6_min    = scParams.sigma6Minimum;
257     const bool  doForces      = ((flags & GMX_NONBONDED_DO_FORCE) != 0);
258     const bool  doShiftForces = ((flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
259     const bool  doPotential   = ((flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
260
261     // Extract data from interaction_const_t
262     const real facel           = ic->epsfac;
263     const real rCoulomb        = ic->rcoulomb;
264     const real krf             = ic->reactionFieldCoefficient;
265     const real crf             = ic->reactionFieldShift;
266     const real shLjEwald       = ic->sh_lj_ewald;
267     const real rVdw            = ic->rvdw;
268     const real dispersionShift = ic->dispersion_shift.cpot;
269     const real repulsionShift  = ic->repulsion_shift.cpot;
270
271     // Note that the nbnxm kernels do not support Coulomb potential switching at all
272     GMX_ASSERT(ic->coulomb_modifier != InteractionModifiers::PotSwitch,
273                "Potential switching is not supported for Coulomb with FEP");
274
275     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
276     if (vdwModifierIsPotSwitch)
277     {
278         const real d = ic->rvdw - ic->rvdw_switch;
279         vdw_swV3     = -10.0 / (d * d * d);
280         vdw_swV4     = 15.0 / (d * d * d * d);
281         vdw_swV5     = -6.0 / (d * d * d * d * d);
282         vdw_swF2     = -30.0 / (d * d * d);
283         vdw_swF3     = 60.0 / (d * d * d * d);
284         vdw_swF4     = -30.0 / (d * d * d * d * d);
285     }
286     else
287     {
288         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
289         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
290     }
291
292     NbkernelElecType icoul;
293     if (ic->eeltype == CoulombInteractionType::Cut || EEL_RF(ic->eeltype))
294     {
295         icoul = NbkernelElecType::ReactionField;
296     }
297     else
298     {
299         icoul = NbkernelElecType::None;
300     }
301
302     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
303     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
304
305     const real* tab_ewald_F_lj           = nullptr;
306     const real* tab_ewald_V_lj           = nullptr;
307     const real* ewtab                    = nullptr;
308     real        coulombTableScale        = 0;
309     real        coulombTableScaleInvHalf = 0;
310     real        vdwTableScale            = 0;
311     real        vdwTableScaleInvHalf     = 0;
312     real        sh_ewald                 = 0;
313     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
314     {
315         sh_ewald = ic->sh_ewald;
316     }
317     if (elecInteractionTypeIsEwald)
318     {
319         const auto& coulombTables = *ic->coulombEwaldTables;
320         ewtab                     = coulombTables.tableFDV0.data();
321         coulombTableScale         = coulombTables.scale;
322         coulombTableScaleInvHalf  = half / coulombTableScale;
323     }
324     if (vdwInteractionTypeIsEwald)
325     {
326         const auto& vdwTables = *ic->vdwEwaldTables;
327         tab_ewald_F_lj        = vdwTables.tableF.data();
328         tab_ewald_V_lj        = vdwTables.tableV.data();
329         vdwTableScale         = vdwTables.scale;
330         vdwTableScaleInvHalf  = half / vdwTableScale;
331     }
332
333     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
334      * reciprocal space. When we use non-switched Ewald interactions, we
335      * assume the soft-coring does not significantly affect the grid contribution
336      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
337      *
338      * However, we cannot use this approach for switch-modified since we would then
339      * effectively end up evaluating a significantly different interaction here compared to the
340      * normal (non-free-energy) kernels, either by applying a cutoff at a different
341      * position than what the user requested, or by switching different
342      * things (1/r rather than short-range Ewald). For these settings, we just
343      * use the traditional short-range Ewald interaction in that case.
344      */
345     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
346                        "Can not apply soft-core to switched Ewald potentials");
347
348     real dvdlCoul = 0;
349     real dvdlVdw  = 0;
350
351     /* Lambda factor for state A, 1-lambda*/
352     real LFC[NSTATES], LFV[NSTATES];
353     LFC[STATE_A] = one - lambda_coul;
354     LFV[STATE_A] = one - lambda_vdw;
355
356     /* Lambda factor for state B, lambda*/
357     LFC[STATE_B] = lambda_coul;
358     LFV[STATE_B] = lambda_vdw;
359
360     /*derivative of the lambda factor for state A and B */
361     real DLF[NSTATES];
362     DLF[STATE_A] = -1;
363     DLF[STATE_B] = 1;
364
365     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
366     constexpr real sc_r_power = 6.0_real;
367     for (int i = 0; i < NSTATES; i++)
368     {
369         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
370         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
371         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
372         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
373     }
374
375     // TODO: We should get rid of using pointers to real
376     const real* x             = coords[0];
377     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
378     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
379
380     const real rlistSquared = gmx::square(fr.rlist);
381
382     int numExcludedPairsBeyondRlist = 0;
383
384     for (int n = 0; n < nri; n++)
385     {
386         int npair_within_cutoff = 0;
387
388         const int  is3   = 3 * shift[n];
389         const real shX   = shiftvec[is3];
390         const real shY   = shiftvec[is3 + 1];
391         const real shZ   = shiftvec[is3 + 2];
392         const int  nj0   = jindex[n];
393         const int  nj1   = jindex[n + 1];
394         const int  ii    = iinr[n];
395         const int  ii3   = 3 * ii;
396         const real ix    = shX + x[ii3 + 0];
397         const real iy    = shY + x[ii3 + 1];
398         const real iz    = shZ + x[ii3 + 2];
399         const real iqA   = facel * chargeA[ii];
400         const real iqB   = facel * chargeB[ii];
401         const int  ntiA  = 2 * ntype * typeA[ii];
402         const int  ntiB  = 2 * ntype * typeB[ii];
403         real       vCTot = 0;
404         real       vVTot = 0;
405         real       fIX   = 0;
406         real       fIY   = 0;
407         real       fIZ   = 0;
408
409         for (int k = nj0; k < nj1; k++)
410         {
411             int            tj[NSTATES];
412             const int      jnr = jjnr[k];
413             const int      j3  = 3 * jnr;
414             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
415             RealType       r, rInv, rp, rpm2;
416             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
417             const RealType dX  = ix - x[j3];
418             const RealType dY  = iy - x[j3 + 1];
419             const RealType dZ  = iz - x[j3 + 2];
420             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
421             RealType       fScalC[NSTATES], fScalV[NSTATES];
422             /* Check if this pair on the exlusions list.*/
423             const bool bPairIncluded = nlist.excl_fep.empty() || nlist.excl_fep[k];
424
425             if (rSq >= rcutoff_max2 && bPairIncluded)
426             {
427                 /* We save significant time by skipping all code below.
428                  * Note that with soft-core interactions, the actual cut-off
429                  * check might be different. But since the soft-core distance
430                  * is always larger than r, checking on r here is safe.
431                  * Exclusions outside the cutoff can not be skipped as
432                  * when using Ewald: the reciprocal-space
433                  * Ewald component still needs to be subtracted.
434                  */
435
436                 continue;
437             }
438             npair_within_cutoff++;
439
440             if (rSq > rlistSquared)
441             {
442                 numExcludedPairsBeyondRlist++;
443             }
444
445             if (rSq > 0)
446             {
447                 /* Note that unlike in the nbnxn kernels, we do not need
448                  * to clamp the value of rSq before taking the invsqrt
449                  * to avoid NaN in the LJ calculation, since here we do
450                  * not calculate LJ interactions when C6 and C12 are zero.
451                  */
452
453                 rInv = gmx::invsqrt(rSq);
454                 r    = rSq * rInv;
455             }
456             else
457             {
458                 /* The force at r=0 is zero, because of symmetry.
459                  * But note that the potential is in general non-zero,
460                  * since the soft-cored r will be non-zero.
461                  */
462                 rInv = 0;
463                 r    = 0;
464             }
465
466             if (useSoftCore)
467             {
468                 rpm2 = rSq * rSq;  /* r4 */
469                 rp   = rpm2 * rSq; /* r6 */
470             }
471             else
472             {
473                 /* The soft-core power p will not affect the results
474                  * with not using soft-core, so we use power of 0 which gives
475                  * the simplest math and cheapest code.
476                  */
477                 rpm2 = rInv * rInv;
478                 rp   = 1;
479             }
480
481             RealType fScal = 0;
482
483             qq[STATE_A] = iqA * chargeA[jnr];
484             qq[STATE_B] = iqB * chargeB[jnr];
485
486             tj[STATE_A] = ntiA + 2 * typeA[jnr];
487             tj[STATE_B] = ntiB + 2 * typeB[jnr];
488
489             if (bPairIncluded)
490             {
491                 c6[STATE_A] = nbfp[tj[STATE_A]];
492                 c6[STATE_B] = nbfp[tj[STATE_B]];
493
494                 for (int i = 0; i < NSTATES; i++)
495                 {
496                     c12[i] = nbfp[tj[i] + 1];
497                     if (useSoftCore)
498                     {
499                         if ((c6[i] > 0) && (c12[i] > 0))
500                         {
501                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
502                             sigma6[i] = half * c12[i] / c6[i];
503                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
504                             {
505                                 sigma6[i] = sigma6_min;
506                             }
507                         }
508                         else
509                         {
510                             sigma6[i] = sigma6_def;
511                         }
512                     }
513                 }
514
515                 if (useSoftCore)
516                 {
517                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
518                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
519                     {
520                         alphaVdwEff  = 0;
521                         alphaCoulEff = 0;
522                     }
523                     else
524                     {
525                         alphaVdwEff  = alpha_vdw;
526                         alphaCoulEff = alpha_coul;
527                     }
528                 }
529
530                 for (int i = 0; i < NSTATES; i++)
531                 {
532                     fScalC[i] = 0;
533                     fScalV[i] = 0;
534                     vCoul[i]  = 0;
535                     vVdw[i]   = 0;
536
537                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
538
539                     /* Only spend time on A or B state if it is non-zero */
540                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
541                     {
542                         /* this section has to be inside the loop because of the dependence on sigma6 */
543                         if (useSoftCore)
544                         {
545                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
546                             pthRoot(rPInvC, &rInvC, &rC);
547                             if (scLambdasOrAlphasDiffer)
548                             {
549                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
550                                 pthRoot(rPInvV, &rInvV, &rV);
551                             }
552                             else
553                             {
554                                 /* We can avoid one expensive pow and one / operation */
555                                 rPInvV = rPInvC;
556                                 rInvV  = rInvC;
557                                 rV     = rC;
558                             }
559                         }
560                         else
561                         {
562                             rPInvC = 1;
563                             rInvC  = rInv;
564                             rC     = r;
565
566                             rPInvV = 1;
567                             rInvV  = rInv;
568                             rV     = r;
569                         }
570
571                         /* Only process the coulomb interactions if we have charges,
572                          * and if we either include all entries in the list (no cutoff
573                          * used in the kernel), or if we are within the cutoff.
574                          */
575                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
576                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
577
578                         if ((qq[i] != 0) && computeElecInteraction)
579                         {
580                             if (elecInteractionTypeIsEwald)
581                             {
582                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
583                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
584                             }
585                             else
586                             {
587                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
588                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
589                             }
590                         }
591
592                         /* Only process the VDW interactions if we have
593                          * some non-zero parameters, and if we either
594                          * include all entries in the list (no cutoff used
595                          * in the kernel), or if we are within the cutoff.
596                          */
597                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
598                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
599                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
600                         {
601                             RealType rInv6;
602                             if (useSoftCore)
603                             {
604                                 rInv6 = rPInvV;
605                             }
606                             else
607                             {
608                                 rInv6 = calculateRinv6(rInvV);
609                             }
610                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
611                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
612
613                             vVdw[i] = lennardJonesPotential(
614                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
615                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
616
617                             if (vdwInteractionTypeIsEwald)
618                             {
619                                 /* Subtract the grid potential at the cut-off */
620                                 vVdw[i] += ewaldLennardJonesGridSubtract(
621                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
622                             }
623
624                             if (vdwModifierIsPotSwitch)
625                             {
626                                 RealType d        = rV - ic->rvdw_switch;
627                                 d                 = (d > zero) ? d : zero;
628                                 const RealType d2 = d * d;
629                                 const RealType sw =
630                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
631                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
632
633                                 fScalV[i] = potSwitchScalarForceMod(
634                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
635                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
636                             }
637                         }
638
639                         /* fScalC (and fScalV) now contain: dV/drC * rC
640                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
641                          * Further down we first multiply by r^p-2 and then by
642                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
643                          */
644                         fScalC[i] *= rPInvC;
645                         fScalV[i] *= rPInvV;
646                     }
647                 } // end for (int i = 0; i < NSTATES; i++)
648
649                 /* Assemble A and B states */
650                 for (int i = 0; i < NSTATES; i++)
651                 {
652                     vCTot += LFC[i] * vCoul[i];
653                     vVTot += LFV[i] * vVdw[i];
654
655                     fScal += LFC[i] * fScalC[i] * rpm2;
656                     fScal += LFV[i] * fScalV[i] * rpm2;
657
658                     if (useSoftCore)
659                     {
660                         dvdlCoul += vCoul[i] * DLF[i]
661                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
662                         dvdlVdw += vVdw[i] * DLF[i]
663                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
664                     }
665                     else
666                     {
667                         dvdlCoul += vCoul[i] * DLF[i];
668                         dvdlVdw += vVdw[i] * DLF[i];
669                     }
670                 }
671             } // end if (bPairIncluded)
672             else if (icoul == NbkernelElecType::ReactionField)
673             {
674                 /* For excluded pairs, which are only in this pair list when
675                  * using the Verlet scheme, we don't use soft-core.
676                  * As there is no singularity, there is no need for soft-core.
677                  */
678                 const real FF = -two * krf;
679                 RealType   VV = krf * rSq - crf;
680
681                 if (ii == jnr)
682                 {
683                     VV *= half;
684                 }
685
686                 for (int i = 0; i < NSTATES; i++)
687                 {
688                     vCTot += LFC[i] * qq[i] * VV;
689                     fScal += LFC[i] * qq[i] * FF;
690                     dvdlCoul += DLF[i] * qq[i] * VV;
691                 }
692             }
693
694             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
695             {
696                 /* See comment in the preamble. When using Ewald interactions
697                  * (unless we use a switch modifier) we subtract the reciprocal-space
698                  * Ewald component here which made it possible to apply the free
699                  * energy interaction to 1/r (vanilla coulomb short-range part)
700                  * above. This gets us closer to the ideal case of applying
701                  * the softcore to the entire electrostatic interaction,
702                  * including the reciprocal-space component.
703                  */
704                 real v_lr, f_lr;
705
706                 const RealType ewrt   = r * coulombTableScale;
707                 IntType        ewitab = static_cast<IntType>(ewrt);
708                 const RealType eweps  = ewrt - ewitab;
709                 ewitab                = 4 * ewitab;
710                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
711                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
712                 f_lr *= rInv;
713
714                 /* Note that any possible Ewald shift has already been applied in
715                  * the normal interaction part above.
716                  */
717
718                 if (ii == jnr)
719                 {
720                     /* If we get here, the i particle (ii) has itself (jnr)
721                      * in its neighborlist. This can only happen with the Verlet
722                      * scheme, and corresponds to a self-interaction that will
723                      * occur twice. Scale it down by 50% to only include it once.
724                      */
725                     v_lr *= half;
726                 }
727
728                 for (int i = 0; i < NSTATES; i++)
729                 {
730                     vCTot -= LFC[i] * qq[i] * v_lr;
731                     fScal -= LFC[i] * qq[i] * f_lr;
732                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
733                 }
734             }
735
736             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
737             {
738                 /* See comment in the preamble. When using LJ-Ewald interactions
739                  * (unless we use a switch modifier) we subtract the reciprocal-space
740                  * Ewald component here which made it possible to apply the free
741                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
742                  * above. This gets us closer to the ideal case of applying
743                  * the softcore to the entire VdW interaction,
744                  * including the reciprocal-space component.
745                  */
746                 /* We could also use the analytical form here
747                  * iso a table, but that can cause issues for
748                  * r close to 0 for non-interacting pairs.
749                  */
750
751                 const RealType rs   = rSq * rInv * vdwTableScale;
752                 const IntType  ri   = static_cast<IntType>(rs);
753                 const RealType frac = rs - ri;
754                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
755                 /* TODO: Currently the Ewald LJ table does not contain
756                  * the factor 1/6, we should add this.
757                  */
758                 const RealType FF = f_lr * rInv / six;
759                 RealType       VV =
760                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
761                         / six;
762
763                 if (ii == jnr)
764                 {
765                     /* If we get here, the i particle (ii) has itself (jnr)
766                      * in its neighborlist. This can only happen with the Verlet
767                      * scheme, and corresponds to a self-interaction that will
768                      * occur twice. Scale it down by 50% to only include it once.
769                      */
770                     VV *= half;
771                 }
772
773                 for (int i = 0; i < NSTATES; i++)
774                 {
775                     const real c6grid = nbfp_grid[tj[i]];
776                     vVTot += LFV[i] * c6grid * VV;
777                     fScal += LFV[i] * c6grid * FF;
778                     dvdlVdw += (DLF[i] * c6grid) * VV;
779                 }
780             }
781
782             if (doForces)
783             {
784                 const real tX = fScal * dX;
785                 const real tY = fScal * dY;
786                 const real tZ = fScal * dZ;
787                 fIX           = fIX + tX;
788                 fIY           = fIY + tY;
789                 fIZ           = fIZ + tZ;
790                 /* OpenMP atomics are expensive, but this kernels is also
791                  * expensive, so we can take this hit, instead of using
792                  * thread-local output buffers and extra reduction.
793                  *
794                  * All the OpenMP regions in this file are trivial and should
795                  * not throw, so no need for try/catch.
796                  */
797 #pragma omp atomic
798                 f[j3] -= tX;
799 #pragma omp atomic
800                 f[j3 + 1] -= tY;
801 #pragma omp atomic
802                 f[j3 + 2] -= tZ;
803             }
804         } // end for (int k = nj0; k < nj1; k++)
805
806         /* The atomics below are expensive with many OpenMP threads.
807          * Here unperturbed i-particles will usually only have a few
808          * (perturbed) j-particles in the list. Thus with a buffered list
809          * we can skip a significant number of i-reductions with a check.
810          */
811         if (npair_within_cutoff > 0)
812         {
813             if (doForces)
814             {
815 #pragma omp atomic
816                 f[ii3] += fIX;
817 #pragma omp atomic
818                 f[ii3 + 1] += fIY;
819 #pragma omp atomic
820                 f[ii3 + 2] += fIZ;
821             }
822             if (doShiftForces)
823             {
824 #pragma omp atomic
825                 fshift[is3] += fIX;
826 #pragma omp atomic
827                 fshift[is3 + 1] += fIY;
828 #pragma omp atomic
829                 fshift[is3 + 2] += fIZ;
830             }
831             if (doPotential)
832             {
833                 int ggid = gid[n];
834 #pragma omp atomic
835                 energygrp_elec[ggid] += vCTot;
836 #pragma omp atomic
837                 energygrp_vdw[ggid] += vVTot;
838             }
839         }
840     } // end for (int n = 0; n < nri; n++)
841
842 #pragma omp atomic
843     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdlCoul;
844 #pragma omp atomic
845     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdlVdw;
846
847     /* Estimate flops, average for free energy stuff:
848      * 12  flops per outer iteration
849      * 150 flops per inner iteration
850      */
851     atomicNrnbIncrement(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist.nri * 12 + nlist.jindex[nri] * 150);
852
853     if (numExcludedPairsBeyondRlist > 0)
854     {
855         gmx_fatal(FARGS,
856                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
857                   "of %g nm, which is not supported. This can happen because the system is "
858                   "unstable or because intra-molecular interactions at long distances are "
859                   "excluded. If the "
860                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
861                   "The error is likely triggered by the use of couple-intramol=no "
862                   "and the maximal distance in the decoupled molecule exceeding rlist.",
863                   numExcludedPairsBeyondRlist,
864                   fr.rlist);
865     }
866 }
867
868 typedef void (*KernelFunction)(const t_nblist&                nlist,
869                                gmx::ArrayRef<const gmx::RVec> coords,
870                                gmx::ForceWithShiftForces*     forceWithShiftForces,
871                                const t_forcerec&              fr,
872                                gmx::ArrayRef<const real>      chargeA,
873                                gmx::ArrayRef<const real>      chargeB,
874                                gmx::ArrayRef<const int>       typeA,
875                                gmx::ArrayRef<const int>       typeB,
876                                int                            flags,
877                                gmx::ArrayRef<const real>      lambda,
878                                gmx::ArrayRef<real>            dvdl,
879                                gmx::ArrayRef<real>            energygrp_elec,
880                                gmx::ArrayRef<real>            energygrp_vdw,
881                                t_nrnb* gmx_restrict nrnb);
882
883 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
884 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
885 {
886     if (useSimd)
887     {
888 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
889         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
890         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
891 #else
892         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
893 #endif
894     }
895     else
896     {
897         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
898     }
899 }
900
901 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
902 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
903 {
904     if (vdwModifierIsPotSwitch)
905     {
906         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
907                 useSimd));
908     }
909     else
910     {
911         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
912                 useSimd));
913     }
914 }
915
916 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
917 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
918                                                           const bool vdwModifierIsPotSwitch,
919                                                           const bool useSimd)
920 {
921     if (elecInteractionTypeIsEwald)
922     {
923         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
924                 vdwModifierIsPotSwitch, useSimd));
925     }
926     else
927     {
928         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
929                 vdwModifierIsPotSwitch, useSimd));
930     }
931 }
932
933 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
934 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
935                                                          const bool elecInteractionTypeIsEwald,
936                                                          const bool vdwModifierIsPotSwitch,
937                                                          const bool useSimd)
938 {
939     if (vdwInteractionTypeIsEwald)
940     {
941         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
942                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
943     }
944     else
945     {
946         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
947                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
948     }
949 }
950
951 template<bool useSoftCore>
952 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
953                                                                   const bool vdwInteractionTypeIsEwald,
954                                                                   const bool elecInteractionTypeIsEwald,
955                                                                   const bool vdwModifierIsPotSwitch,
956                                                                   const bool useSimd)
957 {
958     if (scLambdasOrAlphasDiffer)
959     {
960         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
961                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
962     }
963     else
964     {
965         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
966                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
967     }
968 }
969
970 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
971                                      const bool                 vdwInteractionTypeIsEwald,
972                                      const bool                 elecInteractionTypeIsEwald,
973                                      const bool                 vdwModifierIsPotSwitch,
974                                      const bool                 useSimd,
975                                      const interaction_const_t& ic)
976 {
977     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
978     {
979         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
980                                                                    vdwInteractionTypeIsEwald,
981                                                                    elecInteractionTypeIsEwald,
982                                                                    vdwModifierIsPotSwitch,
983                                                                    useSimd));
984     }
985     else
986     {
987         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
988                                                                   vdwInteractionTypeIsEwald,
989                                                                   elecInteractionTypeIsEwald,
990                                                                   vdwModifierIsPotSwitch,
991                                                                   useSimd));
992     }
993 }
994
995
996 void gmx_nb_free_energy_kernel(const t_nblist&                nlist,
997                                gmx::ArrayRef<const gmx::RVec> coords,
998                                gmx::ForceWithShiftForces*     ff,
999                                const t_forcerec&              fr,
1000                                gmx::ArrayRef<const real>      chargeA,
1001                                gmx::ArrayRef<const real>      chargeB,
1002                                gmx::ArrayRef<const int>       typeA,
1003                                gmx::ArrayRef<const int>       typeB,
1004                                int                            flags,
1005                                gmx::ArrayRef<const real>      lambda,
1006                                gmx::ArrayRef<real>            dvdl,
1007                                gmx::ArrayRef<real>            energygrp_elec,
1008                                gmx::ArrayRef<real>            energygrp_vdw,
1009                                t_nrnb*                        nrnb)
1010 {
1011     const interaction_const_t& ic = *fr.ic;
1012     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1013                "Unsupported eeltype with free energy");
1014     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1015
1016     const auto& scParams                   = *ic.softCoreParameters;
1017     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1018     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1019     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1020     bool        scLambdasOrAlphasDiffer    = true;
1021     const bool  useSimd                    = fr.use_simd_kernels;
1022
1023     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1024     {
1025         scLambdasOrAlphasDiffer = false;
1026     }
1027     else
1028     {
1029         if (lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1030                     == lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1031             && scParams.alphaCoulomb == scParams.alphaVdw)
1032         {
1033             scLambdasOrAlphasDiffer = false;
1034         }
1035     }
1036
1037     KernelFunction kernelFunc;
1038     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1039                                 vdwInteractionTypeIsEwald,
1040                                 elecInteractionTypeIsEwald,
1041                                 vdwModifierIsPotSwitch,
1042                                 useSimd,
1043                                 ic);
1044     kernelFunc(nlist, coords, ff, fr, chargeA, chargeB, typeA, typeB, flags, lambda, dvdl, energygrp_elec, energygrp_vdw, nrnb);
1045 }