Merge remote-tracking branch 'origin/release-2021' into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
50 #include "gromacs/math/functions.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/mdtypes/forceoutput.h"
53 #include "gromacs/mdtypes/forcerec.h"
54 #include "gromacs/mdtypes/interaction_const.h"
55 #include "gromacs/mdtypes/md_enums.h"
56 #include "gromacs/mdtypes/mdatom.h"
57 #include "gromacs/mdtypes/nblist.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/arrayref.h"
61
62
63 //! Scalar (non-SIMD) data types.
64 struct ScalarDataTypes
65 {
66     using RealType                     = real; //!< The data type to use as real.
67     using IntType                      = int;  //!< The data type to use as int.
68     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
69     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
70 };
71
72 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
73 //! SIMD data types.
74 struct SimdDataTypes
75 {
76     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
77     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
78     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
79     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 };
81 #endif
82
83 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
84 template<class RealType>
85 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
86 {
87     *invPthRoot = gmx::invsqrt(std::cbrt(r));
88     *pthRoot    = 1 / (*invPthRoot);
89 }
90
91 template<class RealType>
92 static inline RealType calculateRinv6(const RealType rInvV)
93 {
94     RealType rInv6 = rInvV * rInvV;
95     return (rInv6 * rInv6 * rInv6);
96 }
97
98 template<class RealType>
99 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
100 {
101     return (c6 * rInv6);
102 }
103
104 template<class RealType>
105 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
106 {
107     return (c12 * rInv6 * rInv6);
108 }
109
110 /* reaction-field electrostatics */
111 template<class RealType>
112 static inline RealType reactionFieldScalarForce(const RealType qq,
113                                                 const RealType rInv,
114                                                 const RealType r,
115                                                 const real     krf,
116                                                 const real     two)
117 {
118     return (qq * (rInv - two * krf * r * r));
119 }
120 template<class RealType>
121 static inline RealType reactionFieldPotential(const RealType qq,
122                                               const RealType rInv,
123                                               const RealType r,
124                                               const real     krf,
125                                               const real     potentialShift)
126 {
127     return (qq * (rInv + krf * r * r - potentialShift));
128 }
129
130 /* Ewald electrostatics */
131 template<class RealType>
132 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
133 {
134     return (coulomb * rInv);
135 }
136 template<class RealType>
137 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
138 {
139     return (coulomb * (rInv - potentialShift));
140 }
141
142 /* cutoff LJ */
143 template<class RealType>
144 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 {
146     return (v12 - v6);
147 }
148 template<class RealType>
149 static inline RealType lennardJonesPotential(const RealType v6,
150                                              const RealType v12,
151                                              const RealType c6,
152                                              const RealType c12,
153                                              const real     repulsionShift,
154                                              const real     dispersionShift,
155                                              const real     oneSixth,
156                                              const real     oneTwelfth)
157 {
158     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
159 }
160
161 /* Ewald LJ */
162 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
163 {
164     return (c6grid * potentialShift * oneSixth);
165 }
166
167 /* LJ Potential switch */
168 template<class RealType>
169 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
170                                                const RealType potential,
171                                                const RealType sw,
172                                                const RealType r,
173                                                const RealType rVdw,
174                                                const RealType dsw,
175                                                const real     zero)
176 {
177     if (r < rVdw)
178     {
179         real fScalar = fScalarInp * sw - r * potential * dsw;
180         return (fScalar);
181     }
182     return (zero);
183 }
184 template<class RealType>
185 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
186                                              const RealType sw,
187                                              const RealType r,
188                                              const RealType rVdw,
189                                              const real     zero)
190 {
191     if (r < rVdw)
192     {
193         real potential = potentialInp * sw;
194         return (potential);
195     }
196     return (zero);
197 }
198
199
200 //! Templated free-energy non-bonded kernel
201 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
202 static void nb_free_energy_kernel(const t_nblist&                nlist,
203                                   gmx::ArrayRef<const gmx::RVec> coords,
204                                   gmx::ForceWithShiftForces*     forceWithShiftForces,
205                                   const int                      ntype,
206                                   const real                     rlist,
207                                   const interaction_const_t&     ic,
208                                   gmx::ArrayRef<const gmx::RVec> shiftvec,
209                                   gmx::ArrayRef<const real>      nbfp,
210                                   gmx::ArrayRef<const real>      nbfp_grid,
211                                   gmx::ArrayRef<const real>      chargeA,
212                                   gmx::ArrayRef<const real>      chargeB,
213                                   gmx::ArrayRef<const int>       typeA,
214                                   gmx::ArrayRef<const int>       typeB,
215                                   int                            flags,
216                                   gmx::ArrayRef<const real>      lambda,
217                                   gmx::ArrayRef<real>            dvdl,
218                                   gmx::ArrayRef<real>            energygrp_elec,
219                                   gmx::ArrayRef<real>            energygrp_vdw,
220                                   t_nrnb* gmx_restrict           nrnb)
221 {
222 #define STATE_A 0
223 #define STATE_B 1
224 #define NSTATES 2
225
226     using RealType = typename DataTypes::RealType;
227     using IntType  = typename DataTypes::IntType;
228
229     /* FIXME: How should these be handled with SIMD? */
230     constexpr real oneTwelfth = 1.0 / 12.0;
231     constexpr real oneSixth   = 1.0 / 6.0;
232     constexpr real zero       = 0.0;
233     constexpr real half       = 0.5;
234     constexpr real one        = 1.0;
235     constexpr real two        = 2.0;
236     constexpr real six        = 6.0;
237
238     // Extract pair list data
239     const int                nri    = nlist.nri;
240     gmx::ArrayRef<const int> iinr   = nlist.iinr;
241     gmx::ArrayRef<const int> jindex = nlist.jindex;
242     gmx::ArrayRef<const int> jjnr   = nlist.jjnr;
243     gmx::ArrayRef<const int> shift  = nlist.shift;
244     gmx::ArrayRef<const int> gid    = nlist.gid;
245
246     const real  lambda_coul   = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
247     const real  lambda_vdw    = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
248     const auto& scParams      = *ic.softCoreParameters;
249     const real  alpha_coul    = scParams.alphaCoulomb;
250     const real  alpha_vdw     = scParams.alphaVdw;
251     const real  lam_power     = scParams.lambdaPower;
252     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
253     const real  sigma6_min    = scParams.sigma6Minimum;
254     const bool  doForces      = ((flags & GMX_NONBONDED_DO_FORCE) != 0);
255     const bool  doShiftForces = ((flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
256     const bool  doPotential   = ((flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
257
258     // Extract data from interaction_const_t
259     const real facel           = ic.epsfac;
260     const real rCoulomb        = ic.rcoulomb;
261     const real krf             = ic.reactionFieldCoefficient;
262     const real crf             = ic.reactionFieldShift;
263     const real shLjEwald       = ic.sh_lj_ewald;
264     const real rVdw            = ic.rvdw;
265     const real dispersionShift = ic.dispersion_shift.cpot;
266     const real repulsionShift  = ic.repulsion_shift.cpot;
267
268     // Note that the nbnxm kernels do not support Coulomb potential switching at all
269     GMX_ASSERT(ic.coulomb_modifier != InteractionModifiers::PotSwitch,
270                "Potential switching is not supported for Coulomb with FEP");
271
272     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
273     if (vdwModifierIsPotSwitch)
274     {
275         const real d = ic.rvdw - ic.rvdw_switch;
276         vdw_swV3     = -10.0 / (d * d * d);
277         vdw_swV4     = 15.0 / (d * d * d * d);
278         vdw_swV5     = -6.0 / (d * d * d * d * d);
279         vdw_swF2     = -30.0 / (d * d * d);
280         vdw_swF3     = 60.0 / (d * d * d * d);
281         vdw_swF4     = -30.0 / (d * d * d * d * d);
282     }
283     else
284     {
285         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
286         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
287     }
288
289     NbkernelElecType icoul;
290     if (ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype))
291     {
292         icoul = NbkernelElecType::ReactionField;
293     }
294     else
295     {
296         icoul = NbkernelElecType::None;
297     }
298
299     real rcutoff_max2 = std::max(ic.rcoulomb, ic.rvdw);
300     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
301
302     const real* tab_ewald_F_lj           = nullptr;
303     const real* tab_ewald_V_lj           = nullptr;
304     const real* ewtab                    = nullptr;
305     real        coulombTableScale        = 0;
306     real        coulombTableScaleInvHalf = 0;
307     real        vdwTableScale            = 0;
308     real        vdwTableScaleInvHalf     = 0;
309     real        sh_ewald                 = 0;
310     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
311     {
312         sh_ewald = ic.sh_ewald;
313     }
314     if (elecInteractionTypeIsEwald)
315     {
316         const auto& coulombTables = *ic.coulombEwaldTables;
317         ewtab                     = coulombTables.tableFDV0.data();
318         coulombTableScale         = coulombTables.scale;
319         coulombTableScaleInvHalf  = half / coulombTableScale;
320     }
321     if (vdwInteractionTypeIsEwald)
322     {
323         const auto& vdwTables = *ic.vdwEwaldTables;
324         tab_ewald_F_lj        = vdwTables.tableF.data();
325         tab_ewald_V_lj        = vdwTables.tableV.data();
326         vdwTableScale         = vdwTables.scale;
327         vdwTableScaleInvHalf  = half / vdwTableScale;
328     }
329
330     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
331      * reciprocal space. When we use non-switched Ewald interactions, we
332      * assume the soft-coring does not significantly affect the grid contribution
333      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
334      *
335      * However, we cannot use this approach for switch-modified since we would then
336      * effectively end up evaluating a significantly different interaction here compared to the
337      * normal (non-free-energy) kernels, either by applying a cutoff at a different
338      * position than what the user requested, or by switching different
339      * things (1/r rather than short-range Ewald). For these settings, we just
340      * use the traditional short-range Ewald interaction in that case.
341      */
342     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
343                        "Can not apply soft-core to switched Ewald potentials");
344
345     real dvdlCoul = 0;
346     real dvdlVdw  = 0;
347
348     /* Lambda factor for state A, 1-lambda*/
349     real LFC[NSTATES], LFV[NSTATES];
350     LFC[STATE_A] = one - lambda_coul;
351     LFV[STATE_A] = one - lambda_vdw;
352
353     /* Lambda factor for state B, lambda*/
354     LFC[STATE_B] = lambda_coul;
355     LFV[STATE_B] = lambda_vdw;
356
357     /*derivative of the lambda factor for state A and B */
358     real DLF[NSTATES];
359     DLF[STATE_A] = -1;
360     DLF[STATE_B] = 1;
361
362     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
363     constexpr real sc_r_power = 6.0_real;
364     for (int i = 0; i < NSTATES; i++)
365     {
366         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
367         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
368         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
369         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
370     }
371
372     // TODO: We should get rid of using pointers to real
373     const real*        x      = coords[0];
374     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
375     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
376
377     const real rlistSquared = gmx::square(rlist);
378
379     int numExcludedPairsBeyondRlist = 0;
380
381     for (int n = 0; n < nri; n++)
382     {
383         int npair_within_cutoff = 0;
384
385         const int  is    = shift[n];
386         const int  is3   = DIM * is;
387         const real shX   = shiftvec[is][XX];
388         const real shY   = shiftvec[is][YY];
389         const real shZ   = shiftvec[is][ZZ];
390         const int  nj0   = jindex[n];
391         const int  nj1   = jindex[n + 1];
392         const int  ii    = iinr[n];
393         const int  ii3   = 3 * ii;
394         const real ix    = shX + x[ii3 + 0];
395         const real iy    = shY + x[ii3 + 1];
396         const real iz    = shZ + x[ii3 + 2];
397         const real iqA   = facel * chargeA[ii];
398         const real iqB   = facel * chargeB[ii];
399         const int  ntiA  = 2 * ntype * typeA[ii];
400         const int  ntiB  = 2 * ntype * typeB[ii];
401         real       vCTot = 0;
402         real       vVTot = 0;
403         real       fIX   = 0;
404         real       fIY   = 0;
405         real       fIZ   = 0;
406
407         for (int k = nj0; k < nj1; k++)
408         {
409             int            tj[NSTATES];
410             const int      jnr = jjnr[k];
411             const int      j3  = 3 * jnr;
412             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
413             RealType       r, rInv, rp, rpm2;
414             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
415             const RealType dX  = ix - x[j3];
416             const RealType dY  = iy - x[j3 + 1];
417             const RealType dZ  = iz - x[j3 + 2];
418             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
419             RealType       fScalC[NSTATES], fScalV[NSTATES];
420             /* Check if this pair on the exlusions list.*/
421             const bool bPairIncluded = nlist.excl_fep.empty() || nlist.excl_fep[k];
422
423             if (rSq >= rcutoff_max2 && bPairIncluded)
424             {
425                 /* We save significant time by skipping all code below.
426                  * Note that with soft-core interactions, the actual cut-off
427                  * check might be different. But since the soft-core distance
428                  * is always larger than r, checking on r here is safe.
429                  * Exclusions outside the cutoff can not be skipped as
430                  * when using Ewald: the reciprocal-space
431                  * Ewald component still needs to be subtracted.
432                  */
433
434                 continue;
435             }
436             npair_within_cutoff++;
437
438             if (rSq > rlistSquared)
439             {
440                 numExcludedPairsBeyondRlist++;
441             }
442
443             if (rSq > 0)
444             {
445                 /* Note that unlike in the nbnxn kernels, we do not need
446                  * to clamp the value of rSq before taking the invsqrt
447                  * to avoid NaN in the LJ calculation, since here we do
448                  * not calculate LJ interactions when C6 and C12 are zero.
449                  */
450
451                 rInv = gmx::invsqrt(rSq);
452                 r    = rSq * rInv;
453             }
454             else
455             {
456                 /* The force at r=0 is zero, because of symmetry.
457                  * But note that the potential is in general non-zero,
458                  * since the soft-cored r will be non-zero.
459                  */
460                 rInv = 0;
461                 r    = 0;
462             }
463
464             if (useSoftCore)
465             {
466                 rpm2 = rSq * rSq;  /* r4 */
467                 rp   = rpm2 * rSq; /* r6 */
468             }
469             else
470             {
471                 /* The soft-core power p will not affect the results
472                  * with not using soft-core, so we use power of 0 which gives
473                  * the simplest math and cheapest code.
474                  */
475                 rpm2 = rInv * rInv;
476                 rp   = 1;
477             }
478
479             RealType fScal = 0;
480
481             qq[STATE_A] = iqA * chargeA[jnr];
482             qq[STATE_B] = iqB * chargeB[jnr];
483
484             tj[STATE_A] = ntiA + 2 * typeA[jnr];
485             tj[STATE_B] = ntiB + 2 * typeB[jnr];
486
487             if (bPairIncluded)
488             {
489                 c6[STATE_A] = nbfp[tj[STATE_A]];
490                 c6[STATE_B] = nbfp[tj[STATE_B]];
491
492                 for (int i = 0; i < NSTATES; i++)
493                 {
494                     c12[i] = nbfp[tj[i] + 1];
495                     if (useSoftCore)
496                     {
497                         if ((c6[i] > 0) && (c12[i] > 0))
498                         {
499                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
500                             sigma6[i] = half * c12[i] / c6[i];
501                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
502                             {
503                                 sigma6[i] = sigma6_min;
504                             }
505                         }
506                         else
507                         {
508                             sigma6[i] = sigma6_def;
509                         }
510                     }
511                 }
512
513                 if (useSoftCore)
514                 {
515                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
516                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
517                     {
518                         alphaVdwEff  = 0;
519                         alphaCoulEff = 0;
520                     }
521                     else
522                     {
523                         alphaVdwEff  = alpha_vdw;
524                         alphaCoulEff = alpha_coul;
525                     }
526                 }
527
528                 for (int i = 0; i < NSTATES; i++)
529                 {
530                     fScalC[i] = 0;
531                     fScalV[i] = 0;
532                     vCoul[i]  = 0;
533                     vVdw[i]   = 0;
534
535                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
536
537                     /* Only spend time on A or B state if it is non-zero */
538                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
539                     {
540                         /* this section has to be inside the loop because of the dependence on sigma6 */
541                         if (useSoftCore)
542                         {
543                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
544                             pthRoot(rPInvC, &rInvC, &rC);
545                             if (scLambdasOrAlphasDiffer)
546                             {
547                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
548                                 pthRoot(rPInvV, &rInvV, &rV);
549                             }
550                             else
551                             {
552                                 /* We can avoid one expensive pow and one / operation */
553                                 rPInvV = rPInvC;
554                                 rInvV  = rInvC;
555                                 rV     = rC;
556                             }
557                         }
558                         else
559                         {
560                             rPInvC = 1;
561                             rInvC  = rInv;
562                             rC     = r;
563
564                             rPInvV = 1;
565                             rInvV  = rInv;
566                             rV     = r;
567                         }
568
569                         /* Only process the coulomb interactions if we have charges,
570                          * and if we either include all entries in the list (no cutoff
571                          * used in the kernel), or if we are within the cutoff.
572                          */
573                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
574                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
575
576                         if ((qq[i] != 0) && computeElecInteraction)
577                         {
578                             if (elecInteractionTypeIsEwald)
579                             {
580                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
581                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
582                             }
583                             else
584                             {
585                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
586                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
587                             }
588                         }
589
590                         /* Only process the VDW interactions if we have
591                          * some non-zero parameters, and if we either
592                          * include all entries in the list (no cutoff used
593                          * in the kernel), or if we are within the cutoff.
594                          */
595                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
596                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
597                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
598                         {
599                             RealType rInv6;
600                             if (useSoftCore)
601                             {
602                                 rInv6 = rPInvV;
603                             }
604                             else
605                             {
606                                 rInv6 = calculateRinv6(rInvV);
607                             }
608                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
609                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
610
611                             vVdw[i] = lennardJonesPotential(
612                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
613                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
614
615                             if (vdwInteractionTypeIsEwald)
616                             {
617                                 /* Subtract the grid potential at the cut-off */
618                                 vVdw[i] += ewaldLennardJonesGridSubtract(
619                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
620                             }
621
622                             if (vdwModifierIsPotSwitch)
623                             {
624                                 RealType d        = rV - ic.rvdw_switch;
625                                 d                 = (d > zero) ? d : zero;
626                                 const RealType d2 = d * d;
627                                 const RealType sw =
628                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
629                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
630
631                                 fScalV[i] = potSwitchScalarForceMod(
632                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
633                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
634                             }
635                         }
636
637                         /* fScalC (and fScalV) now contain: dV/drC * rC
638                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
639                          * Further down we first multiply by r^p-2 and then by
640                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
641                          */
642                         fScalC[i] *= rPInvC;
643                         fScalV[i] *= rPInvV;
644                     }
645                 } // end for (int i = 0; i < NSTATES; i++)
646
647                 /* Assemble A and B states */
648                 for (int i = 0; i < NSTATES; i++)
649                 {
650                     vCTot += LFC[i] * vCoul[i];
651                     vVTot += LFV[i] * vVdw[i];
652
653                     fScal += LFC[i] * fScalC[i] * rpm2;
654                     fScal += LFV[i] * fScalV[i] * rpm2;
655
656                     if (useSoftCore)
657                     {
658                         dvdlCoul += vCoul[i] * DLF[i]
659                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
660                         dvdlVdw += vVdw[i] * DLF[i]
661                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
662                     }
663                     else
664                     {
665                         dvdlCoul += vCoul[i] * DLF[i];
666                         dvdlVdw += vVdw[i] * DLF[i];
667                     }
668                 }
669             } // end if (bPairIncluded)
670             else if (icoul == NbkernelElecType::ReactionField)
671             {
672                 /* For excluded pairs, which are only in this pair list when
673                  * using the Verlet scheme, we don't use soft-core.
674                  * As there is no singularity, there is no need for soft-core.
675                  */
676                 const real FF = -two * krf;
677                 RealType   VV = krf * rSq - crf;
678
679                 if (ii == jnr)
680                 {
681                     VV *= half;
682                 }
683
684                 for (int i = 0; i < NSTATES; i++)
685                 {
686                     vCTot += LFC[i] * qq[i] * VV;
687                     fScal += LFC[i] * qq[i] * FF;
688                     dvdlCoul += DLF[i] * qq[i] * VV;
689                 }
690             }
691
692             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
693             {
694                 /* See comment in the preamble. When using Ewald interactions
695                  * (unless we use a switch modifier) we subtract the reciprocal-space
696                  * Ewald component here which made it possible to apply the free
697                  * energy interaction to 1/r (vanilla coulomb short-range part)
698                  * above. This gets us closer to the ideal case of applying
699                  * the softcore to the entire electrostatic interaction,
700                  * including the reciprocal-space component.
701                  */
702                 real v_lr, f_lr;
703
704                 const RealType ewrt   = r * coulombTableScale;
705                 IntType        ewitab = static_cast<IntType>(ewrt);
706                 const RealType eweps  = ewrt - ewitab;
707                 ewitab                = 4 * ewitab;
708                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
709                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
710                 f_lr *= rInv;
711
712                 /* Note that any possible Ewald shift has already been applied in
713                  * the normal interaction part above.
714                  */
715
716                 if (ii == jnr)
717                 {
718                     /* If we get here, the i particle (ii) has itself (jnr)
719                      * in its neighborlist. This can only happen with the Verlet
720                      * scheme, and corresponds to a self-interaction that will
721                      * occur twice. Scale it down by 50% to only include it once.
722                      */
723                     v_lr *= half;
724                 }
725
726                 for (int i = 0; i < NSTATES; i++)
727                 {
728                     vCTot -= LFC[i] * qq[i] * v_lr;
729                     fScal -= LFC[i] * qq[i] * f_lr;
730                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
731                 }
732             }
733
734             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
735             {
736                 /* See comment in the preamble. When using LJ-Ewald interactions
737                  * (unless we use a switch modifier) we subtract the reciprocal-space
738                  * Ewald component here which made it possible to apply the free
739                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
740                  * above. This gets us closer to the ideal case of applying
741                  * the softcore to the entire VdW interaction,
742                  * including the reciprocal-space component.
743                  */
744                 /* We could also use the analytical form here
745                  * iso a table, but that can cause issues for
746                  * r close to 0 for non-interacting pairs.
747                  */
748
749                 const RealType rs   = rSq * rInv * vdwTableScale;
750                 const IntType  ri   = static_cast<IntType>(rs);
751                 const RealType frac = rs - ri;
752                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
753                 /* TODO: Currently the Ewald LJ table does not contain
754                  * the factor 1/6, we should add this.
755                  */
756                 const RealType FF = f_lr * rInv / six;
757                 RealType       VV =
758                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
759                         / six;
760
761                 if (ii == jnr)
762                 {
763                     /* If we get here, the i particle (ii) has itself (jnr)
764                      * in its neighborlist. This can only happen with the Verlet
765                      * scheme, and corresponds to a self-interaction that will
766                      * occur twice. Scale it down by 50% to only include it once.
767                      */
768                     VV *= half;
769                 }
770
771                 for (int i = 0; i < NSTATES; i++)
772                 {
773                     const real c6grid = nbfp_grid[tj[i]];
774                     vVTot += LFV[i] * c6grid * VV;
775                     fScal += LFV[i] * c6grid * FF;
776                     dvdlVdw += (DLF[i] * c6grid) * VV;
777                 }
778             }
779
780             if (doForces)
781             {
782                 const real tX = fScal * dX;
783                 const real tY = fScal * dY;
784                 const real tZ = fScal * dZ;
785                 fIX           = fIX + tX;
786                 fIY           = fIY + tY;
787                 fIZ           = fIZ + tZ;
788                 /* OpenMP atomics are expensive, but this kernels is also
789                  * expensive, so we can take this hit, instead of using
790                  * thread-local output buffers and extra reduction.
791                  *
792                  * All the OpenMP regions in this file are trivial and should
793                  * not throw, so no need for try/catch.
794                  */
795 #pragma omp atomic
796                 f[j3] -= tX;
797 #pragma omp atomic
798                 f[j3 + 1] -= tY;
799 #pragma omp atomic
800                 f[j3 + 2] -= tZ;
801             }
802         } // end for (int k = nj0; k < nj1; k++)
803
804         /* The atomics below are expensive with many OpenMP threads.
805          * Here unperturbed i-particles will usually only have a few
806          * (perturbed) j-particles in the list. Thus with a buffered list
807          * we can skip a significant number of i-reductions with a check.
808          */
809         if (npair_within_cutoff > 0)
810         {
811             if (doForces)
812             {
813 #pragma omp atomic
814                 f[ii3] += fIX;
815 #pragma omp atomic
816                 f[ii3 + 1] += fIY;
817 #pragma omp atomic
818                 f[ii3 + 2] += fIZ;
819             }
820             if (doShiftForces)
821             {
822 #pragma omp atomic
823                 fshift[is3] += fIX;
824 #pragma omp atomic
825                 fshift[is3 + 1] += fIY;
826 #pragma omp atomic
827                 fshift[is3 + 2] += fIZ;
828             }
829             if (doPotential)
830             {
831                 int ggid = gid[n];
832 #pragma omp atomic
833                 energygrp_elec[ggid] += vCTot;
834 #pragma omp atomic
835                 energygrp_vdw[ggid] += vVTot;
836             }
837         }
838     } // end for (int n = 0; n < nri; n++)
839
840 #pragma omp atomic
841     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdlCoul;
842 #pragma omp atomic
843     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdlVdw;
844
845     /* Estimate flops, average for free energy stuff:
846      * 12  flops per outer iteration
847      * 150 flops per inner iteration
848      */
849     atomicNrnbIncrement(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist.nri * 12 + nlist.jindex[nri] * 150);
850
851     if (numExcludedPairsBeyondRlist > 0)
852     {
853         gmx_fatal(FARGS,
854                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
855                   "of %g nm, which is not supported. This can happen because the system is "
856                   "unstable or because intra-molecular interactions at long distances are "
857                   "excluded. If the "
858                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
859                   "The error is likely triggered by the use of couple-intramol=no "
860                   "and the maximal distance in the decoupled molecule exceeding rlist.",
861                   numExcludedPairsBeyondRlist,
862                   rlist);
863     }
864 }
865
866 typedef void (*KernelFunction)(const t_nblist&                nlist,
867                                gmx::ArrayRef<const gmx::RVec> coords,
868                                gmx::ForceWithShiftForces*     forceWithShiftForces,
869                                const int                      ntype,
870                                const real                     rlist,
871                                const interaction_const_t&     ic,
872                                gmx::ArrayRef<const gmx::RVec> shiftvec,
873                                gmx::ArrayRef<const real>      nbfp,
874                                gmx::ArrayRef<const real>      nbfp_grid,
875                                gmx::ArrayRef<const real>      chargeA,
876                                gmx::ArrayRef<const real>      chargeB,
877                                gmx::ArrayRef<const int>       typeA,
878                                gmx::ArrayRef<const int>       typeB,
879                                int                            flags,
880                                gmx::ArrayRef<const real>      lambda,
881                                gmx::ArrayRef<real>            dvdl,
882                                gmx::ArrayRef<real>            energygrp_elec,
883                                gmx::ArrayRef<real>            energygrp_vdw,
884                                t_nrnb* gmx_restrict           nrnb);
885
886 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
887 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
888 {
889     if (useSimd)
890     {
891 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
892         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
893         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
894 #else
895         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
896 #endif
897     }
898     else
899     {
900         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
901     }
902 }
903
904 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
905 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
906 {
907     if (vdwModifierIsPotSwitch)
908     {
909         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
910                 useSimd));
911     }
912     else
913     {
914         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
915                 useSimd));
916     }
917 }
918
919 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
920 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
921                                                           const bool vdwModifierIsPotSwitch,
922                                                           const bool useSimd)
923 {
924     if (elecInteractionTypeIsEwald)
925     {
926         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
927                 vdwModifierIsPotSwitch, useSimd));
928     }
929     else
930     {
931         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
932                 vdwModifierIsPotSwitch, useSimd));
933     }
934 }
935
936 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
937 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
938                                                          const bool elecInteractionTypeIsEwald,
939                                                          const bool vdwModifierIsPotSwitch,
940                                                          const bool useSimd)
941 {
942     if (vdwInteractionTypeIsEwald)
943     {
944         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
945                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
946     }
947     else
948     {
949         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
950                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
951     }
952 }
953
954 template<bool useSoftCore>
955 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
956                                                                   const bool vdwInteractionTypeIsEwald,
957                                                                   const bool elecInteractionTypeIsEwald,
958                                                                   const bool vdwModifierIsPotSwitch,
959                                                                   const bool useSimd)
960 {
961     if (scLambdasOrAlphasDiffer)
962     {
963         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
964                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
965     }
966     else
967     {
968         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
969                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
970     }
971 }
972
973 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
974                                      const bool                 vdwInteractionTypeIsEwald,
975                                      const bool                 elecInteractionTypeIsEwald,
976                                      const bool                 vdwModifierIsPotSwitch,
977                                      const bool                 useSimd,
978                                      const interaction_const_t& ic)
979 {
980     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
981     {
982         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
983                                                                    vdwInteractionTypeIsEwald,
984                                                                    elecInteractionTypeIsEwald,
985                                                                    vdwModifierIsPotSwitch,
986                                                                    useSimd));
987     }
988     else
989     {
990         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
991                                                                   vdwInteractionTypeIsEwald,
992                                                                   elecInteractionTypeIsEwald,
993                                                                   vdwModifierIsPotSwitch,
994                                                                   useSimd));
995     }
996 }
997
998
999 void gmx_nb_free_energy_kernel(const t_nblist&                nlist,
1000                                gmx::ArrayRef<const gmx::RVec> coords,
1001                                gmx::ForceWithShiftForces*     ff,
1002                                const bool                     useSimd,
1003                                const int                      ntype,
1004                                const real                     rlist,
1005                                const interaction_const_t&     ic,
1006                                gmx::ArrayRef<const gmx::RVec> shiftvec,
1007                                gmx::ArrayRef<const real>      nbfp,
1008                                gmx::ArrayRef<const real>      nbfp_grid,
1009                                gmx::ArrayRef<const real>      chargeA,
1010                                gmx::ArrayRef<const real>      chargeB,
1011                                gmx::ArrayRef<const int>       typeA,
1012                                gmx::ArrayRef<const int>       typeB,
1013                                int                            flags,
1014                                gmx::ArrayRef<const real>      lambda,
1015                                gmx::ArrayRef<real>            dvdl,
1016                                gmx::ArrayRef<real>            energygrp_elec,
1017                                gmx::ArrayRef<real>            energygrp_vdw,
1018                                t_nrnb*                        nrnb)
1019 {
1020     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1021                "Unsupported eeltype with free energy");
1022     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1023
1024     const auto& scParams                   = *ic.softCoreParameters;
1025     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1026     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1027     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1028     bool        scLambdasOrAlphasDiffer    = true;
1029
1030     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1031     {
1032         scLambdasOrAlphasDiffer = false;
1033     }
1034     else
1035     {
1036         if (lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1037                     == lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1038             && scParams.alphaCoulomb == scParams.alphaVdw)
1039         {
1040             scLambdasOrAlphasDiffer = false;
1041         }
1042     }
1043
1044     KernelFunction kernelFunc;
1045     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1046                                 vdwInteractionTypeIsEwald,
1047                                 elecInteractionTypeIsEwald,
1048                                 vdwModifierIsPotSwitch,
1049                                 useSimd,
1050                                 ic);
1051     kernelFunc(nlist,
1052                coords,
1053                ff,
1054                ntype,
1055                rlist,
1056                ic,
1057                shiftvec,
1058                nbfp,
1059                nbfp_grid,
1060                chargeA,
1061                chargeB,
1062                typeA,
1063                typeB,
1064                flags,
1065                lambda,
1066                dvdl,
1067                energygrp_elec,
1068                energygrp_vdw,
1069                nrnb);
1070 }