Turn t_forcerec.shift_vec into an std::vector of gmx::RVec
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
50 #include "gromacs/math/functions.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/mdtypes/forceoutput.h"
53 #include "gromacs/mdtypes/forcerec.h"
54 #include "gromacs/mdtypes/interaction_const.h"
55 #include "gromacs/mdtypes/md_enums.h"
56 #include "gromacs/mdtypes/mdatom.h"
57 #include "gromacs/mdtypes/nblist.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/arrayref.h"
61
62
63 //! Scalar (non-SIMD) data types.
64 struct ScalarDataTypes
65 {
66     using RealType                     = real; //!< The data type to use as real.
67     using IntType                      = int;  //!< The data type to use as int.
68     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
69     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
70 };
71
72 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
73 //! SIMD data types.
74 struct SimdDataTypes
75 {
76     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
77     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
78     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
79     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 };
81 #endif
82
83 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
84 template<class RealType>
85 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
86 {
87     *invPthRoot = gmx::invsqrt(std::cbrt(r));
88     *pthRoot    = 1 / (*invPthRoot);
89 }
90
91 template<class RealType>
92 static inline RealType calculateRinv6(const RealType rInvV)
93 {
94     RealType rInv6 = rInvV * rInvV;
95     return (rInv6 * rInv6 * rInv6);
96 }
97
98 template<class RealType>
99 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
100 {
101     return (c6 * rInv6);
102 }
103
104 template<class RealType>
105 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
106 {
107     return (c12 * rInv6 * rInv6);
108 }
109
110 /* reaction-field electrostatics */
111 template<class RealType>
112 static inline RealType reactionFieldScalarForce(const RealType qq,
113                                                 const RealType rInv,
114                                                 const RealType r,
115                                                 const real     krf,
116                                                 const real     two)
117 {
118     return (qq * (rInv - two * krf * r * r));
119 }
120 template<class RealType>
121 static inline RealType reactionFieldPotential(const RealType qq,
122                                               const RealType rInv,
123                                               const RealType r,
124                                               const real     krf,
125                                               const real     potentialShift)
126 {
127     return (qq * (rInv + krf * r * r - potentialShift));
128 }
129
130 /* Ewald electrostatics */
131 template<class RealType>
132 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
133 {
134     return (coulomb * rInv);
135 }
136 template<class RealType>
137 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
138 {
139     return (coulomb * (rInv - potentialShift));
140 }
141
142 /* cutoff LJ */
143 template<class RealType>
144 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 {
146     return (v12 - v6);
147 }
148 template<class RealType>
149 static inline RealType lennardJonesPotential(const RealType v6,
150                                              const RealType v12,
151                                              const RealType c6,
152                                              const RealType c12,
153                                              const real     repulsionShift,
154                                              const real     dispersionShift,
155                                              const real     oneSixth,
156                                              const real     oneTwelfth)
157 {
158     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
159 }
160
161 /* Ewald LJ */
162 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
163 {
164     return (c6grid * potentialShift * oneSixth);
165 }
166
167 /* LJ Potential switch */
168 template<class RealType>
169 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
170                                                const RealType potential,
171                                                const RealType sw,
172                                                const RealType r,
173                                                const RealType rVdw,
174                                                const RealType dsw,
175                                                const real     zero)
176 {
177     if (r < rVdw)
178     {
179         real fScalar = fScalarInp * sw - r * potential * dsw;
180         return (fScalar);
181     }
182     return (zero);
183 }
184 template<class RealType>
185 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
186                                              const RealType sw,
187                                              const RealType r,
188                                              const RealType rVdw,
189                                              const real     zero)
190 {
191     if (r < rVdw)
192     {
193         real potential = potentialInp * sw;
194         return (potential);
195     }
196     return (zero);
197 }
198
199
200 //! Templated free-energy non-bonded kernel
201 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
202 static void nb_free_energy_kernel(const t_nblist&                nlist,
203                                   gmx::ArrayRef<const gmx::RVec> coords,
204                                   gmx::ForceWithShiftForces*     forceWithShiftForces,
205                                   const t_forcerec&              fr,
206                                   gmx::ArrayRef<const real>      chargeA,
207                                   gmx::ArrayRef<const real>      chargeB,
208                                   gmx::ArrayRef<const int>       typeA,
209                                   gmx::ArrayRef<const int>       typeB,
210                                   int                            flags,
211                                   gmx::ArrayRef<const real>      lambda,
212                                   gmx::ArrayRef<real>            dvdl,
213                                   gmx::ArrayRef<real>            energygrp_elec,
214                                   gmx::ArrayRef<real>            energygrp_vdw,
215                                   t_nrnb* gmx_restrict nrnb)
216 {
217 #define STATE_A 0
218 #define STATE_B 1
219 #define NSTATES 2
220
221     using RealType = typename DataTypes::RealType;
222     using IntType  = typename DataTypes::IntType;
223
224     /* FIXME: How should these be handled with SIMD? */
225     constexpr real oneTwelfth = 1.0 / 12.0;
226     constexpr real oneSixth   = 1.0 / 6.0;
227     constexpr real zero       = 0.0;
228     constexpr real half       = 0.5;
229     constexpr real one        = 1.0;
230     constexpr real two        = 2.0;
231     constexpr real six        = 6.0;
232
233     /* Extract pointer to non-bonded interaction constants */
234     const interaction_const_t* ic = fr.ic.get();
235
236     // Extract pair list data
237     const int                nri    = nlist.nri;
238     gmx::ArrayRef<const int> iinr   = nlist.iinr;
239     gmx::ArrayRef<const int> jindex = nlist.jindex;
240     gmx::ArrayRef<const int> jjnr   = nlist.jjnr;
241     gmx::ArrayRef<const int> shift  = nlist.shift;
242     gmx::ArrayRef<const int> gid    = nlist.gid;
243
244     const auto                shiftvec  = fr.shift_vec;
245     const int                 ntype     = fr.ntype;
246     gmx::ArrayRef<const real> nbfp      = fr.nbfp;
247     gmx::ArrayRef<const real> nbfp_grid = fr.ljpme_c6grid;
248
249     const real  lambda_coul   = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
250     const real  lambda_vdw    = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
251     const auto& scParams      = *ic->softCoreParameters;
252     const real  alpha_coul    = scParams.alphaCoulomb;
253     const real  alpha_vdw     = scParams.alphaVdw;
254     const real  lam_power     = scParams.lambdaPower;
255     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
256     const real  sigma6_min    = scParams.sigma6Minimum;
257     const bool  doForces      = ((flags & GMX_NONBONDED_DO_FORCE) != 0);
258     const bool  doShiftForces = ((flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
259     const bool  doPotential   = ((flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
260
261     // Extract data from interaction_const_t
262     const real facel           = ic->epsfac;
263     const real rCoulomb        = ic->rcoulomb;
264     const real krf             = ic->reactionFieldCoefficient;
265     const real crf             = ic->reactionFieldShift;
266     const real shLjEwald       = ic->sh_lj_ewald;
267     const real rVdw            = ic->rvdw;
268     const real dispersionShift = ic->dispersion_shift.cpot;
269     const real repulsionShift  = ic->repulsion_shift.cpot;
270
271     // Note that the nbnxm kernels do not support Coulomb potential switching at all
272     GMX_ASSERT(ic->coulomb_modifier != InteractionModifiers::PotSwitch,
273                "Potential switching is not supported for Coulomb with FEP");
274
275     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
276     if (vdwModifierIsPotSwitch)
277     {
278         const real d = ic->rvdw - ic->rvdw_switch;
279         vdw_swV3     = -10.0 / (d * d * d);
280         vdw_swV4     = 15.0 / (d * d * d * d);
281         vdw_swV5     = -6.0 / (d * d * d * d * d);
282         vdw_swF2     = -30.0 / (d * d * d);
283         vdw_swF3     = 60.0 / (d * d * d * d);
284         vdw_swF4     = -30.0 / (d * d * d * d * d);
285     }
286     else
287     {
288         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
289         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
290     }
291
292     NbkernelElecType icoul;
293     if (ic->eeltype == CoulombInteractionType::Cut || EEL_RF(ic->eeltype))
294     {
295         icoul = NbkernelElecType::ReactionField;
296     }
297     else
298     {
299         icoul = NbkernelElecType::None;
300     }
301
302     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
303     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
304
305     const real* tab_ewald_F_lj           = nullptr;
306     const real* tab_ewald_V_lj           = nullptr;
307     const real* ewtab                    = nullptr;
308     real        coulombTableScale        = 0;
309     real        coulombTableScaleInvHalf = 0;
310     real        vdwTableScale            = 0;
311     real        vdwTableScaleInvHalf     = 0;
312     real        sh_ewald                 = 0;
313     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
314     {
315         sh_ewald = ic->sh_ewald;
316     }
317     if (elecInteractionTypeIsEwald)
318     {
319         const auto& coulombTables = *ic->coulombEwaldTables;
320         ewtab                     = coulombTables.tableFDV0.data();
321         coulombTableScale         = coulombTables.scale;
322         coulombTableScaleInvHalf  = half / coulombTableScale;
323     }
324     if (vdwInteractionTypeIsEwald)
325     {
326         const auto& vdwTables = *ic->vdwEwaldTables;
327         tab_ewald_F_lj        = vdwTables.tableF.data();
328         tab_ewald_V_lj        = vdwTables.tableV.data();
329         vdwTableScale         = vdwTables.scale;
330         vdwTableScaleInvHalf  = half / vdwTableScale;
331     }
332
333     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
334      * reciprocal space. When we use non-switched Ewald interactions, we
335      * assume the soft-coring does not significantly affect the grid contribution
336      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
337      *
338      * However, we cannot use this approach for switch-modified since we would then
339      * effectively end up evaluating a significantly different interaction here compared to the
340      * normal (non-free-energy) kernels, either by applying a cutoff at a different
341      * position than what the user requested, or by switching different
342      * things (1/r rather than short-range Ewald). For these settings, we just
343      * use the traditional short-range Ewald interaction in that case.
344      */
345     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
346                        "Can not apply soft-core to switched Ewald potentials");
347
348     real dvdlCoul = 0;
349     real dvdlVdw  = 0;
350
351     /* Lambda factor for state A, 1-lambda*/
352     real LFC[NSTATES], LFV[NSTATES];
353     LFC[STATE_A] = one - lambda_coul;
354     LFV[STATE_A] = one - lambda_vdw;
355
356     /* Lambda factor for state B, lambda*/
357     LFC[STATE_B] = lambda_coul;
358     LFV[STATE_B] = lambda_vdw;
359
360     /*derivative of the lambda factor for state A and B */
361     real DLF[NSTATES];
362     DLF[STATE_A] = -1;
363     DLF[STATE_B] = 1;
364
365     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
366     constexpr real sc_r_power = 6.0_real;
367     for (int i = 0; i < NSTATES; i++)
368     {
369         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
370         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
371         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
372         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
373     }
374
375     // TODO: We should get rid of using pointers to real
376     const real* x             = coords[0];
377     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
378     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
379
380     const real rlistSquared = gmx::square(fr.rlist);
381
382     int numExcludedPairsBeyondRlist = 0;
383
384     for (int n = 0; n < nri; n++)
385     {
386         int npair_within_cutoff = 0;
387
388         const int  is    = shift[n];
389         const int  is3   = DIM * is;
390         const real shX   = shiftvec[is][XX];
391         const real shY   = shiftvec[is][YY];
392         const real shZ   = shiftvec[is][ZZ];
393         const int  nj0   = jindex[n];
394         const int  nj1   = jindex[n + 1];
395         const int  ii    = iinr[n];
396         const int  ii3   = 3 * ii;
397         const real ix    = shX + x[ii3 + 0];
398         const real iy    = shY + x[ii3 + 1];
399         const real iz    = shZ + x[ii3 + 2];
400         const real iqA   = facel * chargeA[ii];
401         const real iqB   = facel * chargeB[ii];
402         const int  ntiA  = 2 * ntype * typeA[ii];
403         const int  ntiB  = 2 * ntype * typeB[ii];
404         real       vCTot = 0;
405         real       vVTot = 0;
406         real       fIX   = 0;
407         real       fIY   = 0;
408         real       fIZ   = 0;
409
410         for (int k = nj0; k < nj1; k++)
411         {
412             int            tj[NSTATES];
413             const int      jnr = jjnr[k];
414             const int      j3  = 3 * jnr;
415             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
416             RealType       r, rInv, rp, rpm2;
417             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
418             const RealType dX  = ix - x[j3];
419             const RealType dY  = iy - x[j3 + 1];
420             const RealType dZ  = iz - x[j3 + 2];
421             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
422             RealType       fScalC[NSTATES], fScalV[NSTATES];
423             /* Check if this pair on the exlusions list.*/
424             const bool bPairIncluded = nlist.excl_fep.empty() || nlist.excl_fep[k];
425
426             if (rSq >= rcutoff_max2 && bPairIncluded)
427             {
428                 /* We save significant time by skipping all code below.
429                  * Note that with soft-core interactions, the actual cut-off
430                  * check might be different. But since the soft-core distance
431                  * is always larger than r, checking on r here is safe.
432                  * Exclusions outside the cutoff can not be skipped as
433                  * when using Ewald: the reciprocal-space
434                  * Ewald component still needs to be subtracted.
435                  */
436
437                 continue;
438             }
439             npair_within_cutoff++;
440
441             if (rSq > rlistSquared)
442             {
443                 numExcludedPairsBeyondRlist++;
444             }
445
446             if (rSq > 0)
447             {
448                 /* Note that unlike in the nbnxn kernels, we do not need
449                  * to clamp the value of rSq before taking the invsqrt
450                  * to avoid NaN in the LJ calculation, since here we do
451                  * not calculate LJ interactions when C6 and C12 are zero.
452                  */
453
454                 rInv = gmx::invsqrt(rSq);
455                 r    = rSq * rInv;
456             }
457             else
458             {
459                 /* The force at r=0 is zero, because of symmetry.
460                  * But note that the potential is in general non-zero,
461                  * since the soft-cored r will be non-zero.
462                  */
463                 rInv = 0;
464                 r    = 0;
465             }
466
467             if (useSoftCore)
468             {
469                 rpm2 = rSq * rSq;  /* r4 */
470                 rp   = rpm2 * rSq; /* r6 */
471             }
472             else
473             {
474                 /* The soft-core power p will not affect the results
475                  * with not using soft-core, so we use power of 0 which gives
476                  * the simplest math and cheapest code.
477                  */
478                 rpm2 = rInv * rInv;
479                 rp   = 1;
480             }
481
482             RealType fScal = 0;
483
484             qq[STATE_A] = iqA * chargeA[jnr];
485             qq[STATE_B] = iqB * chargeB[jnr];
486
487             tj[STATE_A] = ntiA + 2 * typeA[jnr];
488             tj[STATE_B] = ntiB + 2 * typeB[jnr];
489
490             if (bPairIncluded)
491             {
492                 c6[STATE_A] = nbfp[tj[STATE_A]];
493                 c6[STATE_B] = nbfp[tj[STATE_B]];
494
495                 for (int i = 0; i < NSTATES; i++)
496                 {
497                     c12[i] = nbfp[tj[i] + 1];
498                     if (useSoftCore)
499                     {
500                         if ((c6[i] > 0) && (c12[i] > 0))
501                         {
502                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
503                             sigma6[i] = half * c12[i] / c6[i];
504                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
505                             {
506                                 sigma6[i] = sigma6_min;
507                             }
508                         }
509                         else
510                         {
511                             sigma6[i] = sigma6_def;
512                         }
513                     }
514                 }
515
516                 if (useSoftCore)
517                 {
518                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
519                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
520                     {
521                         alphaVdwEff  = 0;
522                         alphaCoulEff = 0;
523                     }
524                     else
525                     {
526                         alphaVdwEff  = alpha_vdw;
527                         alphaCoulEff = alpha_coul;
528                     }
529                 }
530
531                 for (int i = 0; i < NSTATES; i++)
532                 {
533                     fScalC[i] = 0;
534                     fScalV[i] = 0;
535                     vCoul[i]  = 0;
536                     vVdw[i]   = 0;
537
538                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
539
540                     /* Only spend time on A or B state if it is non-zero */
541                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
542                     {
543                         /* this section has to be inside the loop because of the dependence on sigma6 */
544                         if (useSoftCore)
545                         {
546                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
547                             pthRoot(rPInvC, &rInvC, &rC);
548                             if (scLambdasOrAlphasDiffer)
549                             {
550                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
551                                 pthRoot(rPInvV, &rInvV, &rV);
552                             }
553                             else
554                             {
555                                 /* We can avoid one expensive pow and one / operation */
556                                 rPInvV = rPInvC;
557                                 rInvV  = rInvC;
558                                 rV     = rC;
559                             }
560                         }
561                         else
562                         {
563                             rPInvC = 1;
564                             rInvC  = rInv;
565                             rC     = r;
566
567                             rPInvV = 1;
568                             rInvV  = rInv;
569                             rV     = r;
570                         }
571
572                         /* Only process the coulomb interactions if we have charges,
573                          * and if we either include all entries in the list (no cutoff
574                          * used in the kernel), or if we are within the cutoff.
575                          */
576                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
577                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
578
579                         if ((qq[i] != 0) && computeElecInteraction)
580                         {
581                             if (elecInteractionTypeIsEwald)
582                             {
583                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
584                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
585                             }
586                             else
587                             {
588                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
589                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
590                             }
591                         }
592
593                         /* Only process the VDW interactions if we have
594                          * some non-zero parameters, and if we either
595                          * include all entries in the list (no cutoff used
596                          * in the kernel), or if we are within the cutoff.
597                          */
598                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
599                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
600                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
601                         {
602                             RealType rInv6;
603                             if (useSoftCore)
604                             {
605                                 rInv6 = rPInvV;
606                             }
607                             else
608                             {
609                                 rInv6 = calculateRinv6(rInvV);
610                             }
611                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
612                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
613
614                             vVdw[i] = lennardJonesPotential(
615                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
616                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
617
618                             if (vdwInteractionTypeIsEwald)
619                             {
620                                 /* Subtract the grid potential at the cut-off */
621                                 vVdw[i] += ewaldLennardJonesGridSubtract(
622                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
623                             }
624
625                             if (vdwModifierIsPotSwitch)
626                             {
627                                 RealType d        = rV - ic->rvdw_switch;
628                                 d                 = (d > zero) ? d : zero;
629                                 const RealType d2 = d * d;
630                                 const RealType sw =
631                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
632                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
633
634                                 fScalV[i] = potSwitchScalarForceMod(
635                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
636                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
637                             }
638                         }
639
640                         /* fScalC (and fScalV) now contain: dV/drC * rC
641                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
642                          * Further down we first multiply by r^p-2 and then by
643                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
644                          */
645                         fScalC[i] *= rPInvC;
646                         fScalV[i] *= rPInvV;
647                     }
648                 } // end for (int i = 0; i < NSTATES; i++)
649
650                 /* Assemble A and B states */
651                 for (int i = 0; i < NSTATES; i++)
652                 {
653                     vCTot += LFC[i] * vCoul[i];
654                     vVTot += LFV[i] * vVdw[i];
655
656                     fScal += LFC[i] * fScalC[i] * rpm2;
657                     fScal += LFV[i] * fScalV[i] * rpm2;
658
659                     if (useSoftCore)
660                     {
661                         dvdlCoul += vCoul[i] * DLF[i]
662                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
663                         dvdlVdw += vVdw[i] * DLF[i]
664                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
665                     }
666                     else
667                     {
668                         dvdlCoul += vCoul[i] * DLF[i];
669                         dvdlVdw += vVdw[i] * DLF[i];
670                     }
671                 }
672             } // end if (bPairIncluded)
673             else if (icoul == NbkernelElecType::ReactionField)
674             {
675                 /* For excluded pairs, which are only in this pair list when
676                  * using the Verlet scheme, we don't use soft-core.
677                  * As there is no singularity, there is no need for soft-core.
678                  */
679                 const real FF = -two * krf;
680                 RealType   VV = krf * rSq - crf;
681
682                 if (ii == jnr)
683                 {
684                     VV *= half;
685                 }
686
687                 for (int i = 0; i < NSTATES; i++)
688                 {
689                     vCTot += LFC[i] * qq[i] * VV;
690                     fScal += LFC[i] * qq[i] * FF;
691                     dvdlCoul += DLF[i] * qq[i] * VV;
692                 }
693             }
694
695             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
696             {
697                 /* See comment in the preamble. When using Ewald interactions
698                  * (unless we use a switch modifier) we subtract the reciprocal-space
699                  * Ewald component here which made it possible to apply the free
700                  * energy interaction to 1/r (vanilla coulomb short-range part)
701                  * above. This gets us closer to the ideal case of applying
702                  * the softcore to the entire electrostatic interaction,
703                  * including the reciprocal-space component.
704                  */
705                 real v_lr, f_lr;
706
707                 const RealType ewrt   = r * coulombTableScale;
708                 IntType        ewitab = static_cast<IntType>(ewrt);
709                 const RealType eweps  = ewrt - ewitab;
710                 ewitab                = 4 * ewitab;
711                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
712                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
713                 f_lr *= rInv;
714
715                 /* Note that any possible Ewald shift has already been applied in
716                  * the normal interaction part above.
717                  */
718
719                 if (ii == jnr)
720                 {
721                     /* If we get here, the i particle (ii) has itself (jnr)
722                      * in its neighborlist. This can only happen with the Verlet
723                      * scheme, and corresponds to a self-interaction that will
724                      * occur twice. Scale it down by 50% to only include it once.
725                      */
726                     v_lr *= half;
727                 }
728
729                 for (int i = 0; i < NSTATES; i++)
730                 {
731                     vCTot -= LFC[i] * qq[i] * v_lr;
732                     fScal -= LFC[i] * qq[i] * f_lr;
733                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
734                 }
735             }
736
737             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
738             {
739                 /* See comment in the preamble. When using LJ-Ewald interactions
740                  * (unless we use a switch modifier) we subtract the reciprocal-space
741                  * Ewald component here which made it possible to apply the free
742                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
743                  * above. This gets us closer to the ideal case of applying
744                  * the softcore to the entire VdW interaction,
745                  * including the reciprocal-space component.
746                  */
747                 /* We could also use the analytical form here
748                  * iso a table, but that can cause issues for
749                  * r close to 0 for non-interacting pairs.
750                  */
751
752                 const RealType rs   = rSq * rInv * vdwTableScale;
753                 const IntType  ri   = static_cast<IntType>(rs);
754                 const RealType frac = rs - ri;
755                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
756                 /* TODO: Currently the Ewald LJ table does not contain
757                  * the factor 1/6, we should add this.
758                  */
759                 const RealType FF = f_lr * rInv / six;
760                 RealType       VV =
761                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
762                         / six;
763
764                 if (ii == jnr)
765                 {
766                     /* If we get here, the i particle (ii) has itself (jnr)
767                      * in its neighborlist. This can only happen with the Verlet
768                      * scheme, and corresponds to a self-interaction that will
769                      * occur twice. Scale it down by 50% to only include it once.
770                      */
771                     VV *= half;
772                 }
773
774                 for (int i = 0; i < NSTATES; i++)
775                 {
776                     const real c6grid = nbfp_grid[tj[i]];
777                     vVTot += LFV[i] * c6grid * VV;
778                     fScal += LFV[i] * c6grid * FF;
779                     dvdlVdw += (DLF[i] * c6grid) * VV;
780                 }
781             }
782
783             if (doForces)
784             {
785                 const real tX = fScal * dX;
786                 const real tY = fScal * dY;
787                 const real tZ = fScal * dZ;
788                 fIX           = fIX + tX;
789                 fIY           = fIY + tY;
790                 fIZ           = fIZ + tZ;
791                 /* OpenMP atomics are expensive, but this kernels is also
792                  * expensive, so we can take this hit, instead of using
793                  * thread-local output buffers and extra reduction.
794                  *
795                  * All the OpenMP regions in this file are trivial and should
796                  * not throw, so no need for try/catch.
797                  */
798 #pragma omp atomic
799                 f[j3] -= tX;
800 #pragma omp atomic
801                 f[j3 + 1] -= tY;
802 #pragma omp atomic
803                 f[j3 + 2] -= tZ;
804             }
805         } // end for (int k = nj0; k < nj1; k++)
806
807         /* The atomics below are expensive with many OpenMP threads.
808          * Here unperturbed i-particles will usually only have a few
809          * (perturbed) j-particles in the list. Thus with a buffered list
810          * we can skip a significant number of i-reductions with a check.
811          */
812         if (npair_within_cutoff > 0)
813         {
814             if (doForces)
815             {
816 #pragma omp atomic
817                 f[ii3] += fIX;
818 #pragma omp atomic
819                 f[ii3 + 1] += fIY;
820 #pragma omp atomic
821                 f[ii3 + 2] += fIZ;
822             }
823             if (doShiftForces)
824             {
825 #pragma omp atomic
826                 fshift[is3] += fIX;
827 #pragma omp atomic
828                 fshift[is3 + 1] += fIY;
829 #pragma omp atomic
830                 fshift[is3 + 2] += fIZ;
831             }
832             if (doPotential)
833             {
834                 int ggid = gid[n];
835 #pragma omp atomic
836                 energygrp_elec[ggid] += vCTot;
837 #pragma omp atomic
838                 energygrp_vdw[ggid] += vVTot;
839             }
840         }
841     } // end for (int n = 0; n < nri; n++)
842
843 #pragma omp atomic
844     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)] += dvdlCoul;
845 #pragma omp atomic
846     dvdl[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)] += dvdlVdw;
847
848     /* Estimate flops, average for free energy stuff:
849      * 12  flops per outer iteration
850      * 150 flops per inner iteration
851      */
852     atomicNrnbIncrement(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist.nri * 12 + nlist.jindex[nri] * 150);
853
854     if (numExcludedPairsBeyondRlist > 0)
855     {
856         gmx_fatal(FARGS,
857                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
858                   "of %g nm, which is not supported. This can happen because the system is "
859                   "unstable or because intra-molecular interactions at long distances are "
860                   "excluded. If the "
861                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
862                   "The error is likely triggered by the use of couple-intramol=no "
863                   "and the maximal distance in the decoupled molecule exceeding rlist.",
864                   numExcludedPairsBeyondRlist,
865                   fr.rlist);
866     }
867 }
868
869 typedef void (*KernelFunction)(const t_nblist&                nlist,
870                                gmx::ArrayRef<const gmx::RVec> coords,
871                                gmx::ForceWithShiftForces*     forceWithShiftForces,
872                                const t_forcerec&              fr,
873                                gmx::ArrayRef<const real>      chargeA,
874                                gmx::ArrayRef<const real>      chargeB,
875                                gmx::ArrayRef<const int>       typeA,
876                                gmx::ArrayRef<const int>       typeB,
877                                int                            flags,
878                                gmx::ArrayRef<const real>      lambda,
879                                gmx::ArrayRef<real>            dvdl,
880                                gmx::ArrayRef<real>            energygrp_elec,
881                                gmx::ArrayRef<real>            energygrp_vdw,
882                                t_nrnb* gmx_restrict nrnb);
883
884 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
885 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
886 {
887     if (useSimd)
888     {
889 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
890         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
891         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
892 #else
893         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
894 #endif
895     }
896     else
897     {
898         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
899     }
900 }
901
902 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
903 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
904 {
905     if (vdwModifierIsPotSwitch)
906     {
907         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
908                 useSimd));
909     }
910     else
911     {
912         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
913                 useSimd));
914     }
915 }
916
917 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
918 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
919                                                           const bool vdwModifierIsPotSwitch,
920                                                           const bool useSimd)
921 {
922     if (elecInteractionTypeIsEwald)
923     {
924         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
925                 vdwModifierIsPotSwitch, useSimd));
926     }
927     else
928     {
929         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
930                 vdwModifierIsPotSwitch, useSimd));
931     }
932 }
933
934 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
935 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
936                                                          const bool elecInteractionTypeIsEwald,
937                                                          const bool vdwModifierIsPotSwitch,
938                                                          const bool useSimd)
939 {
940     if (vdwInteractionTypeIsEwald)
941     {
942         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
943                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
944     }
945     else
946     {
947         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
948                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
949     }
950 }
951
952 template<bool useSoftCore>
953 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
954                                                                   const bool vdwInteractionTypeIsEwald,
955                                                                   const bool elecInteractionTypeIsEwald,
956                                                                   const bool vdwModifierIsPotSwitch,
957                                                                   const bool useSimd)
958 {
959     if (scLambdasOrAlphasDiffer)
960     {
961         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
962                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
963     }
964     else
965     {
966         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
967                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
968     }
969 }
970
971 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
972                                      const bool                 vdwInteractionTypeIsEwald,
973                                      const bool                 elecInteractionTypeIsEwald,
974                                      const bool                 vdwModifierIsPotSwitch,
975                                      const bool                 useSimd,
976                                      const interaction_const_t& ic)
977 {
978     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
979     {
980         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
981                                                                    vdwInteractionTypeIsEwald,
982                                                                    elecInteractionTypeIsEwald,
983                                                                    vdwModifierIsPotSwitch,
984                                                                    useSimd));
985     }
986     else
987     {
988         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
989                                                                   vdwInteractionTypeIsEwald,
990                                                                   elecInteractionTypeIsEwald,
991                                                                   vdwModifierIsPotSwitch,
992                                                                   useSimd));
993     }
994 }
995
996
997 void gmx_nb_free_energy_kernel(const t_nblist&                nlist,
998                                gmx::ArrayRef<const gmx::RVec> coords,
999                                gmx::ForceWithShiftForces*     ff,
1000                                const t_forcerec&              fr,
1001                                gmx::ArrayRef<const real>      chargeA,
1002                                gmx::ArrayRef<const real>      chargeB,
1003                                gmx::ArrayRef<const int>       typeA,
1004                                gmx::ArrayRef<const int>       typeB,
1005                                int                            flags,
1006                                gmx::ArrayRef<const real>      lambda,
1007                                gmx::ArrayRef<real>            dvdl,
1008                                gmx::ArrayRef<real>            energygrp_elec,
1009                                gmx::ArrayRef<real>            energygrp_vdw,
1010                                t_nrnb*                        nrnb)
1011 {
1012     const interaction_const_t& ic = *fr.ic;
1013     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == CoulombInteractionType::Cut || EEL_RF(ic.eeltype),
1014                "Unsupported eeltype with free energy");
1015     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1016
1017     const auto& scParams                   = *ic.softCoreParameters;
1018     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1019     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1020     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == InteractionModifiers::PotSwitch);
1021     bool        scLambdasOrAlphasDiffer    = true;
1022     const bool  useSimd                    = fr.use_simd_kernels;
1023
1024     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1025     {
1026         scLambdasOrAlphasDiffer = false;
1027     }
1028     else
1029     {
1030         if (lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)]
1031                     == lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)]
1032             && scParams.alphaCoulomb == scParams.alphaVdw)
1033         {
1034             scLambdasOrAlphasDiffer = false;
1035         }
1036     }
1037
1038     KernelFunction kernelFunc;
1039     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1040                                 vdwInteractionTypeIsEwald,
1041                                 elecInteractionTypeIsEwald,
1042                                 vdwModifierIsPotSwitch,
1043                                 useSimd,
1044                                 ic);
1045     kernelFunc(nlist, coords, ff, fr, chargeA, chargeB, typeA, typeB, flags, lambda, dvdl, energygrp_elec, energygrp_vdw, nrnb);
1046 }