d036d8510a4317526382c44330da3defee190d9f
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
51 #include "gromacs/math/functions.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdtypes/forceoutput.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/interaction_const.h"
56 #include "gromacs/mdtypes/md_enums.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/arrayref.h"
61
62
63 //! Scalar (non-SIMD) data types.
64 struct ScalarDataTypes
65 {
66     using RealType                     = real; //!< The data type to use as real.
67     using IntType                      = int;  //!< The data type to use as int.
68     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
69     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
70 };
71
72 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
73 //! SIMD data types.
74 struct SimdDataTypes
75 {
76     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
77     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
78     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
79     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
80 };
81 #endif
82
83 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
84 template<class RealType>
85 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
86 {
87     *invPthRoot = gmx::invsqrt(std::cbrt(r));
88     *pthRoot    = 1 / (*invPthRoot);
89 }
90
91 template<class RealType>
92 static inline RealType calculateRinv6(const RealType rInvV)
93 {
94     RealType rInv6 = rInvV * rInvV;
95     return (rInv6 * rInv6 * rInv6);
96 }
97
98 template<class RealType>
99 static inline RealType calculateVdw6(const RealType c6, const RealType rInv6)
100 {
101     return (c6 * rInv6);
102 }
103
104 template<class RealType>
105 static inline RealType calculateVdw12(const RealType c12, const RealType rInv6)
106 {
107     return (c12 * rInv6 * rInv6);
108 }
109
110 /* reaction-field electrostatics */
111 template<class RealType>
112 static inline RealType reactionFieldScalarForce(const RealType qq,
113                                                 const RealType rInv,
114                                                 const RealType r,
115                                                 const real     krf,
116                                                 const real     two)
117 {
118     return (qq * (rInv - two * krf * r * r));
119 }
120 template<class RealType>
121 static inline RealType reactionFieldPotential(const RealType qq,
122                                               const RealType rInv,
123                                               const RealType r,
124                                               const real     krf,
125                                               const real     potentialShift)
126 {
127     return (qq * (rInv + krf * r * r - potentialShift));
128 }
129
130 /* Ewald electrostatics */
131 template<class RealType>
132 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rInv)
133 {
134     return (coulomb * rInv);
135 }
136 template<class RealType>
137 static inline RealType ewaldPotential(const RealType coulomb, const RealType rInv, const real potentialShift)
138 {
139     return (coulomb * (rInv - potentialShift));
140 }
141
142 /* cutoff LJ */
143 template<class RealType>
144 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
145 {
146     return (v12 - v6);
147 }
148 template<class RealType>
149 static inline RealType lennardJonesPotential(const RealType v6,
150                                              const RealType v12,
151                                              const RealType c6,
152                                              const RealType c12,
153                                              const real     repulsionShift,
154                                              const real     dispersionShift,
155                                              const real     oneSixth,
156                                              const real     oneTwelfth)
157 {
158     return ((v12 + c12 * repulsionShift) * oneTwelfth - (v6 + c6 * dispersionShift) * oneSixth);
159 }
160
161 /* Ewald LJ */
162 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real oneSixth)
163 {
164     return (c6grid * potentialShift * oneSixth);
165 }
166
167 /* LJ Potential switch */
168 template<class RealType>
169 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
170                                                const RealType potential,
171                                                const RealType sw,
172                                                const RealType r,
173                                                const RealType rVdw,
174                                                const RealType dsw,
175                                                const real     zero)
176 {
177     if (r < rVdw)
178     {
179         real fScalar = fScalarInp * sw - r * potential * dsw;
180         return (fScalar);
181     }
182     return (zero);
183 }
184 template<class RealType>
185 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
186                                              const RealType sw,
187                                              const RealType r,
188                                              const RealType rVdw,
189                                              const real     zero)
190 {
191     if (r < rVdw)
192     {
193         real potential = potentialInp * sw;
194         return (potential);
195     }
196     return (zero);
197 }
198
199
200 //! Templated free-energy non-bonded kernel
201 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
202 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
203                                   rvec* gmx_restrict         xx,
204                                   gmx::ForceWithShiftForces* forceWithShiftForces,
205                                   const t_forcerec* gmx_restrict fr,
206                                   const t_mdatoms* gmx_restrict mdatoms,
207                                   nb_kernel_data_t* gmx_restrict kernel_data,
208                                   t_nrnb* gmx_restrict nrnb)
209 {
210 #define STATE_A 0
211 #define STATE_B 1
212 #define NSTATES 2
213
214     using RealType = typename DataTypes::RealType;
215     using IntType  = typename DataTypes::IntType;
216
217     /* FIXME: How should these be handled with SIMD? */
218     constexpr real oneTwelfth = 1.0 / 12.0;
219     constexpr real oneSixth   = 1.0 / 6.0;
220     constexpr real zero       = 0.0;
221     constexpr real half       = 0.5;
222     constexpr real one        = 1.0;
223     constexpr real two        = 2.0;
224     constexpr real six        = 6.0;
225
226     /* Extract pointer to non-bonded interaction constants */
227     const interaction_const_t* ic = fr->ic;
228
229     // Extract pair list data
230     const int                nri    = nlist->nri;
231     gmx::ArrayRef<const int> iinr   = nlist->iinr;
232     gmx::ArrayRef<const int> jindex = nlist->jindex;
233     gmx::ArrayRef<const int> jjnr   = nlist->jjnr;
234     gmx::ArrayRef<const int> shift  = nlist->shift;
235     gmx::ArrayRef<const int> gid    = nlist->gid;
236
237     const real*               shiftvec  = fr->shift_vec[0];
238     const real*               chargeA   = mdatoms->chargeA;
239     const real*               chargeB   = mdatoms->chargeB;
240     real*                     Vc        = kernel_data->energygrp_elec;
241     const int*                typeA     = mdatoms->typeA;
242     const int*                typeB     = mdatoms->typeB;
243     const int                 ntype     = fr->ntype;
244     gmx::ArrayRef<const real> nbfp      = fr->nbfp;
245     gmx::ArrayRef<const real> nbfp_grid = fr->ljpme_c6grid;
246
247     real*       Vv            = kernel_data->energygrp_vdw;
248     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
249     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
250     real*       dvdl          = kernel_data->dvdl;
251     const auto& scParams      = *ic->softCoreParameters;
252     const real  alpha_coul    = scParams.alphaCoulomb;
253     const real  alpha_vdw     = scParams.alphaVdw;
254     const real  lam_power     = scParams.lambdaPower;
255     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
256     const real  sigma6_min    = scParams.sigma6Minimum;
257     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
258     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
259     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
260
261     // Extract data from interaction_const_t
262     const real facel           = ic->epsfac;
263     const real rCoulomb        = ic->rcoulomb;
264     const real krf             = ic->reactionFieldCoefficient;
265     const real crf             = ic->reactionFieldShift;
266     const real shLjEwald       = ic->sh_lj_ewald;
267     const real rVdw            = ic->rvdw;
268     const real dispersionShift = ic->dispersion_shift.cpot;
269     const real repulsionShift  = ic->repulsion_shift.cpot;
270
271     // Note that the nbnxm kernels do not support Coulomb potential switching at all
272     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
273                "Potential switching is not supported for Coulomb with FEP");
274
275     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
276     if (vdwModifierIsPotSwitch)
277     {
278         const real d = ic->rvdw - ic->rvdw_switch;
279         vdw_swV3     = -10.0 / (d * d * d);
280         vdw_swV4     = 15.0 / (d * d * d * d);
281         vdw_swV5     = -6.0 / (d * d * d * d * d);
282         vdw_swF2     = -30.0 / (d * d * d);
283         vdw_swF3     = 60.0 / (d * d * d * d);
284         vdw_swF4     = -30.0 / (d * d * d * d * d);
285     }
286     else
287     {
288         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
289         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
290     }
291
292     int icoul;
293     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
294     {
295         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
296     }
297     else
298     {
299         icoul = GMX_NBKERNEL_ELEC_NONE;
300     }
301
302     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
303     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
304
305     const real* tab_ewald_F_lj           = nullptr;
306     const real* tab_ewald_V_lj           = nullptr;
307     const real* ewtab                    = nullptr;
308     real        coulombTableScale        = 0;
309     real        coulombTableScaleInvHalf = 0;
310     real        vdwTableScale            = 0;
311     real        vdwTableScaleInvHalf     = 0;
312     real        sh_ewald                 = 0;
313     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
314     {
315         sh_ewald = ic->sh_ewald;
316     }
317     if (elecInteractionTypeIsEwald)
318     {
319         const auto& coulombTables = *ic->coulombEwaldTables;
320         ewtab                     = coulombTables.tableFDV0.data();
321         coulombTableScale         = coulombTables.scale;
322         coulombTableScaleInvHalf  = half / coulombTableScale;
323     }
324     if (vdwInteractionTypeIsEwald)
325     {
326         const auto& vdwTables = *ic->vdwEwaldTables;
327         tab_ewald_F_lj        = vdwTables.tableF.data();
328         tab_ewald_V_lj        = vdwTables.tableV.data();
329         vdwTableScale         = vdwTables.scale;
330         vdwTableScaleInvHalf  = half / vdwTableScale;
331     }
332
333     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
334      * reciprocal space. When we use non-switched Ewald interactions, we
335      * assume the soft-coring does not significantly affect the grid contribution
336      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
337      *
338      * However, we cannot use this approach for switch-modified since we would then
339      * effectively end up evaluating a significantly different interaction here compared to the
340      * normal (non-free-energy) kernels, either by applying a cutoff at a different
341      * position than what the user requested, or by switching different
342      * things (1/r rather than short-range Ewald). For these settings, we just
343      * use the traditional short-range Ewald interaction in that case.
344      */
345     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
346                        "Can not apply soft-core to switched Ewald potentials");
347
348     real dvdlCoul = 0;
349     real dvdlVdw  = 0;
350
351     /* Lambda factor for state A, 1-lambda*/
352     real LFC[NSTATES], LFV[NSTATES];
353     LFC[STATE_A] = one - lambda_coul;
354     LFV[STATE_A] = one - lambda_vdw;
355
356     /* Lambda factor for state B, lambda*/
357     LFC[STATE_B] = lambda_coul;
358     LFV[STATE_B] = lambda_vdw;
359
360     /*derivative of the lambda factor for state A and B */
361     real DLF[NSTATES];
362     DLF[STATE_A] = -1;
363     DLF[STATE_B] = 1;
364
365     real           lFacCoul[NSTATES], dlFacCoul[NSTATES], lFacVdw[NSTATES], dlFacVdw[NSTATES];
366     constexpr real sc_r_power = 6.0_real;
367     for (int i = 0; i < NSTATES; i++)
368     {
369         lFacCoul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
370         dlFacCoul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
371         lFacVdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
372         dlFacVdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
373     }
374
375     // TODO: We should get rid of using pointers to real
376     const real* x             = xx[0];
377     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
378     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
379
380     const real rlistSquared = gmx::square(fr->rlist);
381
382     int numExcludedPairsBeyondRlist = 0;
383
384     for (int n = 0; n < nri; n++)
385     {
386         int npair_within_cutoff = 0;
387
388         const int  is3   = 3 * shift[n];
389         const real shX   = shiftvec[is3];
390         const real shY   = shiftvec[is3 + 1];
391         const real shZ   = shiftvec[is3 + 2];
392         const int  nj0   = jindex[n];
393         const int  nj1   = jindex[n + 1];
394         const int  ii    = iinr[n];
395         const int  ii3   = 3 * ii;
396         const real ix    = shX + x[ii3 + 0];
397         const real iy    = shY + x[ii3 + 1];
398         const real iz    = shZ + x[ii3 + 2];
399         const real iqA   = facel * chargeA[ii];
400         const real iqB   = facel * chargeB[ii];
401         const int  ntiA  = 2 * ntype * typeA[ii];
402         const int  ntiB  = 2 * ntype * typeB[ii];
403         real       vCTot = 0;
404         real       vVTot = 0;
405         real       fIX   = 0;
406         real       fIY   = 0;
407         real       fIZ   = 0;
408
409         for (int k = nj0; k < nj1; k++)
410         {
411             int            tj[NSTATES];
412             const int      jnr = jjnr[k];
413             const int      j3  = 3 * jnr;
414             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], vCoul[NSTATES], vVdw[NSTATES];
415             RealType       r, rInv, rp, rpm2;
416             RealType       alphaVdwEff, alphaCoulEff, sigma6[NSTATES];
417             const RealType dX  = ix - x[j3];
418             const RealType dY  = iy - x[j3 + 1];
419             const RealType dZ  = iz - x[j3 + 2];
420             const RealType rSq = dX * dX + dY * dY + dZ * dZ;
421             RealType       fScalC[NSTATES], fScalV[NSTATES];
422             /* Check if this pair on the exlusions list.*/
423             const bool bPairIncluded = nlist->excl_fep.empty() || nlist->excl_fep[k];
424
425             if (rSq >= rcutoff_max2 && bPairIncluded)
426             {
427                 /* We save significant time by skipping all code below.
428                  * Note that with soft-core interactions, the actual cut-off
429                  * check might be different. But since the soft-core distance
430                  * is always larger than r, checking on r here is safe.
431                  * Exclusions outside the cutoff can not be skipped as
432                  * when using Ewald: the reciprocal-space
433                  * Ewald component still needs to be subtracted.
434                  */
435
436                 continue;
437             }
438             npair_within_cutoff++;
439
440             if (rSq > rlistSquared)
441             {
442                 numExcludedPairsBeyondRlist++;
443             }
444
445             if (rSq > 0)
446             {
447                 /* Note that unlike in the nbnxn kernels, we do not need
448                  * to clamp the value of rSq before taking the invsqrt
449                  * to avoid NaN in the LJ calculation, since here we do
450                  * not calculate LJ interactions when C6 and C12 are zero.
451                  */
452
453                 rInv = gmx::invsqrt(rSq);
454                 r    = rSq * rInv;
455             }
456             else
457             {
458                 /* The force at r=0 is zero, because of symmetry.
459                  * But note that the potential is in general non-zero,
460                  * since the soft-cored r will be non-zero.
461                  */
462                 rInv = 0;
463                 r    = 0;
464             }
465
466             if (useSoftCore)
467             {
468                 rpm2 = rSq * rSq;  /* r4 */
469                 rp   = rpm2 * rSq; /* r6 */
470             }
471             else
472             {
473                 /* The soft-core power p will not affect the results
474                  * with not using soft-core, so we use power of 0 which gives
475                  * the simplest math and cheapest code.
476                  */
477                 rpm2 = rInv * rInv;
478                 rp   = 1;
479             }
480
481             RealType fScal = 0;
482
483             qq[STATE_A] = iqA * chargeA[jnr];
484             qq[STATE_B] = iqB * chargeB[jnr];
485
486             tj[STATE_A] = ntiA + 2 * typeA[jnr];
487             tj[STATE_B] = ntiB + 2 * typeB[jnr];
488
489             if (bPairIncluded)
490             {
491                 c6[STATE_A] = nbfp[tj[STATE_A]];
492                 c6[STATE_B] = nbfp[tj[STATE_B]];
493
494                 for (int i = 0; i < NSTATES; i++)
495                 {
496                     c12[i] = nbfp[tj[i] + 1];
497                     if (useSoftCore)
498                     {
499                         if ((c6[i] > 0) && (c12[i] > 0))
500                         {
501                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
502                             sigma6[i] = half * c12[i] / c6[i];
503                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
504                             {
505                                 sigma6[i] = sigma6_min;
506                             }
507                         }
508                         else
509                         {
510                             sigma6[i] = sigma6_def;
511                         }
512                     }
513                 }
514
515                 if (useSoftCore)
516                 {
517                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
518                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
519                     {
520                         alphaVdwEff  = 0;
521                         alphaCoulEff = 0;
522                     }
523                     else
524                     {
525                         alphaVdwEff  = alpha_vdw;
526                         alphaCoulEff = alpha_coul;
527                     }
528                 }
529
530                 for (int i = 0; i < NSTATES; i++)
531                 {
532                     fScalC[i] = 0;
533                     fScalV[i] = 0;
534                     vCoul[i]  = 0;
535                     vVdw[i]   = 0;
536
537                     RealType rInvC, rInvV, rC, rV, rPInvC, rPInvV;
538
539                     /* Only spend time on A or B state if it is non-zero */
540                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
541                     {
542                         /* this section has to be inside the loop because of the dependence on sigma6 */
543                         if (useSoftCore)
544                         {
545                             rPInvC = one / (alphaCoulEff * lFacCoul[i] * sigma6[i] + rp);
546                             pthRoot(rPInvC, &rInvC, &rC);
547                             if (scLambdasOrAlphasDiffer)
548                             {
549                                 rPInvV = one / (alphaVdwEff * lFacVdw[i] * sigma6[i] + rp);
550                                 pthRoot(rPInvV, &rInvV, &rV);
551                             }
552                             else
553                             {
554                                 /* We can avoid one expensive pow and one / operation */
555                                 rPInvV = rPInvC;
556                                 rInvV  = rInvC;
557                                 rV     = rC;
558                             }
559                         }
560                         else
561                         {
562                             rPInvC = 1;
563                             rInvC  = rInv;
564                             rC     = r;
565
566                             rPInvV = 1;
567                             rInvV  = rInv;
568                             rV     = r;
569                         }
570
571                         /* Only process the coulomb interactions if we have charges,
572                          * and if we either include all entries in the list (no cutoff
573                          * used in the kernel), or if we are within the cutoff.
574                          */
575                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rCoulomb)
576                                                       || (!elecInteractionTypeIsEwald && rC < rCoulomb);
577
578                         if ((qq[i] != 0) && computeElecInteraction)
579                         {
580                             if (elecInteractionTypeIsEwald)
581                             {
582                                 vCoul[i]  = ewaldPotential(qq[i], rInvC, sh_ewald);
583                                 fScalC[i] = ewaldScalarForce(qq[i], rInvC);
584                             }
585                             else
586                             {
587                                 vCoul[i]  = reactionFieldPotential(qq[i], rInvC, rC, krf, crf);
588                                 fScalC[i] = reactionFieldScalarForce(qq[i], rInvC, rC, krf, two);
589                             }
590                         }
591
592                         /* Only process the VDW interactions if we have
593                          * some non-zero parameters, and if we either
594                          * include all entries in the list (no cutoff used
595                          * in the kernel), or if we are within the cutoff.
596                          */
597                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rVdw)
598                                                      || (!vdwInteractionTypeIsEwald && rV < rVdw);
599                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
600                         {
601                             RealType rInv6;
602                             if (useSoftCore)
603                             {
604                                 rInv6 = rPInvV;
605                             }
606                             else
607                             {
608                                 rInv6 = calculateRinv6(rInvV);
609                             }
610                             RealType vVdw6  = calculateVdw6(c6[i], rInv6);
611                             RealType vVdw12 = calculateVdw12(c12[i], rInv6);
612
613                             vVdw[i] = lennardJonesPotential(
614                                     vVdw6, vVdw12, c6[i], c12[i], repulsionShift, dispersionShift, oneSixth, oneTwelfth);
615                             fScalV[i] = lennardJonesScalarForce(vVdw6, vVdw12);
616
617                             if (vdwInteractionTypeIsEwald)
618                             {
619                                 /* Subtract the grid potential at the cut-off */
620                                 vVdw[i] += ewaldLennardJonesGridSubtract(
621                                         nbfp_grid[tj[i]], shLjEwald, oneSixth);
622                             }
623
624                             if (vdwModifierIsPotSwitch)
625                             {
626                                 RealType d        = rV - ic->rvdw_switch;
627                                 d                 = (d > zero) ? d : zero;
628                                 const RealType d2 = d * d;
629                                 const RealType sw =
630                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
631                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
632
633                                 fScalV[i] = potSwitchScalarForceMod(
634                                         fScalV[i], vVdw[i], sw, rV, rVdw, dsw, zero);
635                                 vVdw[i] = potSwitchPotentialMod(vVdw[i], sw, rV, rVdw, zero);
636                             }
637                         }
638
639                         /* fScalC (and fScalV) now contain: dV/drC * rC
640                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
641                          * Further down we first multiply by r^p-2 and then by
642                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
643                          */
644                         fScalC[i] *= rPInvC;
645                         fScalV[i] *= rPInvV;
646                     }
647                 } // end for (int i = 0; i < NSTATES; i++)
648
649                 /* Assemble A and B states */
650                 for (int i = 0; i < NSTATES; i++)
651                 {
652                     vCTot += LFC[i] * vCoul[i];
653                     vVTot += LFV[i] * vVdw[i];
654
655                     fScal += LFC[i] * fScalC[i] * rpm2;
656                     fScal += LFV[i] * fScalV[i] * rpm2;
657
658                     if (useSoftCore)
659                     {
660                         dvdlCoul += vCoul[i] * DLF[i]
661                                     + LFC[i] * alphaCoulEff * dlFacCoul[i] * fScalC[i] * sigma6[i];
662                         dvdlVdw += vVdw[i] * DLF[i]
663                                    + LFV[i] * alphaVdwEff * dlFacVdw[i] * fScalV[i] * sigma6[i];
664                     }
665                     else
666                     {
667                         dvdlCoul += vCoul[i] * DLF[i];
668                         dvdlVdw += vVdw[i] * DLF[i];
669                     }
670                 }
671             } // end if (bPairIncluded)
672             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
673             {
674                 /* For excluded pairs, which are only in this pair list when
675                  * using the Verlet scheme, we don't use soft-core.
676                  * As there is no singularity, there is no need for soft-core.
677                  */
678                 const real FF = -two * krf;
679                 RealType   VV = krf * rSq - crf;
680
681                 if (ii == jnr)
682                 {
683                     VV *= half;
684                 }
685
686                 for (int i = 0; i < NSTATES; i++)
687                 {
688                     vCTot += LFC[i] * qq[i] * VV;
689                     fScal += LFC[i] * qq[i] * FF;
690                     dvdlCoul += DLF[i] * qq[i] * VV;
691                 }
692             }
693
694             if (elecInteractionTypeIsEwald && (r < rCoulomb || !bPairIncluded))
695             {
696                 /* See comment in the preamble. When using Ewald interactions
697                  * (unless we use a switch modifier) we subtract the reciprocal-space
698                  * Ewald component here which made it possible to apply the free
699                  * energy interaction to 1/r (vanilla coulomb short-range part)
700                  * above. This gets us closer to the ideal case of applying
701                  * the softcore to the entire electrostatic interaction,
702                  * including the reciprocal-space component.
703                  */
704                 real v_lr, f_lr;
705
706                 const RealType ewrt   = r * coulombTableScale;
707                 IntType        ewitab = static_cast<IntType>(ewrt);
708                 const RealType eweps  = ewrt - ewitab;
709                 ewitab                = 4 * ewitab;
710                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
711                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
712                 f_lr *= rInv;
713
714                 /* Note that any possible Ewald shift has already been applied in
715                  * the normal interaction part above.
716                  */
717
718                 if (ii == jnr)
719                 {
720                     /* If we get here, the i particle (ii) has itself (jnr)
721                      * in its neighborlist. This can only happen with the Verlet
722                      * scheme, and corresponds to a self-interaction that will
723                      * occur twice. Scale it down by 50% to only include it once.
724                      */
725                     v_lr *= half;
726                 }
727
728                 for (int i = 0; i < NSTATES; i++)
729                 {
730                     vCTot -= LFC[i] * qq[i] * v_lr;
731                     fScal -= LFC[i] * qq[i] * f_lr;
732                     dvdlCoul -= (DLF[i] * qq[i]) * v_lr;
733                 }
734             }
735
736             if (vdwInteractionTypeIsEwald && (r < rVdw || !bPairIncluded))
737             {
738                 /* See comment in the preamble. When using LJ-Ewald interactions
739                  * (unless we use a switch modifier) we subtract the reciprocal-space
740                  * Ewald component here which made it possible to apply the free
741                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
742                  * above. This gets us closer to the ideal case of applying
743                  * the softcore to the entire VdW interaction,
744                  * including the reciprocal-space component.
745                  */
746                 /* We could also use the analytical form here
747                  * iso a table, but that can cause issues for
748                  * r close to 0 for non-interacting pairs.
749                  */
750
751                 const RealType rs   = rSq * rInv * vdwTableScale;
752                 const IntType  ri   = static_cast<IntType>(rs);
753                 const RealType frac = rs - ri;
754                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
755                 /* TODO: Currently the Ewald LJ table does not contain
756                  * the factor 1/6, we should add this.
757                  */
758                 const RealType FF = f_lr * rInv / six;
759                 RealType       VV =
760                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
761                         / six;
762
763                 if (ii == jnr)
764                 {
765                     /* If we get here, the i particle (ii) has itself (jnr)
766                      * in its neighborlist. This can only happen with the Verlet
767                      * scheme, and corresponds to a self-interaction that will
768                      * occur twice. Scale it down by 50% to only include it once.
769                      */
770                     VV *= half;
771                 }
772
773                 for (int i = 0; i < NSTATES; i++)
774                 {
775                     const real c6grid = nbfp_grid[tj[i]];
776                     vVTot += LFV[i] * c6grid * VV;
777                     fScal += LFV[i] * c6grid * FF;
778                     dvdlVdw += (DLF[i] * c6grid) * VV;
779                 }
780             }
781
782             if (doForces)
783             {
784                 const real tX = fScal * dX;
785                 const real tY = fScal * dY;
786                 const real tZ = fScal * dZ;
787                 fIX           = fIX + tX;
788                 fIY           = fIY + tY;
789                 fIZ           = fIZ + tZ;
790                 /* OpenMP atomics are expensive, but this kernels is also
791                  * expensive, so we can take this hit, instead of using
792                  * thread-local output buffers and extra reduction.
793                  *
794                  * All the OpenMP regions in this file are trivial and should
795                  * not throw, so no need for try/catch.
796                  */
797 #pragma omp atomic
798                 f[j3] -= tX;
799 #pragma omp atomic
800                 f[j3 + 1] -= tY;
801 #pragma omp atomic
802                 f[j3 + 2] -= tZ;
803             }
804         } // end for (int k = nj0; k < nj1; k++)
805
806         /* The atomics below are expensive with many OpenMP threads.
807          * Here unperturbed i-particles will usually only have a few
808          * (perturbed) j-particles in the list. Thus with a buffered list
809          * we can skip a significant number of i-reductions with a check.
810          */
811         if (npair_within_cutoff > 0)
812         {
813             if (doForces)
814             {
815 #pragma omp atomic
816                 f[ii3] += fIX;
817 #pragma omp atomic
818                 f[ii3 + 1] += fIY;
819 #pragma omp atomic
820                 f[ii3 + 2] += fIZ;
821             }
822             if (doShiftForces)
823             {
824 #pragma omp atomic
825                 fshift[is3] += fIX;
826 #pragma omp atomic
827                 fshift[is3 + 1] += fIY;
828 #pragma omp atomic
829                 fshift[is3 + 2] += fIZ;
830             }
831             if (doPotential)
832             {
833                 int ggid = gid[n];
834 #pragma omp atomic
835                 Vc[ggid] += vCTot;
836 #pragma omp atomic
837                 Vv[ggid] += vVTot;
838             }
839         }
840     } // end for (int n = 0; n < nri; n++)
841
842 #pragma omp atomic
843     dvdl[efptCOUL] += dvdlCoul;
844 #pragma omp atomic
845     dvdl[efptVDW] += dvdlVdw;
846
847     /* Estimate flops, average for free energy stuff:
848      * 12  flops per outer iteration
849      * 150 flops per inner iteration
850      */
851 #pragma omp atomic
852     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
853
854     if (numExcludedPairsBeyondRlist > 0)
855     {
856         gmx_fatal(FARGS,
857                   "There are %d perturbed non-bonded pair interactions beyond the pair-list cutoff "
858                   "of %g nm, which is not supported. This can happen because the system is "
859                   "unstable or because intra-molecular interactions at long distances are "
860                   "excluded. If the "
861                   "latter is the case, you can try to increase nstlist or rlist to avoid this."
862                   "The error is likely triggered by the use of couple-intramol=no "
863                   "and the maximal distance in the decoupled molecule exceeding rlist.",
864                   numExcludedPairsBeyondRlist,
865                   fr->rlist);
866     }
867 }
868
869 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
870                                rvec* gmx_restrict         xx,
871                                gmx::ForceWithShiftForces* forceWithShiftForces,
872                                const t_forcerec* gmx_restrict fr,
873                                const t_mdatoms* gmx_restrict mdatoms,
874                                nb_kernel_data_t* gmx_restrict kernel_data,
875                                t_nrnb* gmx_restrict nrnb);
876
877 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
878 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
879 {
880     if (useSimd)
881     {
882 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
883         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
884         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
885 #else
886         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
887 #endif
888     }
889     else
890     {
891         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
892     }
893 }
894
895 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
896 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
897 {
898     if (vdwModifierIsPotSwitch)
899     {
900         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>(
901                 useSimd));
902     }
903     else
904     {
905         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>(
906                 useSimd));
907     }
908 }
909
910 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
911 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
912                                                           const bool vdwModifierIsPotSwitch,
913                                                           const bool useSimd)
914 {
915     if (elecInteractionTypeIsEwald)
916     {
917         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
918                 vdwModifierIsPotSwitch, useSimd));
919     }
920     else
921     {
922         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
923                 vdwModifierIsPotSwitch, useSimd));
924     }
925 }
926
927 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
928 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
929                                                          const bool elecInteractionTypeIsEwald,
930                                                          const bool vdwModifierIsPotSwitch,
931                                                          const bool useSimd)
932 {
933     if (vdwInteractionTypeIsEwald)
934     {
935         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
936                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
937     }
938     else
939     {
940         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
941                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
942     }
943 }
944
945 template<bool useSoftCore>
946 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
947                                                                   const bool vdwInteractionTypeIsEwald,
948                                                                   const bool elecInteractionTypeIsEwald,
949                                                                   const bool vdwModifierIsPotSwitch,
950                                                                   const bool useSimd)
951 {
952     if (scLambdasOrAlphasDiffer)
953     {
954         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
955                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
956     }
957     else
958     {
959         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
960                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
961     }
962 }
963
964 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
965                                      const bool                 vdwInteractionTypeIsEwald,
966                                      const bool                 elecInteractionTypeIsEwald,
967                                      const bool                 vdwModifierIsPotSwitch,
968                                      const bool                 useSimd,
969                                      const interaction_const_t& ic)
970 {
971     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
972     {
973         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(scLambdasOrAlphasDiffer,
974                                                                    vdwInteractionTypeIsEwald,
975                                                                    elecInteractionTypeIsEwald,
976                                                                    vdwModifierIsPotSwitch,
977                                                                    useSimd));
978     }
979     else
980     {
981         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(scLambdasOrAlphasDiffer,
982                                                                   vdwInteractionTypeIsEwald,
983                                                                   elecInteractionTypeIsEwald,
984                                                                   vdwModifierIsPotSwitch,
985                                                                   useSimd));
986     }
987 }
988
989
990 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
991                                rvec*                      xx,
992                                gmx::ForceWithShiftForces* ff,
993                                const t_forcerec*          fr,
994                                const t_mdatoms*           mdatoms,
995                                nb_kernel_data_t*          kernel_data,
996                                t_nrnb*                    nrnb)
997 {
998     const interaction_const_t& ic = *fr->ic;
999     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == eelCUT || EEL_RF(ic.eeltype),
1000                "Unsupported eeltype with free energy");
1001     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
1002
1003     const auto& scParams                   = *ic.softCoreParameters;
1004     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
1005     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
1006     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == eintmodPOTSWITCH);
1007     bool        scLambdasOrAlphasDiffer    = true;
1008     const bool  useSimd                    = fr->use_simd_kernels;
1009
1010     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
1011     {
1012         scLambdasOrAlphasDiffer = false;
1013     }
1014     else
1015     {
1016         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW]
1017             && scParams.alphaCoulomb == scParams.alphaVdw)
1018         {
1019             scLambdasOrAlphasDiffer = false;
1020         }
1021     }
1022
1023     KernelFunction kernelFunc;
1024     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer,
1025                                 vdwInteractionTypeIsEwald,
1026                                 elecInteractionTypeIsEwald,
1027                                 vdwModifierIsPotSwitch,
1028                                 useSimd,
1029                                 ic);
1030     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1031 }