Move soft-core parameters to interaction_const_t
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
51 #include "gromacs/math/functions.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdtypes/forceoutput.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/interaction_const.h"
56 #include "gromacs/mdtypes/md_enums.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60
61
62 //! Scalar (non-SIMD) data types.
63 struct ScalarDataTypes
64 {
65     using RealType                     = real; //!< The data type to use as real.
66     using IntType                      = int;  //!< The data type to use as int.
67     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
68     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
69 };
70
71 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
72 //! SIMD data types.
73 struct SimdDataTypes
74 {
75     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
76     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
77     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
78     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
79 };
80 #endif
81
82 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
83 template<class RealType>
84 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
85 {
86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
87     *pthRoot    = 1 / (*invPthRoot);
88 }
89
90 template<class RealType>
91 static inline RealType calculateRinv6(const RealType rinvV)
92 {
93     RealType rinv6 = rinvV * rinvV;
94     return (rinv6 * rinv6 * rinv6);
95 }
96
97 template<class RealType>
98 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
99 {
100     return (c6 * rinv6);
101 }
102
103 template<class RealType>
104 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
105 {
106     return (c12 * rinv6 * rinv6);
107 }
108
109 /* reaction-field electrostatics */
110 template<class RealType>
111 static inline RealType reactionFieldScalarForce(const RealType qq,
112                                                 const RealType rinv,
113                                                 const RealType r,
114                                                 const real     krf,
115                                                 const real     two)
116 {
117     return (qq * (rinv - two * krf * r * r));
118 }
119 template<class RealType>
120 static inline RealType reactionFieldPotential(const RealType qq,
121                                               const RealType rinv,
122                                               const RealType r,
123                                               const real     krf,
124                                               const real     potentialShift)
125 {
126     return (qq * (rinv + krf * r * r - potentialShift));
127 }
128
129 /* Ewald electrostatics */
130 template<class RealType>
131 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
132 {
133     return (coulomb * rinv);
134 }
135 template<class RealType>
136 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
137 {
138     return (coulomb * (rinv - potentialShift));
139 }
140
141 /* cutoff LJ */
142 template<class RealType>
143 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
144 {
145     return (v12 - v6);
146 }
147 template<class RealType>
148 static inline RealType lennardJonesPotential(const RealType v6,
149                                              const RealType v12,
150                                              const RealType c6,
151                                              const RealType c12,
152                                              const real     repulsionShift,
153                                              const real     dispersionShift,
154                                              const real     onesixth,
155                                              const real     onetwelfth)
156 {
157     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
158 }
159
160 /* Ewald LJ */
161 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
162 {
163     return (c6grid * potentialShift * onesixth);
164 }
165
166 /* LJ Potential switch */
167 template<class RealType>
168 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
169                                                const RealType potential,
170                                                const RealType sw,
171                                                const RealType r,
172                                                const RealType rVdw,
173                                                const RealType dsw,
174                                                const real     zero)
175 {
176     if (r < rVdw)
177     {
178         real fScalar = fScalarInp * sw - r * potential * dsw;
179         return (fScalar);
180     }
181     return (zero);
182 }
183 template<class RealType>
184 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
185                                              const RealType sw,
186                                              const RealType r,
187                                              const RealType rVdw,
188                                              const real     zero)
189 {
190     if (r < rVdw)
191     {
192         real potential = potentialInp * sw;
193         return (potential);
194     }
195     return (zero);
196 }
197
198
199 //! Templated free-energy non-bonded kernel
200 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
201 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
202                                   rvec* gmx_restrict         xx,
203                                   gmx::ForceWithShiftForces* forceWithShiftForces,
204                                   const t_forcerec* gmx_restrict fr,
205                                   const t_mdatoms* gmx_restrict mdatoms,
206                                   nb_kernel_data_t* gmx_restrict kernel_data,
207                                   t_nrnb* gmx_restrict nrnb)
208 {
209 #define STATE_A 0
210 #define STATE_B 1
211 #define NSTATES 2
212
213     using RealType = typename DataTypes::RealType;
214     using IntType  = typename DataTypes::IntType;
215
216     /* FIXME: How should these be handled with SIMD? */
217     constexpr real onetwelfth = 1.0 / 12.0;
218     constexpr real onesixth   = 1.0 / 6.0;
219     constexpr real zero       = 0.0;
220     constexpr real half       = 0.5;
221     constexpr real one        = 1.0;
222     constexpr real two        = 2.0;
223     constexpr real six        = 6.0;
224
225     /* Extract pointer to non-bonded interaction constants */
226     const interaction_const_t* ic = fr->ic;
227
228     // Extract pair list data
229     const int  nri    = nlist->nri;
230     const int* iinr   = nlist->iinr;
231     const int* jindex = nlist->jindex;
232     const int* jjnr   = nlist->jjnr;
233     const int* shift  = nlist->shift;
234     const int* gid    = nlist->gid;
235
236     const real* shiftvec      = fr->shift_vec[0];
237     const real* chargeA       = mdatoms->chargeA;
238     const real* chargeB       = mdatoms->chargeB;
239     real*       Vc            = kernel_data->energygrp_elec;
240     const int*  typeA         = mdatoms->typeA;
241     const int*  typeB         = mdatoms->typeB;
242     const int   ntype         = fr->ntype;
243     const real* nbfp          = fr->nbfp.data();
244     const real* nbfp_grid     = fr->ljpme_c6grid;
245     real*       Vv            = kernel_data->energygrp_vdw;
246     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
247     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
248     real*       dvdl          = kernel_data->dvdl;
249     const auto& scParams      = *ic->softCoreParameters;
250     const real  alpha_coul    = scParams.alphaCoulomb;
251     const real  alpha_vdw     = scParams.alphaVdw;
252     const real  lam_power     = scParams.lambdaPower;
253     const real  sigma6_def    = scParams.sigma6WithInvalidSigma;
254     const real  sigma6_min    = scParams.sigma6Minimum;
255     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
256     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
257     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
258
259     // Extract data from interaction_const_t
260     const real facel           = ic->epsfac;
261     const real rcoulomb        = ic->rcoulomb;
262     const real krf             = ic->k_rf;
263     const real crf             = ic->c_rf;
264     const real sh_lj_ewald     = ic->sh_lj_ewald;
265     const real rvdw            = ic->rvdw;
266     const real dispersionShift = ic->dispersion_shift.cpot;
267     const real repulsionShift  = ic->repulsion_shift.cpot;
268
269     // Note that the nbnxm kernels do not support Coulomb potential switching at all
270     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
271                "Potential switching is not supported for Coulomb with FEP");
272
273     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
274     if (vdwModifierIsPotSwitch)
275     {
276         const real d = ic->rvdw - ic->rvdw_switch;
277         vdw_swV3     = -10.0 / (d * d * d);
278         vdw_swV4     = 15.0 / (d * d * d * d);
279         vdw_swV5     = -6.0 / (d * d * d * d * d);
280         vdw_swF2     = -30.0 / (d * d * d);
281         vdw_swF3     = 60.0 / (d * d * d * d);
282         vdw_swF4     = -30.0 / (d * d * d * d * d);
283     }
284     else
285     {
286         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
287         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
288     }
289
290     int icoul;
291     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
292     {
293         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
294     }
295     else
296     {
297         icoul = GMX_NBKERNEL_ELEC_NONE;
298     }
299
300     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
301     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
302
303     const real* tab_ewald_F_lj           = nullptr;
304     const real* tab_ewald_V_lj           = nullptr;
305     const real* ewtab                    = nullptr;
306     real        coulombTableScale        = 0;
307     real        coulombTableScaleInvHalf = 0;
308     real        vdwTableScale            = 0;
309     real        vdwTableScaleInvHalf     = 0;
310     real        sh_ewald                 = 0;
311     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
312     {
313         sh_ewald = ic->sh_ewald;
314     }
315     if (elecInteractionTypeIsEwald)
316     {
317         const auto& coulombTables = *ic->coulombEwaldTables;
318         ewtab                     = coulombTables.tableFDV0.data();
319         coulombTableScale         = coulombTables.scale;
320         coulombTableScaleInvHalf  = half / coulombTableScale;
321     }
322     if (vdwInteractionTypeIsEwald)
323     {
324         const auto& vdwTables = *ic->vdwEwaldTables;
325         tab_ewald_F_lj        = vdwTables.tableF.data();
326         tab_ewald_V_lj        = vdwTables.tableV.data();
327         vdwTableScale         = vdwTables.scale;
328         vdwTableScaleInvHalf  = half / vdwTableScale;
329     }
330
331     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
332      * reciprocal space. When we use non-switched Ewald interactions, we
333      * assume the soft-coring does not significantly affect the grid contribution
334      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
335      *
336      * However, we cannot use this approach for switch-modified since we would then
337      * effectively end up evaluating a significantly different interaction here compared to the
338      * normal (non-free-energy) kernels, either by applying a cutoff at a different
339      * position than what the user requested, or by switching different
340      * things (1/r rather than short-range Ewald). For these settings, we just
341      * use the traditional short-range Ewald interaction in that case.
342      */
343     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
344                        "Can not apply soft-core to switched Ewald potentials");
345
346     real dvdl_coul = 0;
347     real dvdl_vdw  = 0;
348
349     /* Lambda factor for state A, 1-lambda*/
350     real LFC[NSTATES], LFV[NSTATES];
351     LFC[STATE_A] = one - lambda_coul;
352     LFV[STATE_A] = one - lambda_vdw;
353
354     /* Lambda factor for state B, lambda*/
355     LFC[STATE_B] = lambda_coul;
356     LFV[STATE_B] = lambda_vdw;
357
358     /*derivative of the lambda factor for state A and B */
359     real DLF[NSTATES];
360     DLF[STATE_A] = -1;
361     DLF[STATE_B] = 1;
362
363     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
364     constexpr real sc_r_power = 6.0_real;
365     for (int i = 0; i < NSTATES; i++)
366     {
367         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
368         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
369         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
370         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
371     }
372
373     // TODO: We should get rid of using pointers to real
374     const real* x             = xx[0];
375     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
376     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
377
378     for (int n = 0; n < nri; n++)
379     {
380         int npair_within_cutoff = 0;
381
382         const int  is3   = 3 * shift[n];
383         const real shX   = shiftvec[is3];
384         const real shY   = shiftvec[is3 + 1];
385         const real shZ   = shiftvec[is3 + 2];
386         const int  nj0   = jindex[n];
387         const int  nj1   = jindex[n + 1];
388         const int  ii    = iinr[n];
389         const int  ii3   = 3 * ii;
390         const real ix    = shX + x[ii3 + 0];
391         const real iy    = shY + x[ii3 + 1];
392         const real iz    = shZ + x[ii3 + 2];
393         const real iqA   = facel * chargeA[ii];
394         const real iqB   = facel * chargeB[ii];
395         const int  ntiA  = 2 * ntype * typeA[ii];
396         const int  ntiB  = 2 * ntype * typeB[ii];
397         real       vctot = 0;
398         real       vvtot = 0;
399         real       fix   = 0;
400         real       fiy   = 0;
401         real       fiz   = 0;
402
403         for (int k = nj0; k < nj1; k++)
404         {
405             int            tj[NSTATES];
406             const int      jnr = jjnr[k];
407             const int      j3  = 3 * jnr;
408             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
409             RealType       r, rinv, rp, rpm2;
410             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
411             const RealType dx  = ix - x[j3];
412             const RealType dy  = iy - x[j3 + 1];
413             const RealType dz  = iz - x[j3 + 2];
414             const RealType rsq = dx * dx + dy * dy + dz * dz;
415             RealType       FscalC[NSTATES], FscalV[NSTATES];
416             /* Check if this pair on the exlusions list.*/
417             const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
418
419             if (rsq >= rcutoff_max2 && bPairIncluded)
420             {
421                 /* We save significant time by skipping all code below.
422                  * Note that with soft-core interactions, the actual cut-off
423                  * check might be different. But since the soft-core distance
424                  * is always larger than r, checking on r here is safe.
425                  * Exclusions outside the cutoff can not be skipped as
426                  * when using Ewald: the reciprocal-space
427                  * Ewald component still needs to be subtracted.
428                  */
429
430                 continue;
431             }
432             npair_within_cutoff++;
433
434             if (rsq > 0)
435             {
436                 /* Note that unlike in the nbnxn kernels, we do not need
437                  * to clamp the value of rsq before taking the invsqrt
438                  * to avoid NaN in the LJ calculation, since here we do
439                  * not calculate LJ interactions when C6 and C12 are zero.
440                  */
441
442                 rinv = gmx::invsqrt(rsq);
443                 r    = rsq * rinv;
444             }
445             else
446             {
447                 /* The force at r=0 is zero, because of symmetry.
448                  * But note that the potential is in general non-zero,
449                  * since the soft-cored r will be non-zero.
450                  */
451                 rinv = 0;
452                 r    = 0;
453             }
454
455             if (useSoftCore)
456             {
457                 rpm2 = rsq * rsq;  /* r4 */
458                 rp   = rpm2 * rsq; /* r6 */
459             }
460             else
461             {
462                 /* The soft-core power p will not affect the results
463                  * with not using soft-core, so we use power of 0 which gives
464                  * the simplest math and cheapest code.
465                  */
466                 rpm2 = rinv * rinv;
467                 rp   = 1;
468             }
469
470             RealType Fscal = 0;
471
472             qq[STATE_A] = iqA * chargeA[jnr];
473             qq[STATE_B] = iqB * chargeB[jnr];
474
475             tj[STATE_A] = ntiA + 2 * typeA[jnr];
476             tj[STATE_B] = ntiB + 2 * typeB[jnr];
477
478             if (bPairIncluded)
479             {
480                 c6[STATE_A] = nbfp[tj[STATE_A]];
481                 c6[STATE_B] = nbfp[tj[STATE_B]];
482
483                 for (int i = 0; i < NSTATES; i++)
484                 {
485                     c12[i] = nbfp[tj[i] + 1];
486                     if (useSoftCore)
487                     {
488                         if ((c6[i] > 0) && (c12[i] > 0))
489                         {
490                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
491                             sigma6[i] = half * c12[i] / c6[i];
492                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
493                             {
494                                 sigma6[i] = sigma6_min;
495                             }
496                         }
497                         else
498                         {
499                             sigma6[i] = sigma6_def;
500                         }
501                     }
502                 }
503
504                 if (useSoftCore)
505                 {
506                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
507                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
508                     {
509                         alpha_vdw_eff  = 0;
510                         alpha_coul_eff = 0;
511                     }
512                     else
513                     {
514                         alpha_vdw_eff  = alpha_vdw;
515                         alpha_coul_eff = alpha_coul;
516                     }
517                 }
518
519                 for (int i = 0; i < NSTATES; i++)
520                 {
521                     FscalC[i] = 0;
522                     FscalV[i] = 0;
523                     Vcoul[i]  = 0;
524                     Vvdw[i]   = 0;
525
526                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
527
528                     /* Only spend time on A or B state if it is non-zero */
529                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
530                     {
531                         /* this section has to be inside the loop because of the dependence on sigma6 */
532                         if (useSoftCore)
533                         {
534                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
535                             pthRoot(rpinvC, &rinvC, &rC);
536                             if (scLambdasOrAlphasDiffer)
537                             {
538                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
539                                 pthRoot(rpinvV, &rinvV, &rV);
540                             }
541                             else
542                             {
543                                 /* We can avoid one expensive pow and one / operation */
544                                 rpinvV = rpinvC;
545                                 rinvV  = rinvC;
546                                 rV     = rC;
547                             }
548                         }
549                         else
550                         {
551                             rpinvC = 1;
552                             rinvC  = rinv;
553                             rC     = r;
554
555                             rpinvV = 1;
556                             rinvV  = rinv;
557                             rV     = r;
558                         }
559
560                         /* Only process the coulomb interactions if we have charges,
561                          * and if we either include all entries in the list (no cutoff
562                          * used in the kernel), or if we are within the cutoff.
563                          */
564                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
565                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
566
567                         if ((qq[i] != 0) && computeElecInteraction)
568                         {
569                             if (elecInteractionTypeIsEwald)
570                             {
571                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
572                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
573                             }
574                             else
575                             {
576                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
577                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
578                             }
579                         }
580
581                         /* Only process the VDW interactions if we have
582                          * some non-zero parameters, and if we either
583                          * include all entries in the list (no cutoff used
584                          * in the kernel), or if we are within the cutoff.
585                          */
586                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
587                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
588                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
589                         {
590                             RealType rinv6;
591                             if (useSoftCore)
592                             {
593                                 rinv6 = rpinvV;
594                             }
595                             else
596                             {
597                                 rinv6 = calculateRinv6(rinvV);
598                             }
599                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
600                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
601
602                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
603                                                             dispersionShift, onesixth, onetwelfth);
604                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
605
606                             if (vdwInteractionTypeIsEwald)
607                             {
608                                 /* Subtract the grid potential at the cut-off */
609                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
610                                                                          sh_lj_ewald, onesixth);
611                             }
612
613                             if (vdwModifierIsPotSwitch)
614                             {
615                                 RealType d        = rV - ic->rvdw_switch;
616                                 d                 = (d > zero) ? d : zero;
617                                 const RealType d2 = d * d;
618                                 const RealType sw =
619                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
620                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
621
622                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
623                                                                     rvdw, dsw, zero);
624                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
625                             }
626                         }
627
628                         /* FscalC (and FscalV) now contain: dV/drC * rC
629                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
630                          * Further down we first multiply by r^p-2 and then by
631                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
632                          */
633                         FscalC[i] *= rpinvC;
634                         FscalV[i] *= rpinvV;
635                     }
636                 } // end for (int i = 0; i < NSTATES; i++)
637
638                 /* Assemble A and B states */
639                 for (int i = 0; i < NSTATES; i++)
640                 {
641                     vctot += LFC[i] * Vcoul[i];
642                     vvtot += LFV[i] * Vvdw[i];
643
644                     Fscal += LFC[i] * FscalC[i] * rpm2;
645                     Fscal += LFV[i] * FscalV[i] * rpm2;
646
647                     if (useSoftCore)
648                     {
649                         dvdl_coul += Vcoul[i] * DLF[i]
650                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
651                         dvdl_vdw += Vvdw[i] * DLF[i]
652                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
653                     }
654                     else
655                     {
656                         dvdl_coul += Vcoul[i] * DLF[i];
657                         dvdl_vdw += Vvdw[i] * DLF[i];
658                     }
659                 }
660             } // end if (bPairIncluded)
661             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
662             {
663                 /* For excluded pairs, which are only in this pair list when
664                  * using the Verlet scheme, we don't use soft-core.
665                  * As there is no singularity, there is no need for soft-core.
666                  */
667                 const real FF = -two * krf;
668                 RealType   VV = krf * rsq - crf;
669
670                 if (ii == jnr)
671                 {
672                     VV *= half;
673                 }
674
675                 for (int i = 0; i < NSTATES; i++)
676                 {
677                     vctot += LFC[i] * qq[i] * VV;
678                     Fscal += LFC[i] * qq[i] * FF;
679                     dvdl_coul += DLF[i] * qq[i] * VV;
680                 }
681             }
682
683             if (elecInteractionTypeIsEwald && (r < rcoulomb || !bPairIncluded))
684             {
685                 /* See comment in the preamble. When using Ewald interactions
686                  * (unless we use a switch modifier) we subtract the reciprocal-space
687                  * Ewald component here which made it possible to apply the free
688                  * energy interaction to 1/r (vanilla coulomb short-range part)
689                  * above. This gets us closer to the ideal case of applying
690                  * the softcore to the entire electrostatic interaction,
691                  * including the reciprocal-space component.
692                  */
693                 real v_lr, f_lr;
694
695                 const RealType ewrt   = r * coulombTableScale;
696                 IntType        ewitab = static_cast<IntType>(ewrt);
697                 const RealType eweps  = ewrt - ewitab;
698                 ewitab                = 4 * ewitab;
699                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
700                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
701                 f_lr *= rinv;
702
703                 /* Note that any possible Ewald shift has already been applied in
704                  * the normal interaction part above.
705                  */
706
707                 if (ii == jnr)
708                 {
709                     /* If we get here, the i particle (ii) has itself (jnr)
710                      * in its neighborlist. This can only happen with the Verlet
711                      * scheme, and corresponds to a self-interaction that will
712                      * occur twice. Scale it down by 50% to only include it once.
713                      */
714                     v_lr *= half;
715                 }
716
717                 for (int i = 0; i < NSTATES; i++)
718                 {
719                     vctot -= LFC[i] * qq[i] * v_lr;
720                     Fscal -= LFC[i] * qq[i] * f_lr;
721                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
722                 }
723             }
724
725             if (vdwInteractionTypeIsEwald && r < rvdw)
726             {
727                 /* See comment in the preamble. When using LJ-Ewald interactions
728                  * (unless we use a switch modifier) we subtract the reciprocal-space
729                  * Ewald component here which made it possible to apply the free
730                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
731                  * above. This gets us closer to the ideal case of applying
732                  * the softcore to the entire VdW interaction,
733                  * including the reciprocal-space component.
734                  */
735                 /* We could also use the analytical form here
736                  * iso a table, but that can cause issues for
737                  * r close to 0 for non-interacting pairs.
738                  */
739
740                 const RealType rs   = rsq * rinv * vdwTableScale;
741                 const IntType  ri   = static_cast<IntType>(rs);
742                 const RealType frac = rs - ri;
743                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
744                 /* TODO: Currently the Ewald LJ table does not contain
745                  * the factor 1/6, we should add this.
746                  */
747                 const RealType FF = f_lr * rinv / six;
748                 RealType       VV =
749                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
750                         / six;
751
752                 if (ii == jnr)
753                 {
754                     /* If we get here, the i particle (ii) has itself (jnr)
755                      * in its neighborlist. This can only happen with the Verlet
756                      * scheme, and corresponds to a self-interaction that will
757                      * occur twice. Scale it down by 50% to only include it once.
758                      */
759                     VV *= half;
760                 }
761
762                 for (int i = 0; i < NSTATES; i++)
763                 {
764                     const real c6grid = nbfp_grid[tj[i]];
765                     vvtot += LFV[i] * c6grid * VV;
766                     Fscal += LFV[i] * c6grid * FF;
767                     dvdl_vdw += (DLF[i] * c6grid) * VV;
768                 }
769             }
770
771             if (doForces)
772             {
773                 const real tx = Fscal * dx;
774                 const real ty = Fscal * dy;
775                 const real tz = Fscal * dz;
776                 fix           = fix + tx;
777                 fiy           = fiy + ty;
778                 fiz           = fiz + tz;
779                 /* OpenMP atomics are expensive, but this kernels is also
780                  * expensive, so we can take this hit, instead of using
781                  * thread-local output buffers and extra reduction.
782                  *
783                  * All the OpenMP regions in this file are trivial and should
784                  * not throw, so no need for try/catch.
785                  */
786 #pragma omp atomic
787                 f[j3] -= tx;
788 #pragma omp atomic
789                 f[j3 + 1] -= ty;
790 #pragma omp atomic
791                 f[j3 + 2] -= tz;
792             }
793         } // end for (int k = nj0; k < nj1; k++)
794
795         /* The atomics below are expensive with many OpenMP threads.
796          * Here unperturbed i-particles will usually only have a few
797          * (perturbed) j-particles in the list. Thus with a buffered list
798          * we can skip a significant number of i-reductions with a check.
799          */
800         if (npair_within_cutoff > 0)
801         {
802             if (doForces)
803             {
804 #pragma omp atomic
805                 f[ii3] += fix;
806 #pragma omp atomic
807                 f[ii3 + 1] += fiy;
808 #pragma omp atomic
809                 f[ii3 + 2] += fiz;
810             }
811             if (doShiftForces)
812             {
813 #pragma omp atomic
814                 fshift[is3] += fix;
815 #pragma omp atomic
816                 fshift[is3 + 1] += fiy;
817 #pragma omp atomic
818                 fshift[is3 + 2] += fiz;
819             }
820             if (doPotential)
821             {
822                 int ggid = gid[n];
823 #pragma omp atomic
824                 Vc[ggid] += vctot;
825 #pragma omp atomic
826                 Vv[ggid] += vvtot;
827             }
828         }
829     } // end for (int n = 0; n < nri; n++)
830
831 #pragma omp atomic
832     dvdl[efptCOUL] += dvdl_coul;
833 #pragma omp atomic
834     dvdl[efptVDW] += dvdl_vdw;
835
836     /* Estimate flops, average for free energy stuff:
837      * 12  flops per outer iteration
838      * 150 flops per inner iteration
839      */
840 #pragma omp atomic
841     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
842 }
843
844 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
845                                rvec* gmx_restrict         xx,
846                                gmx::ForceWithShiftForces* forceWithShiftForces,
847                                const t_forcerec* gmx_restrict fr,
848                                const t_mdatoms* gmx_restrict mdatoms,
849                                nb_kernel_data_t* gmx_restrict kernel_data,
850                                t_nrnb* gmx_restrict nrnb);
851
852 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
853 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
854 {
855     if (useSimd)
856     {
857 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
858         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
859         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
860                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
861 #else
862         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
863                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
864 #endif
865     }
866     else
867     {
868         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
869                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
870     }
871 }
872
873 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
874 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
875 {
876     if (vdwModifierIsPotSwitch)
877     {
878         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
879                                         elecInteractionTypeIsEwald, true>(useSimd));
880     }
881     else
882     {
883         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
884                                         elecInteractionTypeIsEwald, false>(useSimd));
885     }
886 }
887
888 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
889 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
890                                                           const bool vdwModifierIsPotSwitch,
891                                                           const bool useSimd)
892 {
893     if (elecInteractionTypeIsEwald)
894     {
895         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
896                 vdwModifierIsPotSwitch, useSimd));
897     }
898     else
899     {
900         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
901                 vdwModifierIsPotSwitch, useSimd));
902     }
903 }
904
905 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
906 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
907                                                          const bool elecInteractionTypeIsEwald,
908                                                          const bool vdwModifierIsPotSwitch,
909                                                          const bool useSimd)
910 {
911     if (vdwInteractionTypeIsEwald)
912     {
913         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
914                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
915     }
916     else
917     {
918         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
919                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
920     }
921 }
922
923 template<bool useSoftCore>
924 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
925                                                                   const bool vdwInteractionTypeIsEwald,
926                                                                   const bool elecInteractionTypeIsEwald,
927                                                                   const bool vdwModifierIsPotSwitch,
928                                                                   const bool useSimd)
929 {
930     if (scLambdasOrAlphasDiffer)
931     {
932         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
933                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
934     }
935     else
936     {
937         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
938                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
939     }
940 }
941
942 static KernelFunction dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
943                                      const bool                 vdwInteractionTypeIsEwald,
944                                      const bool                 elecInteractionTypeIsEwald,
945                                      const bool                 vdwModifierIsPotSwitch,
946                                      const bool                 useSimd,
947                                      const interaction_const_t& ic)
948 {
949     if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
950     {
951         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
952                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
953                 vdwModifierIsPotSwitch, useSimd));
954     }
955     else
956     {
957         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
958                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
959                 vdwModifierIsPotSwitch, useSimd));
960     }
961 }
962
963
964 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
965                                rvec*                      xx,
966                                gmx::ForceWithShiftForces* ff,
967                                const t_forcerec*          fr,
968                                const t_mdatoms*           mdatoms,
969                                nb_kernel_data_t*          kernel_data,
970                                t_nrnb*                    nrnb)
971 {
972     const interaction_const_t& ic = *fr->ic;
973     GMX_ASSERT(EEL_PME_EWALD(ic.eeltype) || ic.eeltype == eelCUT || EEL_RF(ic.eeltype),
974                "Unsupported eeltype with free energy");
975     GMX_ASSERT(ic.softCoreParameters, "We need soft-core parameters");
976
977     const auto& scParams                   = *ic.softCoreParameters;
978     const bool  vdwInteractionTypeIsEwald  = (EVDW_PME(ic.vdwtype));
979     const bool  elecInteractionTypeIsEwald = (EEL_PME_EWALD(ic.eeltype));
980     const bool  vdwModifierIsPotSwitch     = (ic.vdw_modifier == eintmodPOTSWITCH);
981     bool        scLambdasOrAlphasDiffer    = true;
982     const bool  useSimd                    = fr->use_simd_kernels;
983
984     if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
985     {
986         scLambdasOrAlphasDiffer = false;
987     }
988     else
989     {
990         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW]
991             && scParams.alphaCoulomb == scParams.alphaVdw)
992         {
993             scLambdasOrAlphasDiffer = false;
994         }
995     }
996
997     KernelFunction kernelFunc;
998     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
999                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, ic);
1000     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1001 }