e23eef5bacac81b46820d97095190cd3f8cfbf02
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include "config.h"
43
44 #include <cmath>
45
46 #include <algorithm>
47
48 #include "gromacs/gmxlib/nrnb.h"
49 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
50 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
51 #include "gromacs/math/functions.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdtypes/forceoutput.h"
54 #include "gromacs/mdtypes/forcerec.h"
55 #include "gromacs/mdtypes/interaction_const.h"
56 #include "gromacs/mdtypes/md_enums.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/simd/simd.h"
59 #include "gromacs/utility/fatalerror.h"
60
61
62 //! Scalar (non-SIMD) data types.
63 struct ScalarDataTypes
64 {
65     using RealType                     = real; //!< The data type to use as real.
66     using IntType                      = int;  //!< The data type to use as int.
67     static constexpr int simdRealWidth = 1;    //!< The width of the RealType.
68     static constexpr int simdIntWidth  = 1;    //!< The width of the IntType.
69 };
70
71 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
72 //! SIMD data types.
73 struct SimdDataTypes
74 {
75     using RealType                     = gmx::SimdReal;         //!< The data type to use as real.
76     using IntType                      = gmx::SimdInt32;        //!< The data type to use as int.
77     static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH;   //!< The width of the RealType.
78     static constexpr int simdIntWidth  = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
79 };
80 #endif
81
82 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
83 template<class RealType>
84 static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
85 {
86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
87     *pthRoot    = 1 / (*invPthRoot);
88 }
89
90 template<class RealType>
91 static inline RealType calculateRinv6(const RealType rinvV)
92 {
93     RealType rinv6 = rinvV * rinvV;
94     return (rinv6 * rinv6 * rinv6);
95 }
96
97 template<class RealType>
98 static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
99 {
100     return (c6 * rinv6);
101 }
102
103 template<class RealType>
104 static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
105 {
106     return (c12 * rinv6 * rinv6);
107 }
108
109 /* reaction-field electrostatics */
110 template<class RealType>
111 static inline RealType reactionFieldScalarForce(const RealType qq,
112                                                 const RealType rinv,
113                                                 const RealType r,
114                                                 const real     krf,
115                                                 const real     two)
116 {
117     return (qq * (rinv - two * krf * r * r));
118 }
119 template<class RealType>
120 static inline RealType reactionFieldPotential(const RealType qq,
121                                               const RealType rinv,
122                                               const RealType r,
123                                               const real     krf,
124                                               const real     potentialShift)
125 {
126     return (qq * (rinv + krf * r * r - potentialShift));
127 }
128
129 /* Ewald electrostatics */
130 template<class RealType>
131 static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
132 {
133     return (coulomb * rinv);
134 }
135 template<class RealType>
136 static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
137 {
138     return (coulomb * (rinv - potentialShift));
139 }
140
141 /* cutoff LJ */
142 template<class RealType>
143 static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
144 {
145     return (v12 - v6);
146 }
147 template<class RealType>
148 static inline RealType lennardJonesPotential(const RealType v6,
149                                              const RealType v12,
150                                              const RealType c6,
151                                              const RealType c12,
152                                              const real     repulsionShift,
153                                              const real     dispersionShift,
154                                              const real     onesixth,
155                                              const real     onetwelfth)
156 {
157     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
158 }
159
160 /* Ewald LJ */
161 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
162 {
163     return (c6grid * potentialShift * onesixth);
164 }
165
166 /* LJ Potential switch */
167 template<class RealType>
168 static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
169                                                const RealType potential,
170                                                const RealType sw,
171                                                const RealType r,
172                                                const RealType rVdw,
173                                                const RealType dsw,
174                                                const real     zero)
175 {
176     if (r < rVdw)
177     {
178         real fScalar = fScalarInp * sw - r * potential * dsw;
179         return (fScalar);
180     }
181     return (zero);
182 }
183 template<class RealType>
184 static inline RealType potSwitchPotentialMod(const RealType potentialInp,
185                                              const RealType sw,
186                                              const RealType r,
187                                              const RealType rVdw,
188                                              const real     zero)
189 {
190     if (r < rVdw)
191     {
192         real potential = potentialInp * sw;
193         return (potential);
194     }
195     return (zero);
196 }
197
198
199 //! Templated free-energy non-bonded kernel
200 template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
201 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
202                                   rvec* gmx_restrict         xx,
203                                   gmx::ForceWithShiftForces* forceWithShiftForces,
204                                   const t_forcerec* gmx_restrict fr,
205                                   const t_mdatoms* gmx_restrict mdatoms,
206                                   nb_kernel_data_t* gmx_restrict kernel_data,
207                                   t_nrnb* gmx_restrict nrnb)
208 {
209 #define STATE_A 0
210 #define STATE_B 1
211 #define NSTATES 2
212
213     using RealType = typename DataTypes::RealType;
214     using IntType  = typename DataTypes::IntType;
215
216     /* FIXME: How should these be handled with SIMD? */
217     constexpr real onetwelfth = 1.0 / 12.0;
218     constexpr real onesixth   = 1.0 / 6.0;
219     constexpr real zero       = 0.0;
220     constexpr real half       = 0.5;
221     constexpr real one        = 1.0;
222     constexpr real two        = 2.0;
223     constexpr real six        = 6.0;
224
225     /* Extract pointer to non-bonded interaction constants */
226     const interaction_const_t* ic = fr->ic;
227
228     // Extract pair list data
229     const int  nri    = nlist->nri;
230     const int* iinr   = nlist->iinr;
231     const int* jindex = nlist->jindex;
232     const int* jjnr   = nlist->jjnr;
233     const int* shift  = nlist->shift;
234     const int* gid    = nlist->gid;
235
236     const real* shiftvec      = fr->shift_vec[0];
237     const real* chargeA       = mdatoms->chargeA;
238     const real* chargeB       = mdatoms->chargeB;
239     real*       Vc            = kernel_data->energygrp_elec;
240     const int*  typeA         = mdatoms->typeA;
241     const int*  typeB         = mdatoms->typeB;
242     const int   ntype         = fr->ntype;
243     const real* nbfp          = fr->nbfp.data();
244     const real* nbfp_grid     = fr->ljpme_c6grid;
245     real*       Vv            = kernel_data->energygrp_vdw;
246     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
247     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
248     real*       dvdl          = kernel_data->dvdl;
249     const real  alpha_coul    = fr->sc_alphacoul;
250     const real  alpha_vdw     = fr->sc_alphavdw;
251     const real  lam_power     = fr->sc_power;
252     const real  sigma6_def    = fr->sc_sigma6_def;
253     const real  sigma6_min    = fr->sc_sigma6_min;
254     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
255     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
256     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
257
258     // Extract data from interaction_const_t
259     const real facel           = ic->epsfac;
260     const real rcoulomb        = ic->rcoulomb;
261     const real krf             = ic->k_rf;
262     const real crf             = ic->c_rf;
263     const real sh_lj_ewald     = ic->sh_lj_ewald;
264     const real rvdw            = ic->rvdw;
265     const real dispersionShift = ic->dispersion_shift.cpot;
266     const real repulsionShift  = ic->repulsion_shift.cpot;
267
268     // Note that the nbnxm kernels do not support Coulomb potential switching at all
269     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
270                "Potential switching is not supported for Coulomb with FEP");
271
272     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
273     if (vdwModifierIsPotSwitch)
274     {
275         const real d = ic->rvdw - ic->rvdw_switch;
276         vdw_swV3     = -10.0 / (d * d * d);
277         vdw_swV4     = 15.0 / (d * d * d * d);
278         vdw_swV5     = -6.0 / (d * d * d * d * d);
279         vdw_swF2     = -30.0 / (d * d * d);
280         vdw_swF3     = 60.0 / (d * d * d * d);
281         vdw_swF4     = -30.0 / (d * d * d * d * d);
282     }
283     else
284     {
285         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
286         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
287     }
288
289     int icoul;
290     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
291     {
292         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
293     }
294     else
295     {
296         icoul = GMX_NBKERNEL_ELEC_NONE;
297     }
298
299     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
300     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
301
302     const real* tab_ewald_F_lj           = nullptr;
303     const real* tab_ewald_V_lj           = nullptr;
304     const real* ewtab                    = nullptr;
305     real        coulombTableScale        = 0;
306     real        coulombTableScaleInvHalf = 0;
307     real        vdwTableScale            = 0;
308     real        vdwTableScaleInvHalf     = 0;
309     real        sh_ewald                 = 0;
310     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
311     {
312         sh_ewald = ic->sh_ewald;
313     }
314     if (elecInteractionTypeIsEwald)
315     {
316         const auto& coulombTables = *ic->coulombEwaldTables;
317         ewtab                     = coulombTables.tableFDV0.data();
318         coulombTableScale         = coulombTables.scale;
319         coulombTableScaleInvHalf  = half / coulombTableScale;
320     }
321     if (vdwInteractionTypeIsEwald)
322     {
323         const auto& vdwTables = *ic->vdwEwaldTables;
324         tab_ewald_F_lj        = vdwTables.tableF.data();
325         tab_ewald_V_lj        = vdwTables.tableV.data();
326         vdwTableScale         = vdwTables.scale;
327         vdwTableScaleInvHalf  = half / vdwTableScale;
328     }
329
330     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
331      * reciprocal space. When we use non-switched Ewald interactions, we
332      * assume the soft-coring does not significantly affect the grid contribution
333      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
334      *
335      * However, we cannot use this approach for switch-modified since we would then
336      * effectively end up evaluating a significantly different interaction here compared to the
337      * normal (non-free-energy) kernels, either by applying a cutoff at a different
338      * position than what the user requested, or by switching different
339      * things (1/r rather than short-range Ewald). For these settings, we just
340      * use the traditional short-range Ewald interaction in that case.
341      */
342     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
343                        "Can not apply soft-core to switched Ewald potentials");
344
345     real dvdl_coul = 0;
346     real dvdl_vdw  = 0;
347
348     /* Lambda factor for state A, 1-lambda*/
349     real LFC[NSTATES], LFV[NSTATES];
350     LFC[STATE_A] = one - lambda_coul;
351     LFV[STATE_A] = one - lambda_vdw;
352
353     /* Lambda factor for state B, lambda*/
354     LFC[STATE_B] = lambda_coul;
355     LFV[STATE_B] = lambda_vdw;
356
357     /*derivative of the lambda factor for state A and B */
358     real DLF[NSTATES];
359     DLF[STATE_A] = -1;
360     DLF[STATE_B] = 1;
361
362     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
363     constexpr real sc_r_power = 6.0_real;
364     for (int i = 0; i < NSTATES; i++)
365     {
366         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
367         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
368         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
369         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
370     }
371
372     // TODO: We should get rid of using pointers to real
373     const real* x             = xx[0];
374     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
375     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
376
377     for (int n = 0; n < nri; n++)
378     {
379         int npair_within_cutoff = 0;
380
381         const int  is3   = 3 * shift[n];
382         const real shX   = shiftvec[is3];
383         const real shY   = shiftvec[is3 + 1];
384         const real shZ   = shiftvec[is3 + 2];
385         const int  nj0   = jindex[n];
386         const int  nj1   = jindex[n + 1];
387         const int  ii    = iinr[n];
388         const int  ii3   = 3 * ii;
389         const real ix    = shX + x[ii3 + 0];
390         const real iy    = shY + x[ii3 + 1];
391         const real iz    = shZ + x[ii3 + 2];
392         const real iqA   = facel * chargeA[ii];
393         const real iqB   = facel * chargeB[ii];
394         const int  ntiA  = 2 * ntype * typeA[ii];
395         const int  ntiB  = 2 * ntype * typeB[ii];
396         real       vctot = 0;
397         real       vvtot = 0;
398         real       fix   = 0;
399         real       fiy   = 0;
400         real       fiz   = 0;
401
402         for (int k = nj0; k < nj1; k++)
403         {
404             int            tj[NSTATES];
405             const int      jnr = jjnr[k];
406             const int      j3  = 3 * jnr;
407             RealType       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
408             RealType       r, rinv, rp, rpm2;
409             RealType       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
410             const RealType dx  = ix - x[j3];
411             const RealType dy  = iy - x[j3 + 1];
412             const RealType dz  = iz - x[j3 + 2];
413             const RealType rsq = dx * dx + dy * dy + dz * dz;
414             RealType       FscalC[NSTATES], FscalV[NSTATES];
415             /* Check if this pair on the exlusions list.*/
416             const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
417
418             if (rsq >= rcutoff_max2 && bPairIncluded)
419             {
420                 /* We save significant time by skipping all code below.
421                  * Note that with soft-core interactions, the actual cut-off
422                  * check might be different. But since the soft-core distance
423                  * is always larger than r, checking on r here is safe.
424                  * Exclusions outside the cutoff can not be skipped as
425                  * when using Ewald: the reciprocal-space
426                  * Ewald component still needs to be subtracted.
427                  */
428
429                 continue;
430             }
431             npair_within_cutoff++;
432
433             if (rsq > 0)
434             {
435                 /* Note that unlike in the nbnxn kernels, we do not need
436                  * to clamp the value of rsq before taking the invsqrt
437                  * to avoid NaN in the LJ calculation, since here we do
438                  * not calculate LJ interactions when C6 and C12 are zero.
439                  */
440
441                 rinv = gmx::invsqrt(rsq);
442                 r    = rsq * rinv;
443             }
444             else
445             {
446                 /* The force at r=0 is zero, because of symmetry.
447                  * But note that the potential is in general non-zero,
448                  * since the soft-cored r will be non-zero.
449                  */
450                 rinv = 0;
451                 r    = 0;
452             }
453
454             if (useSoftCore)
455             {
456                 rpm2 = rsq * rsq;  /* r4 */
457                 rp   = rpm2 * rsq; /* r6 */
458             }
459             else
460             {
461                 /* The soft-core power p will not affect the results
462                  * with not using soft-core, so we use power of 0 which gives
463                  * the simplest math and cheapest code.
464                  */
465                 rpm2 = rinv * rinv;
466                 rp   = 1;
467             }
468
469             RealType Fscal = 0;
470
471             qq[STATE_A] = iqA * chargeA[jnr];
472             qq[STATE_B] = iqB * chargeB[jnr];
473
474             tj[STATE_A] = ntiA + 2 * typeA[jnr];
475             tj[STATE_B] = ntiB + 2 * typeB[jnr];
476
477             if (bPairIncluded)
478             {
479                 c6[STATE_A] = nbfp[tj[STATE_A]];
480                 c6[STATE_B] = nbfp[tj[STATE_B]];
481
482                 for (int i = 0; i < NSTATES; i++)
483                 {
484                     c12[i] = nbfp[tj[i] + 1];
485                     if (useSoftCore)
486                     {
487                         if ((c6[i] > 0) && (c12[i] > 0))
488                         {
489                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
490                             sigma6[i] = half * c12[i] / c6[i];
491                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
492                             {
493                                 sigma6[i] = sigma6_min;
494                             }
495                         }
496                         else
497                         {
498                             sigma6[i] = sigma6_def;
499                         }
500                     }
501                 }
502
503                 if (useSoftCore)
504                 {
505                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
506                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
507                     {
508                         alpha_vdw_eff  = 0;
509                         alpha_coul_eff = 0;
510                     }
511                     else
512                     {
513                         alpha_vdw_eff  = alpha_vdw;
514                         alpha_coul_eff = alpha_coul;
515                     }
516                 }
517
518                 for (int i = 0; i < NSTATES; i++)
519                 {
520                     FscalC[i] = 0;
521                     FscalV[i] = 0;
522                     Vcoul[i]  = 0;
523                     Vvdw[i]   = 0;
524
525                     RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
526
527                     /* Only spend time on A or B state if it is non-zero */
528                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
529                     {
530                         /* this section has to be inside the loop because of the dependence on sigma6 */
531                         if (useSoftCore)
532                         {
533                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
534                             pthRoot(rpinvC, &rinvC, &rC);
535                             if (scLambdasOrAlphasDiffer)
536                             {
537                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
538                                 pthRoot(rpinvV, &rinvV, &rV);
539                             }
540                             else
541                             {
542                                 /* We can avoid one expensive pow and one / operation */
543                                 rpinvV = rpinvC;
544                                 rinvV  = rinvC;
545                                 rV     = rC;
546                             }
547                         }
548                         else
549                         {
550                             rpinvC = 1;
551                             rinvC  = rinv;
552                             rC     = r;
553
554                             rpinvV = 1;
555                             rinvV  = rinv;
556                             rV     = r;
557                         }
558
559                         /* Only process the coulomb interactions if we have charges,
560                          * and if we either include all entries in the list (no cutoff
561                          * used in the kernel), or if we are within the cutoff.
562                          */
563                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
564                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
565
566                         if ((qq[i] != 0) && computeElecInteraction)
567                         {
568                             if (elecInteractionTypeIsEwald)
569                             {
570                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
571                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
572                             }
573                             else
574                             {
575                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
576                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
577                             }
578                         }
579
580                         /* Only process the VDW interactions if we have
581                          * some non-zero parameters, and if we either
582                          * include all entries in the list (no cutoff used
583                          * in the kernel), or if we are within the cutoff.
584                          */
585                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
586                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
587                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
588                         {
589                             RealType rinv6;
590                             if (useSoftCore)
591                             {
592                                 rinv6 = rpinvV;
593                             }
594                             else
595                             {
596                                 rinv6 = calculateRinv6(rinvV);
597                             }
598                             RealType Vvdw6  = calculateVdw6(c6[i], rinv6);
599                             RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
600
601                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
602                                                             dispersionShift, onesixth, onetwelfth);
603                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
604
605                             if (vdwInteractionTypeIsEwald)
606                             {
607                                 /* Subtract the grid potential at the cut-off */
608                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
609                                                                          sh_lj_ewald, onesixth);
610                             }
611
612                             if (vdwModifierIsPotSwitch)
613                             {
614                                 RealType d        = rV - ic->rvdw_switch;
615                                 d                 = (d > zero) ? d : zero;
616                                 const RealType d2 = d * d;
617                                 const RealType sw =
618                                         one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
619                                 const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
620
621                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
622                                                                     rvdw, dsw, zero);
623                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
624                             }
625                         }
626
627                         /* FscalC (and FscalV) now contain: dV/drC * rC
628                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
629                          * Further down we first multiply by r^p-2 and then by
630                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
631                          */
632                         FscalC[i] *= rpinvC;
633                         FscalV[i] *= rpinvV;
634                     }
635                 } // end for (int i = 0; i < NSTATES; i++)
636
637                 /* Assemble A and B states */
638                 for (int i = 0; i < NSTATES; i++)
639                 {
640                     vctot += LFC[i] * Vcoul[i];
641                     vvtot += LFV[i] * Vvdw[i];
642
643                     Fscal += LFC[i] * FscalC[i] * rpm2;
644                     Fscal += LFV[i] * FscalV[i] * rpm2;
645
646                     if (useSoftCore)
647                     {
648                         dvdl_coul += Vcoul[i] * DLF[i]
649                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
650                         dvdl_vdw += Vvdw[i] * DLF[i]
651                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
652                     }
653                     else
654                     {
655                         dvdl_coul += Vcoul[i] * DLF[i];
656                         dvdl_vdw += Vvdw[i] * DLF[i];
657                     }
658                 }
659             } // end if (bPairIncluded)
660             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
661             {
662                 /* For excluded pairs, which are only in this pair list when
663                  * using the Verlet scheme, we don't use soft-core.
664                  * As there is no singularity, there is no need for soft-core.
665                  */
666                 const real FF = -two * krf;
667                 RealType   VV = krf * rsq - crf;
668
669                 if (ii == jnr)
670                 {
671                     VV *= half;
672                 }
673
674                 for (int i = 0; i < NSTATES; i++)
675                 {
676                     vctot += LFC[i] * qq[i] * VV;
677                     Fscal += LFC[i] * qq[i] * FF;
678                     dvdl_coul += DLF[i] * qq[i] * VV;
679                 }
680             }
681
682             if (elecInteractionTypeIsEwald && (r < rcoulomb || !bPairIncluded))
683             {
684                 /* See comment in the preamble. When using Ewald interactions
685                  * (unless we use a switch modifier) we subtract the reciprocal-space
686                  * Ewald component here which made it possible to apply the free
687                  * energy interaction to 1/r (vanilla coulomb short-range part)
688                  * above. This gets us closer to the ideal case of applying
689                  * the softcore to the entire electrostatic interaction,
690                  * including the reciprocal-space component.
691                  */
692                 real v_lr, f_lr;
693
694                 const RealType ewrt   = r * coulombTableScale;
695                 IntType        ewitab = static_cast<IntType>(ewrt);
696                 const RealType eweps  = ewrt - ewitab;
697                 ewitab                = 4 * ewitab;
698                 f_lr                  = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
699                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
700                 f_lr *= rinv;
701
702                 /* Note that any possible Ewald shift has already been applied in
703                  * the normal interaction part above.
704                  */
705
706                 if (ii == jnr)
707                 {
708                     /* If we get here, the i particle (ii) has itself (jnr)
709                      * in its neighborlist. This can only happen with the Verlet
710                      * scheme, and corresponds to a self-interaction that will
711                      * occur twice. Scale it down by 50% to only include it once.
712                      */
713                     v_lr *= half;
714                 }
715
716                 for (int i = 0; i < NSTATES; i++)
717                 {
718                     vctot -= LFC[i] * qq[i] * v_lr;
719                     Fscal -= LFC[i] * qq[i] * f_lr;
720                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
721                 }
722             }
723
724             if (vdwInteractionTypeIsEwald && r < rvdw)
725             {
726                 /* See comment in the preamble. When using LJ-Ewald interactions
727                  * (unless we use a switch modifier) we subtract the reciprocal-space
728                  * Ewald component here which made it possible to apply the free
729                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
730                  * above. This gets us closer to the ideal case of applying
731                  * the softcore to the entire VdW interaction,
732                  * including the reciprocal-space component.
733                  */
734                 /* We could also use the analytical form here
735                  * iso a table, but that can cause issues for
736                  * r close to 0 for non-interacting pairs.
737                  */
738
739                 const RealType rs   = rsq * rinv * vdwTableScale;
740                 const IntType  ri   = static_cast<IntType>(rs);
741                 const RealType frac = rs - ri;
742                 const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
743                 /* TODO: Currently the Ewald LJ table does not contain
744                  * the factor 1/6, we should add this.
745                  */
746                 const RealType FF = f_lr * rinv / six;
747                 RealType       VV =
748                         (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
749                         / six;
750
751                 if (ii == jnr)
752                 {
753                     /* If we get here, the i particle (ii) has itself (jnr)
754                      * in its neighborlist. This can only happen with the Verlet
755                      * scheme, and corresponds to a self-interaction that will
756                      * occur twice. Scale it down by 50% to only include it once.
757                      */
758                     VV *= half;
759                 }
760
761                 for (int i = 0; i < NSTATES; i++)
762                 {
763                     const real c6grid = nbfp_grid[tj[i]];
764                     vvtot += LFV[i] * c6grid * VV;
765                     Fscal += LFV[i] * c6grid * FF;
766                     dvdl_vdw += (DLF[i] * c6grid) * VV;
767                 }
768             }
769
770             if (doForces)
771             {
772                 const real tx = Fscal * dx;
773                 const real ty = Fscal * dy;
774                 const real tz = Fscal * dz;
775                 fix           = fix + tx;
776                 fiy           = fiy + ty;
777                 fiz           = fiz + tz;
778                 /* OpenMP atomics are expensive, but this kernels is also
779                  * expensive, so we can take this hit, instead of using
780                  * thread-local output buffers and extra reduction.
781                  *
782                  * All the OpenMP regions in this file are trivial and should
783                  * not throw, so no need for try/catch.
784                  */
785 #pragma omp atomic
786                 f[j3] -= tx;
787 #pragma omp atomic
788                 f[j3 + 1] -= ty;
789 #pragma omp atomic
790                 f[j3 + 2] -= tz;
791             }
792         } // end for (int k = nj0; k < nj1; k++)
793
794         /* The atomics below are expensive with many OpenMP threads.
795          * Here unperturbed i-particles will usually only have a few
796          * (perturbed) j-particles in the list. Thus with a buffered list
797          * we can skip a significant number of i-reductions with a check.
798          */
799         if (npair_within_cutoff > 0)
800         {
801             if (doForces)
802             {
803 #pragma omp atomic
804                 f[ii3] += fix;
805 #pragma omp atomic
806                 f[ii3 + 1] += fiy;
807 #pragma omp atomic
808                 f[ii3 + 2] += fiz;
809             }
810             if (doShiftForces)
811             {
812 #pragma omp atomic
813                 fshift[is3] += fix;
814 #pragma omp atomic
815                 fshift[is3 + 1] += fiy;
816 #pragma omp atomic
817                 fshift[is3 + 2] += fiz;
818             }
819             if (doPotential)
820             {
821                 int ggid = gid[n];
822 #pragma omp atomic
823                 Vc[ggid] += vctot;
824 #pragma omp atomic
825                 Vv[ggid] += vvtot;
826             }
827         }
828     } // end for (int n = 0; n < nri; n++)
829
830 #pragma omp atomic
831     dvdl[efptCOUL] += dvdl_coul;
832 #pragma omp atomic
833     dvdl[efptVDW] += dvdl_vdw;
834
835     /* Estimate flops, average for free energy stuff:
836      * 12  flops per outer iteration
837      * 150 flops per inner iteration
838      */
839 #pragma omp atomic
840     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
841 }
842
843 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
844                                rvec* gmx_restrict         xx,
845                                gmx::ForceWithShiftForces* forceWithShiftForces,
846                                const t_forcerec* gmx_restrict fr,
847                                const t_mdatoms* gmx_restrict mdatoms,
848                                nb_kernel_data_t* gmx_restrict kernel_data,
849                                t_nrnb* gmx_restrict nrnb);
850
851 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
852 static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
853 {
854     if (useSimd)
855     {
856 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
857         /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
858         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
859                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
860 #else
861         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
862                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
863 #endif
864     }
865     else
866     {
867         return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
868                                       elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
869     }
870 }
871
872 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
873 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
874 {
875     if (vdwModifierIsPotSwitch)
876     {
877         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
878                                         elecInteractionTypeIsEwald, true>(useSimd));
879     }
880     else
881     {
882         return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
883                                         elecInteractionTypeIsEwald, false>(useSimd));
884     }
885 }
886
887 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
888 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
889                                                           const bool vdwModifierIsPotSwitch,
890                                                           const bool useSimd)
891 {
892     if (elecInteractionTypeIsEwald)
893     {
894         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
895                 vdwModifierIsPotSwitch, useSimd));
896     }
897     else
898     {
899         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
900                 vdwModifierIsPotSwitch, useSimd));
901     }
902 }
903
904 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
905 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
906                                                          const bool elecInteractionTypeIsEwald,
907                                                          const bool vdwModifierIsPotSwitch,
908                                                          const bool useSimd)
909 {
910     if (vdwInteractionTypeIsEwald)
911     {
912         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
913                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
914     }
915     else
916     {
917         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
918                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
919     }
920 }
921
922 template<bool useSoftCore>
923 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
924                                                                   const bool vdwInteractionTypeIsEwald,
925                                                                   const bool elecInteractionTypeIsEwald,
926                                                                   const bool vdwModifierIsPotSwitch,
927                                                                   const bool useSimd)
928 {
929     if (scLambdasOrAlphasDiffer)
930     {
931         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
932                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
933     }
934     else
935     {
936         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
937                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
938     }
939 }
940
941 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
942                                      const bool        vdwInteractionTypeIsEwald,
943                                      const bool        elecInteractionTypeIsEwald,
944                                      const bool        vdwModifierIsPotSwitch,
945                                      const bool        useSimd,
946                                      const t_forcerec* fr)
947 {
948     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
949     {
950         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
951                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
952                 vdwModifierIsPotSwitch, useSimd));
953     }
954     else
955     {
956         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
957                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
958                 vdwModifierIsPotSwitch, useSimd));
959     }
960 }
961
962
963 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
964                                rvec*                      xx,
965                                gmx::ForceWithShiftForces* ff,
966                                const t_forcerec*          fr,
967                                const t_mdatoms*           mdatoms,
968                                nb_kernel_data_t*          kernel_data,
969                                t_nrnb*                    nrnb)
970 {
971     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
972                "Unsupported eeltype with free energy");
973
974     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
975     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
976     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
977     bool       scLambdasOrAlphasDiffer    = true;
978     const bool useSimd                    = fr->use_simd_kernels;
979
980     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
981     {
982         scLambdasOrAlphasDiffer = false;
983     }
984     else if (fr->sc_r_power == 6.0_real)
985     {
986         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
987         {
988             scLambdasOrAlphasDiffer = false;
989         }
990     }
991     else
992     {
993         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
994     }
995
996     KernelFunction kernelFunc;
997     kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
998                                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, fr);
999     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1000 }