Simplify t_forcerec destruction
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forceoutput.h"
51 #include "gromacs/mdtypes/forcerec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/utility/fatalerror.h"
54
55
56 //! Enum for templating the soft-core treatment in the kernel
57 enum class SoftCoreTreatment
58 {
59     None,    //!< No soft-core
60     RPower6, //!< Soft-core with r-power = 6
61     RPower48 //!< Soft-core with r-power = 48
62 };
63
64 //! Most treatments are fine with float in mixed-precision mode.
65 template<SoftCoreTreatment softCoreTreatment>
66 struct SoftCoreReal
67 {
68     //! Real type for soft-core calculations
69     using Real = real;
70 };
71
72 //! This treatment requires double precision for some computations.
73 template<>
74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
75 {
76     //! Real type for soft-core calculations
77     using Real = double;
78 };
79
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template<SoftCoreTreatment softCoreTreatment>
82 static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
83 {
84     *invPthRoot = gmx::invsqrt(std::cbrt(r));
85     *pthRoot    = 1 / (*invPthRoot);
86 }
87
88 // We need a double version to make the specialization below work
89 #if !GMX_DOUBLE
90 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
91 template<SoftCoreTreatment softCoreTreatment>
92 static inline void pthRoot(const double r, real* pthRoot, double* invPthRoot)
93 {
94     *invPthRoot = gmx::invsqrt(std::cbrt(r));
95     *pthRoot    = 1 / (*invPthRoot);
96 }
97 #endif
98
99 //! Computes r^(1/p) and 1/r^(1/p) for p=48
100 template<>
101 inline void pthRoot<SoftCoreTreatment::RPower48>(const double r, real* pthRoot, double* invPthRoot)
102 {
103     *pthRoot    = std::pow(r, 1.0 / 48.0);
104     *invPthRoot = 1 / (*pthRoot);
105 }
106
107 template<SoftCoreTreatment softCoreTreatment>
108 static inline real calculateSigmaPow(const real sigma6)
109 {
110     if (softCoreTreatment == SoftCoreTreatment::RPower6)
111     {
112         return sigma6;
113     }
114     else
115     {
116         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
117         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
118         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
119         return (sigmaPow);
120     }
121 }
122
123 template<SoftCoreTreatment softCoreTreatment, class SCReal>
124 static inline real calculateRinv6(const SCReal rinvV)
125 {
126     if (softCoreTreatment == SoftCoreTreatment::RPower6)
127     {
128         return rinvV;
129     }
130     else
131     {
132         real rinv6 = rinvV * rinvV;
133         return (rinv6 * rinv6 * rinv6);
134     }
135 }
136
137 static inline real calculateVdw6(const real c6, const real rinv6)
138 {
139     return (c6 * rinv6);
140 }
141
142 static inline real calculateVdw12(const real c12, const real rinv6)
143 {
144     return (c12 * rinv6 * rinv6);
145 }
146
147 /* reaction-field electrostatics */
148 template<class SCReal>
149 static inline SCReal
150 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf, const real two)
151 {
152     return (qq * (rinv - two * krf * r * r));
153 }
154 template<class SCReal>
155 static inline real
156 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf, const real potentialShift)
157 {
158     return (qq * (rinv + krf * r * r - potentialShift));
159 }
160
161 /* Ewald electrostatics */
162 static inline real ewaldScalarForce(const real coulomb, const real rinv)
163 {
164     return (coulomb * rinv);
165 }
166 static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
167 {
168     return (coulomb * (rinv - potentialShift));
169 }
170
171 /* cutoff LJ */
172 static inline real lennardJonesScalarForce(const real v6, const real v12)
173 {
174     return (v12 - v6);
175 }
176 static inline real lennardJonesPotential(const real v6,
177                                          const real v12,
178                                          const real c6,
179                                          const real c12,
180                                          const real repulsionShift,
181                                          const real dispersionShift,
182                                          const real onesixth,
183                                          const real onetwelfth)
184 {
185     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
186 }
187
188 /* Ewald LJ */
189 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
190 {
191     return (c6grid * potentialShift * onesixth);
192 }
193
194 /* LJ Potential switch */
195 template<class SCReal>
196 static inline SCReal potSwitchScalarForceMod(const SCReal fScalarInp,
197                                              const real   potential,
198                                              const real   sw,
199                                              const SCReal r,
200                                              const real   rVdw,
201                                              const real   dsw,
202                                              const real   zero)
203 {
204     if (r < rVdw)
205     {
206         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
207         return (fScalar);
208     }
209     return (zero);
210 }
211 template<class SCReal>
212 static inline real
213 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
214 {
215     if (r < rVdw)
216     {
217         real potential = potentialInp * sw;
218         return (potential);
219     }
220     return (zero);
221 }
222
223
224 //! Templated free-energy non-bonded kernel
225 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
226 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
227                                   rvec* gmx_restrict         xx,
228                                   gmx::ForceWithShiftForces* forceWithShiftForces,
229                                   const t_forcerec* gmx_restrict fr,
230                                   const t_mdatoms* gmx_restrict mdatoms,
231                                   nb_kernel_data_t* gmx_restrict kernel_data,
232                                   t_nrnb* gmx_restrict nrnb)
233 {
234     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
235
236     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
237
238 #define STATE_A 0
239 #define STATE_B 1
240 #define NSTATES 2
241
242     constexpr real onetwelfth = 1.0 / 12.0;
243     constexpr real onesixth   = 1.0 / 6.0;
244     constexpr real zero       = 0.0;
245     constexpr real half       = 0.5;
246     constexpr real one        = 1.0;
247     constexpr real two        = 2.0;
248     constexpr real six        = 6.0;
249
250     /* Extract pointer to non-bonded interaction constants */
251     const interaction_const_t* ic = fr->ic;
252
253     // Extract pair list data
254     const int  nri    = nlist->nri;
255     const int* iinr   = nlist->iinr;
256     const int* jindex = nlist->jindex;
257     const int* jjnr   = nlist->jjnr;
258     const int* shift  = nlist->shift;
259     const int* gid    = nlist->gid;
260
261     const real* shiftvec      = fr->shift_vec[0];
262     const real* chargeA       = mdatoms->chargeA;
263     const real* chargeB       = mdatoms->chargeB;
264     real*       Vc            = kernel_data->energygrp_elec;
265     const int*  typeA         = mdatoms->typeA;
266     const int*  typeB         = mdatoms->typeB;
267     const int   ntype         = fr->ntype;
268     const real* nbfp          = fr->nbfp.data();
269     const real* nbfp_grid     = fr->ljpme_c6grid;
270     real*       Vv            = kernel_data->energygrp_vdw;
271     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
272     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
273     real*       dvdl          = kernel_data->dvdl;
274     const real  alpha_coul    = fr->sc_alphacoul;
275     const real  alpha_vdw     = fr->sc_alphavdw;
276     const real  lam_power     = fr->sc_power;
277     const real  sigma6_def    = fr->sc_sigma6_def;
278     const real  sigma6_min    = fr->sc_sigma6_min;
279     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
280     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
281     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
282
283     // Extract data from interaction_const_t
284     const real facel           = ic->epsfac;
285     const real rcoulomb        = ic->rcoulomb;
286     const real krf             = ic->k_rf;
287     const real crf             = ic->c_rf;
288     const real sh_lj_ewald     = ic->sh_lj_ewald;
289     const real rvdw            = ic->rvdw;
290     const real dispersionShift = ic->dispersion_shift.cpot;
291     const real repulsionShift  = ic->repulsion_shift.cpot;
292
293     // Note that the nbnxm kernels do not support Coulomb potential switching at all
294     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
295                "Potential switching is not supported for Coulomb with FEP");
296
297     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
298     if (vdwModifierIsPotSwitch)
299     {
300         const real d = ic->rvdw - ic->rvdw_switch;
301         vdw_swV3     = -10.0 / (d * d * d);
302         vdw_swV4     = 15.0 / (d * d * d * d);
303         vdw_swV5     = -6.0 / (d * d * d * d * d);
304         vdw_swF2     = -30.0 / (d * d * d);
305         vdw_swF3     = 60.0 / (d * d * d * d);
306         vdw_swF4     = -30.0 / (d * d * d * d * d);
307     }
308     else
309     {
310         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
311         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
312     }
313
314     int icoul;
315     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
316     {
317         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
318     }
319     else
320     {
321         icoul = GMX_NBKERNEL_ELEC_NONE;
322     }
323
324     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
325     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
326
327     const real* tab_ewald_F_lj = nullptr;
328     const real* tab_ewald_V_lj = nullptr;
329     const real* ewtab          = nullptr;
330     real        ewtabscale     = 0;
331     real        ewtabhalfspace = 0;
332     real        sh_ewald       = 0;
333     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
334     {
335         const auto& tables = *ic->coulombEwaldTables;
336         sh_ewald           = ic->sh_ewald;
337         ewtab              = tables.tableFDV0.data();
338         ewtabscale         = tables.scale;
339         ewtabhalfspace     = half / ewtabscale;
340         tab_ewald_F_lj     = tables.tableF.data();
341         tab_ewald_V_lj     = tables.tableV.data();
342     }
343
344     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
345      * reciprocal space. When we use non-switched Ewald interactions, we
346      * assume the soft-coring does not significantly affect the grid contribution
347      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
348      *
349      * However, we cannot use this approach for switch-modified since we would then
350      * effectively end up evaluating a significantly different interaction here compared to the
351      * normal (non-free-energy) kernels, either by applying a cutoff at a different
352      * position than what the user requested, or by switching different
353      * things (1/r rather than short-range Ewald). For these settings, we just
354      * use the traditional short-range Ewald interaction in that case.
355      */
356     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
357                        "Can not apply soft-core to switched Ewald potentials");
358
359     SCReal dvdl_coul = 0; /* Needs double for sc_power==48 */
360     SCReal dvdl_vdw  = 0; /* Needs double for sc_power==48 */
361
362     /* Lambda factor for state A, 1-lambda*/
363     real LFC[NSTATES], LFV[NSTATES];
364     LFC[STATE_A] = one - lambda_coul;
365     LFV[STATE_A] = one - lambda_vdw;
366
367     /* Lambda factor for state B, lambda*/
368     LFC[STATE_B] = lambda_coul;
369     LFV[STATE_B] = lambda_vdw;
370
371     /*derivative of the lambda factor for state A and B */
372     real DLF[NSTATES];
373     DLF[STATE_A] = -1;
374     DLF[STATE_B] = 1;
375
376     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
377     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
378     for (int i = 0; i < NSTATES; i++)
379     {
380         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
381         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
382         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
383         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
384     }
385
386     // TODO: We should get rid of using pointers to real
387     const real* x             = xx[0];
388     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
389     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
390
391     for (int n = 0; n < nri; n++)
392     {
393         int npair_within_cutoff = 0;
394
395         const int  is3   = 3 * shift[n];
396         const real shX   = shiftvec[is3];
397         const real shY   = shiftvec[is3 + 1];
398         const real shZ   = shiftvec[is3 + 2];
399         const int  nj0   = jindex[n];
400         const int  nj1   = jindex[n + 1];
401         const int  ii    = iinr[n];
402         const int  ii3   = 3 * ii;
403         const real ix    = shX + x[ii3 + 0];
404         const real iy    = shY + x[ii3 + 1];
405         const real iz    = shZ + x[ii3 + 2];
406         const real iqA   = facel * chargeA[ii];
407         const real iqB   = facel * chargeB[ii];
408         const int  ntiA  = 2 * ntype * typeA[ii];
409         const int  ntiB  = 2 * ntype * typeB[ii];
410         real       vctot = 0;
411         real       vvtot = 0;
412         real       fix   = 0;
413         real       fiy   = 0;
414         real       fiz   = 0;
415
416         for (int k = nj0; k < nj1; k++)
417         {
418             int        tj[NSTATES];
419             const int  jnr = jjnr[k];
420             const int  j3  = 3 * jnr;
421             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
422             real       r, rinv, rp, rpm2;
423             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
424             const real dx  = ix - x[j3];
425             const real dy  = iy - x[j3 + 1];
426             const real dz  = iz - x[j3 + 2];
427             const real rsq = dx * dx + dy * dy + dz * dz;
428             SCReal     FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
429
430             if (rsq >= rcutoff_max2)
431             {
432                 /* We save significant time by skipping all code below.
433                  * Note that with soft-core interactions, the actual cut-off
434                  * check might be different. But since the soft-core distance
435                  * is always larger than r, checking on r here is safe.
436                  */
437                 continue;
438             }
439             npair_within_cutoff++;
440
441             if (rsq > 0)
442             {
443                 /* Note that unlike in the nbnxn kernels, we do not need
444                  * to clamp the value of rsq before taking the invsqrt
445                  * to avoid NaN in the LJ calculation, since here we do
446                  * not calculate LJ interactions when C6 and C12 are zero.
447                  */
448
449                 rinv = gmx::invsqrt(rsq);
450                 r    = rsq * rinv;
451             }
452             else
453             {
454                 /* The force at r=0 is zero, because of symmetry.
455                  * But note that the potential is in general non-zero,
456                  * since the soft-cored r will be non-zero.
457                  */
458                 rinv = 0;
459                 r    = 0;
460             }
461
462             if (softCoreTreatment == SoftCoreTreatment::None)
463             {
464                 /* The soft-core power p will not affect the results
465                  * with not using soft-core, so we use power of 0 which gives
466                  * the simplest math and cheapest code.
467                  */
468                 rpm2 = rinv * rinv;
469                 rp   = 1;
470             }
471             if (softCoreTreatment == SoftCoreTreatment::RPower6)
472             {
473                 rpm2 = rsq * rsq;  /* r4 */
474                 rp   = rpm2 * rsq; /* r6 */
475             }
476             if (softCoreTreatment == SoftCoreTreatment::RPower48)
477             {
478                 rp   = rsq * rsq * rsq; /* r6 */
479                 rp   = rp * rp;         /* r12 */
480                 rp   = rp * rp;         /* r24 */
481                 rp   = rp * rp;         /* r48 */
482                 rpm2 = rp / rsq;        /* r46 */
483             }
484
485             real Fscal = 0;
486
487             qq[STATE_A] = iqA * chargeA[jnr];
488             qq[STATE_B] = iqB * chargeB[jnr];
489
490             tj[STATE_A] = ntiA + 2 * typeA[jnr];
491             tj[STATE_B] = ntiB + 2 * typeB[jnr];
492
493             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
494             {
495                 c6[STATE_A] = nbfp[tj[STATE_A]];
496                 c6[STATE_B] = nbfp[tj[STATE_B]];
497
498                 for (int i = 0; i < NSTATES; i++)
499                 {
500                     c12[i] = nbfp[tj[i] + 1];
501                     if (useSoftCore)
502                     {
503                         real sigma6[NSTATES];
504                         if ((c6[i] > 0) && (c12[i] > 0))
505                         {
506                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
507                             sigma6[i] = half * c12[i] / c6[i];
508                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
509                             {
510                                 sigma6[i] = sigma6_min;
511                             }
512                         }
513                         else
514                         {
515                             sigma6[i] = sigma6_def;
516                         }
517                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
518                     }
519                 }
520
521                 if (useSoftCore)
522                 {
523                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
524                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
525                     {
526                         alpha_vdw_eff  = 0;
527                         alpha_coul_eff = 0;
528                     }
529                     else
530                     {
531                         alpha_vdw_eff  = alpha_vdw;
532                         alpha_coul_eff = alpha_coul;
533                     }
534                 }
535
536                 for (int i = 0; i < NSTATES; i++)
537                 {
538                     FscalC[i] = 0;
539                     FscalV[i] = 0;
540                     Vcoul[i]  = 0;
541                     Vvdw[i]   = 0;
542
543                     real   rinvC, rinvV;
544                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
545
546                     /* Only spend time on A or B state if it is non-zero */
547                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
548                     {
549                         /* this section has to be inside the loop because of the dependence on sigma_pow */
550                         if (useSoftCore)
551                         {
552                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
553                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
554                             if (scLambdasOrAlphasDiffer)
555                             {
556                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
557                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
558                             }
559                             else
560                             {
561                                 /* We can avoid one expensive pow and one / operation */
562                                 rpinvV = rpinvC;
563                                 rinvV  = rinvC;
564                                 rV     = rC;
565                             }
566                         }
567                         else
568                         {
569                             rpinvC = 1;
570                             rinvC  = rinv;
571                             rC     = r;
572
573                             rpinvV = 1;
574                             rinvV  = rinv;
575                             rV     = r;
576                         }
577
578                         /* Only process the coulomb interactions if we have charges,
579                          * and if we either include all entries in the list (no cutoff
580                          * used in the kernel), or if we are within the cutoff.
581                          */
582                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
583                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
584
585                         if ((qq[i] != 0) && computeElecInteraction)
586                         {
587                             if (elecInteractionTypeIsEwald)
588                             {
589                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
590                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
591                             }
592                             else
593                             {
594                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
595                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
596                             }
597                         }
598
599                         /* Only process the VDW interactions if we have
600                          * some non-zero parameters, and if we either
601                          * include all entries in the list (no cutoff used
602                          * in the kernel), or if we are within the cutoff.
603                          */
604                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
605                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
606                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
607                         {
608                             real rinv6;
609                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
610                             {
611                                 rinv6 = calculateRinv6<softCoreTreatment>(rpinvV);
612                             }
613                             else
614                             {
615                                 rinv6 = calculateRinv6<softCoreTreatment>(rinvV);
616                             }
617                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
618                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
619
620                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
621                                                             dispersionShift, onesixth, onetwelfth);
622                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
623
624                             if (vdwInteractionTypeIsEwald)
625                             {
626                                 /* Subtract the grid potential at the cut-off */
627                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
628                                                                          sh_lj_ewald, onesixth);
629                             }
630
631                             if (vdwModifierIsPotSwitch)
632                             {
633                                 real d        = rV - ic->rvdw_switch;
634                                 d             = (d > zero) ? d : zero;
635                                 const real d2 = d * d;
636                                 const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
637                                 const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
638
639                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
640                                                                     rvdw, dsw, zero);
641                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
642                             }
643                         }
644
645                         /* FscalC (and FscalV) now contain: dV/drC * rC
646                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
647                          * Further down we first multiply by r^p-2 and then by
648                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
649                          */
650                         FscalC[i] *= rpinvC;
651                         FscalV[i] *= rpinvV;
652                     }
653                 }
654
655                 /* Assemble A and B states */
656                 for (int i = 0; i < NSTATES; i++)
657                 {
658                     vctot += LFC[i] * Vcoul[i];
659                     vvtot += LFV[i] * Vvdw[i];
660
661                     Fscal += LFC[i] * FscalC[i] * rpm2;
662                     Fscal += LFV[i] * FscalV[i] * rpm2;
663
664                     if (useSoftCore)
665                     {
666                         dvdl_coul +=
667                                 Vcoul[i] * DLF[i]
668                                 + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma_pow[i];
669                         dvdl_vdw += Vvdw[i] * DLF[i]
670                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma_pow[i];
671                     }
672                     else
673                     {
674                         dvdl_coul += Vcoul[i] * DLF[i];
675                         dvdl_vdw += Vvdw[i] * DLF[i];
676                     }
677                 }
678             }
679             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
680             {
681                 /* For excluded pairs, which are only in this pair list when
682                  * using the Verlet scheme, we don't use soft-core.
683                  * As there is no singularity, there is no need for soft-core.
684                  */
685                 const real FF = -two * krf;
686                 real       VV = krf * rsq - crf;
687
688                 if (ii == jnr)
689                 {
690                     VV *= half;
691                 }
692
693                 for (int i = 0; i < NSTATES; i++)
694                 {
695                     vctot += LFC[i] * qq[i] * VV;
696                     Fscal += LFC[i] * qq[i] * FF;
697                     dvdl_coul += DLF[i] * qq[i] * VV;
698                 }
699             }
700
701             if (elecInteractionTypeIsEwald && r < rcoulomb)
702             {
703                 /* See comment in the preamble. When using Ewald interactions
704                  * (unless we use a switch modifier) we subtract the reciprocal-space
705                  * Ewald component here which made it possible to apply the free
706                  * energy interaction to 1/r (vanilla coulomb short-range part)
707                  * above. This gets us closer to the ideal case of applying
708                  * the softcore to the entire electrostatic interaction,
709                  * including the reciprocal-space component.
710                  */
711                 real v_lr, f_lr;
712
713                 const real ewrt   = r * ewtabscale;
714                 int        ewitab = static_cast<int>(ewrt);
715                 const real eweps  = ewrt - ewitab;
716                 ewitab            = 4 * ewitab;
717                 f_lr              = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
718                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
719                 f_lr *= rinv;
720
721                 /* Note that any possible Ewald shift has already been applied in
722                  * the normal interaction part above.
723                  */
724
725                 if (ii == jnr)
726                 {
727                     /* If we get here, the i particle (ii) has itself (jnr)
728                      * in its neighborlist. This can only happen with the Verlet
729                      * scheme, and corresponds to a self-interaction that will
730                      * occur twice. Scale it down by 50% to only include it once.
731                      */
732                     v_lr *= half;
733                 }
734
735                 for (int i = 0; i < NSTATES; i++)
736                 {
737                     vctot -= LFC[i] * qq[i] * v_lr;
738                     Fscal -= LFC[i] * qq[i] * f_lr;
739                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
740                 }
741             }
742
743             if (vdwInteractionTypeIsEwald && r < rvdw)
744             {
745                 /* See comment in the preamble. When using LJ-Ewald interactions
746                  * (unless we use a switch modifier) we subtract the reciprocal-space
747                  * Ewald component here which made it possible to apply the free
748                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
749                  * above. This gets us closer to the ideal case of applying
750                  * the softcore to the entire VdW interaction,
751                  * including the reciprocal-space component.
752                  */
753                 /* We could also use the analytical form here
754                  * iso a table, but that can cause issues for
755                  * r close to 0 for non-interacting pairs.
756                  */
757
758                 const real rs   = rsq * rinv * ewtabscale;
759                 const int  ri   = static_cast<int>(rs);
760                 const real frac = rs - ri;
761                 const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
762                 /* TODO: Currently the Ewald LJ table does not contain
763                  * the factor 1/6, we should add this.
764                  */
765                 const real FF = f_lr * rinv / six;
766                 real VV = (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
767
768                 if (ii == jnr)
769                 {
770                     /* If we get here, the i particle (ii) has itself (jnr)
771                      * in its neighborlist. This can only happen with the Verlet
772                      * scheme, and corresponds to a self-interaction that will
773                      * occur twice. Scale it down by 50% to only include it once.
774                      */
775                     VV *= half;
776                 }
777
778                 for (int i = 0; i < NSTATES; i++)
779                 {
780                     const real c6grid = nbfp_grid[tj[i]];
781                     vvtot += LFV[i] * c6grid * VV;
782                     Fscal += LFV[i] * c6grid * FF;
783                     dvdl_vdw += (DLF[i] * c6grid) * VV;
784                 }
785             }
786
787             if (doForces)
788             {
789                 const real tx = Fscal * dx;
790                 const real ty = Fscal * dy;
791                 const real tz = Fscal * dz;
792                 fix           = fix + tx;
793                 fiy           = fiy + ty;
794                 fiz           = fiz + tz;
795                 /* OpenMP atomics are expensive, but this kernels is also
796                  * expensive, so we can take this hit, instead of using
797                  * thread-local output buffers and extra reduction.
798                  *
799                  * All the OpenMP regions in this file are trivial and should
800                  * not throw, so no need for try/catch.
801                  */
802 #pragma omp atomic
803                 f[j3] -= tx;
804 #pragma omp atomic
805                 f[j3 + 1] -= ty;
806 #pragma omp atomic
807                 f[j3 + 2] -= tz;
808             }
809         }
810
811         /* The atomics below are expensive with many OpenMP threads.
812          * Here unperturbed i-particles will usually only have a few
813          * (perturbed) j-particles in the list. Thus with a buffered list
814          * we can skip a significant number of i-reductions with a check.
815          */
816         if (npair_within_cutoff > 0)
817         {
818             if (doForces)
819             {
820 #pragma omp atomic
821                 f[ii3] += fix;
822 #pragma omp atomic
823                 f[ii3 + 1] += fiy;
824 #pragma omp atomic
825                 f[ii3 + 2] += fiz;
826             }
827             if (doShiftForces)
828             {
829 #pragma omp atomic
830                 fshift[is3] += fix;
831 #pragma omp atomic
832                 fshift[is3 + 1] += fiy;
833 #pragma omp atomic
834                 fshift[is3 + 2] += fiz;
835             }
836             if (doPotential)
837             {
838                 int ggid = gid[n];
839 #pragma omp atomic
840                 Vc[ggid] += vctot;
841 #pragma omp atomic
842                 Vv[ggid] += vvtot;
843             }
844         }
845     }
846
847 #pragma omp atomic
848     dvdl[efptCOUL] += dvdl_coul;
849 #pragma omp atomic
850     dvdl[efptVDW] += dvdl_vdw;
851
852     /* Estimate flops, average for free energy stuff:
853      * 12  flops per outer iteration
854      * 150 flops per inner iteration
855      */
856 #pragma omp atomic
857     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
858 }
859
860 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
861                                rvec* gmx_restrict         xx,
862                                gmx::ForceWithShiftForces* forceWithShiftForces,
863                                const t_forcerec* gmx_restrict fr,
864                                const t_mdatoms* gmx_restrict mdatoms,
865                                nb_kernel_data_t* gmx_restrict kernel_data,
866                                t_nrnb* gmx_restrict nrnb);
867
868 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
869 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
870 {
871     if (vdwModifierIsPotSwitch)
872     {
873         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
874                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
875     }
876     else
877     {
878         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
879                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
880     }
881 }
882
883 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
884 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
885                                                           const bool vdwModifierIsPotSwitch)
886 {
887     if (elecInteractionTypeIsEwald)
888     {
889         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
890                 vdwModifierIsPotSwitch));
891     }
892     else
893     {
894         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
895                 vdwModifierIsPotSwitch));
896     }
897 }
898
899 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
900 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
901                                                          const bool elecInteractionTypeIsEwald,
902                                                          const bool vdwModifierIsPotSwitch)
903 {
904     if (vdwInteractionTypeIsEwald)
905     {
906         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, true>(
907                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
908     }
909     else
910     {
911         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, false>(
912                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
913     }
914 }
915
916 template<SoftCoreTreatment softCoreTreatment>
917 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
918                                                                   const bool vdwInteractionTypeIsEwald,
919                                                                   const bool elecInteractionTypeIsEwald,
920                                                                   const bool vdwModifierIsPotSwitch)
921 {
922     if (scLambdasOrAlphasDiffer)
923     {
924         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(
925                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
926     }
927     else
928     {
929         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(
930                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
931     }
932 }
933
934 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
935                                      const bool        vdwInteractionTypeIsEwald,
936                                      const bool        elecInteractionTypeIsEwald,
937                                      const bool        vdwModifierIsPotSwitch,
938                                      const t_forcerec* fr)
939 {
940     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
941     {
942         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(
943                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
944                 vdwModifierIsPotSwitch));
945     }
946     else if (fr->sc_r_power == 6.0_real)
947     {
948         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(
949                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
950                 vdwModifierIsPotSwitch));
951     }
952     else
953     {
954         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(
955                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
956                 vdwModifierIsPotSwitch));
957     }
958 }
959
960
961 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
962                                rvec*                      xx,
963                                gmx::ForceWithShiftForces* ff,
964                                const t_forcerec*          fr,
965                                const t_mdatoms*           mdatoms,
966                                nb_kernel_data_t*          kernel_data,
967                                t_nrnb*                    nrnb)
968 {
969     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
970                "Unsupported eeltype with free energy");
971
972     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
973     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
974     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
975     bool       scLambdasOrAlphasDiffer    = true;
976
977     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
978     {
979         scLambdasOrAlphasDiffer = false;
980     }
981     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
982     {
983         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
984         {
985             scLambdasOrAlphasDiffer = false;
986         }
987     }
988     else
989     {
990         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
991     }
992     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
993                                                elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
994     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
995 }