Fix copyright
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forceoutput.h"
51 #include "gromacs/mdtypes/forcerec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/utility/fatalerror.h"
54
55
56 //! Enum for templating the soft-core treatment in the kernel
57 enum class SoftCoreTreatment
58 {
59     None,    //!< No soft-core
60     RPower6, //!< Soft-core with r-power = 6
61     RPower48 //!< Soft-core with r-power = 48
62 };
63
64 //! Most treatments are fine with float in mixed-precision mode.
65 template<SoftCoreTreatment softCoreTreatment>
66 struct SoftCoreReal
67 {
68     //! Real type for soft-core calculations
69     using Real = real;
70 };
71
72 //! This treatment requires double precision for some computations.
73 template<>
74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
75 {
76     //! Real type for soft-core calculations
77     using Real = double;
78 };
79
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template<SoftCoreTreatment softCoreTreatment>
82 static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
83 {
84     *invPthRoot = gmx::invsqrt(std::cbrt(r));
85     *pthRoot    = 1 / (*invPthRoot);
86 }
87
88 // We need a double version to make the specialization below work
89 #if !GMX_DOUBLE
90 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
91 template<SoftCoreTreatment softCoreTreatment>
92 static inline void pthRoot(const double r, real* pthRoot, double* invPthRoot)
93 {
94     *invPthRoot = gmx::invsqrt(std::cbrt(r));
95     *pthRoot    = 1 / (*invPthRoot);
96 }
97 #endif
98
99 //! Computes r^(1/p) and 1/r^(1/p) for p=48
100 template<>
101 inline void pthRoot<SoftCoreTreatment::RPower48>(const double r, real* pthRoot, double* invPthRoot)
102 {
103     *pthRoot    = std::pow(r, 1.0 / 48.0);
104     *invPthRoot = 1 / (*pthRoot);
105 }
106
107 template<SoftCoreTreatment softCoreTreatment>
108 static inline real calculateSigmaPow(const real sigma6)
109 {
110     if (softCoreTreatment == SoftCoreTreatment::RPower6)
111     {
112         return sigma6;
113     }
114     else
115     {
116         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
117         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
118         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
119         return (sigmaPow);
120     }
121 }
122
123 template<SoftCoreTreatment softCoreTreatment, class SCReal>
124 static inline real calculateRinv6(const SCReal rinvV)
125 {
126     if (softCoreTreatment == SoftCoreTreatment::RPower6)
127     {
128         return rinvV;
129     }
130     else
131     {
132         real rinv6 = rinvV * rinvV;
133         return (rinv6 * rinv6 * rinv6);
134     }
135 }
136
137 static inline real calculateVdw6(const real c6, const real rinv6)
138 {
139     return (c6 * rinv6);
140 }
141
142 static inline real calculateVdw12(const real c12, const real rinv6)
143 {
144     return (c12 * rinv6 * rinv6);
145 }
146
147 /* reaction-field electrostatics */
148 template<class SCReal>
149 static inline SCReal
150 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf, const real two)
151 {
152     return (qq * (rinv - two * krf * r * r));
153 }
154 template<class SCReal>
155 static inline real
156 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf, const real potentialShift)
157 {
158     return (qq * (rinv + krf * r * r - potentialShift));
159 }
160
161 /* Ewald electrostatics */
162 static inline real ewaldScalarForce(const real coulomb, const real rinv)
163 {
164     return (coulomb * rinv);
165 }
166 static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
167 {
168     return (coulomb * (rinv - potentialShift));
169 }
170
171 /* cutoff LJ */
172 static inline real lennardJonesScalarForce(const real v6, const real v12)
173 {
174     return (v12 - v6);
175 }
176 static inline real lennardJonesPotential(const real v6,
177                                          const real v12,
178                                          const real c6,
179                                          const real c12,
180                                          const real repulsionShift,
181                                          const real dispersionShift,
182                                          const real onesixth,
183                                          const real onetwelfth)
184 {
185     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
186 }
187
188 /* Ewald LJ */
189 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
190 {
191     return (c6grid * potentialShift * onesixth);
192 }
193
194 /* LJ Potential switch */
195 template<class SCReal>
196 static inline SCReal potSwitchScalarForceMod(const SCReal fScalarInp,
197                                              const real   potential,
198                                              const real   sw,
199                                              const SCReal r,
200                                              const real   rVdw,
201                                              const real   dsw,
202                                              const real   zero)
203 {
204     if (r < rVdw)
205     {
206         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
207         return (fScalar);
208     }
209     return (zero);
210 }
211 template<class SCReal>
212 static inline real
213 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
214 {
215     if (r < rVdw)
216     {
217         real potential = potentialInp * sw;
218         return (potential);
219     }
220     return (zero);
221 }
222
223
224 //! Templated free-energy non-bonded kernel
225 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
226 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
227                                   rvec* gmx_restrict         xx,
228                                   gmx::ForceWithShiftForces* forceWithShiftForces,
229                                   const t_forcerec* gmx_restrict fr,
230                                   const t_mdatoms* gmx_restrict mdatoms,
231                                   nb_kernel_data_t* gmx_restrict kernel_data,
232                                   t_nrnb* gmx_restrict nrnb)
233 {
234     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
235
236     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
237
238 #define STATE_A 0
239 #define STATE_B 1
240 #define NSTATES 2
241
242     constexpr real onetwelfth = 1.0 / 12.0;
243     constexpr real onesixth   = 1.0 / 6.0;
244     constexpr real zero       = 0.0;
245     constexpr real half       = 0.5;
246     constexpr real one        = 1.0;
247     constexpr real two        = 2.0;
248     constexpr real six        = 6.0;
249
250     /* Extract pointer to non-bonded interaction constants */
251     const interaction_const_t* ic = fr->ic;
252
253     // Extract pair list data
254     const int  nri    = nlist->nri;
255     const int* iinr   = nlist->iinr;
256     const int* jindex = nlist->jindex;
257     const int* jjnr   = nlist->jjnr;
258     const int* shift  = nlist->shift;
259     const int* gid    = nlist->gid;
260
261     const real* shiftvec      = fr->shift_vec[0];
262     const real* chargeA       = mdatoms->chargeA;
263     const real* chargeB       = mdatoms->chargeB;
264     real*       Vc            = kernel_data->energygrp_elec;
265     const int*  typeA         = mdatoms->typeA;
266     const int*  typeB         = mdatoms->typeB;
267     const int   ntype         = fr->ntype;
268     const real* nbfp          = fr->nbfp;
269     const real* nbfp_grid     = fr->ljpme_c6grid;
270     real*       Vv            = kernel_data->energygrp_vdw;
271     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
272     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
273     real*       dvdl          = kernel_data->dvdl;
274     const real  alpha_coul    = fr->sc_alphacoul;
275     const real  alpha_vdw     = fr->sc_alphavdw;
276     const real  lam_power     = fr->sc_power;
277     const real  sigma6_def    = fr->sc_sigma6_def;
278     const real  sigma6_min    = fr->sc_sigma6_min;
279     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
280     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
281     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
282
283     // Extract data from interaction_const_t
284     const real facel           = ic->epsfac;
285     const real rcoulomb        = ic->rcoulomb;
286     const real krf             = ic->k_rf;
287     const real crf             = ic->c_rf;
288     const real sh_lj_ewald     = ic->sh_lj_ewald;
289     const real rvdw            = ic->rvdw;
290     const real dispersionShift = ic->dispersion_shift.cpot;
291     const real repulsionShift  = ic->repulsion_shift.cpot;
292
293     // Note that the nbnxm kernels do not support Coulomb potential switching at all
294     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
295                "Potential switching is not supported for Coulomb with FEP");
296
297     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
298     if (vdwModifierIsPotSwitch)
299     {
300         const real d = ic->rvdw - ic->rvdw_switch;
301         vdw_swV3     = -10.0 / (d * d * d);
302         vdw_swV4     = 15.0 / (d * d * d * d);
303         vdw_swV5     = -6.0 / (d * d * d * d * d);
304         vdw_swF2     = -30.0 / (d * d * d);
305         vdw_swF3     = 60.0 / (d * d * d * d);
306         vdw_swF4     = -30.0 / (d * d * d * d * d);
307     }
308     else
309     {
310         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
311         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
312     }
313
314     int icoul;
315     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
316     {
317         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
318     }
319     else
320     {
321         icoul = GMX_NBKERNEL_ELEC_NONE;
322     }
323
324     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
325     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
326
327     const real* tab_ewald_F_lj           = nullptr;
328     const real* tab_ewald_V_lj           = nullptr;
329     const real* ewtab                    = nullptr;
330     real        coulombTableScale        = 0;
331     real        coulombTableScaleInvHalf = 0;
332     real        vdwTableScale            = 0;
333     real        vdwTableScaleInvHalf     = 0;
334     real        sh_ewald                 = 0;
335     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
336     {
337         sh_ewald = ic->sh_ewald;
338     }
339     if (elecInteractionTypeIsEwald)
340     {
341         const auto& coulombTables = *ic->coulombEwaldTables;
342         ewtab                     = coulombTables.tableFDV0.data();
343         coulombTableScale         = coulombTables.scale;
344         coulombTableScaleInvHalf  = half / coulombTableScale;
345     }
346     if (vdwInteractionTypeIsEwald)
347     {
348         const auto& vdwTables = *ic->vdwEwaldTables;
349         tab_ewald_F_lj        = vdwTables.tableF.data();
350         tab_ewald_V_lj        = vdwTables.tableV.data();
351         vdwTableScale         = vdwTables.scale;
352         vdwTableScaleInvHalf  = half / vdwTableScale;
353     }
354
355     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
356      * reciprocal space. When we use non-switched Ewald interactions, we
357      * assume the soft-coring does not significantly affect the grid contribution
358      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
359      *
360      * However, we cannot use this approach for switch-modified since we would then
361      * effectively end up evaluating a significantly different interaction here compared to the
362      * normal (non-free-energy) kernels, either by applying a cutoff at a different
363      * position than what the user requested, or by switching different
364      * things (1/r rather than short-range Ewald). For these settings, we just
365      * use the traditional short-range Ewald interaction in that case.
366      */
367     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
368                        "Can not apply soft-core to switched Ewald potentials");
369
370     SCReal dvdl_coul = 0; /* Needs double for sc_power==48 */
371     SCReal dvdl_vdw  = 0; /* Needs double for sc_power==48 */
372
373     /* Lambda factor for state A, 1-lambda*/
374     real LFC[NSTATES], LFV[NSTATES];
375     LFC[STATE_A] = one - lambda_coul;
376     LFV[STATE_A] = one - lambda_vdw;
377
378     /* Lambda factor for state B, lambda*/
379     LFC[STATE_B] = lambda_coul;
380     LFV[STATE_B] = lambda_vdw;
381
382     /*derivative of the lambda factor for state A and B */
383     real DLF[NSTATES];
384     DLF[STATE_A] = -1;
385     DLF[STATE_B] = 1;
386
387     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
388     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
389     for (int i = 0; i < NSTATES; i++)
390     {
391         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
392         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
393         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
394         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
395     }
396
397     // TODO: We should get rid of using pointers to real
398     const real* x             = xx[0];
399     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
400     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
401
402     for (int n = 0; n < nri; n++)
403     {
404         int npair_within_cutoff = 0;
405
406         const int  is3   = 3 * shift[n];
407         const real shX   = shiftvec[is3];
408         const real shY   = shiftvec[is3 + 1];
409         const real shZ   = shiftvec[is3 + 2];
410         const int  nj0   = jindex[n];
411         const int  nj1   = jindex[n + 1];
412         const int  ii    = iinr[n];
413         const int  ii3   = 3 * ii;
414         const real ix    = shX + x[ii3 + 0];
415         const real iy    = shY + x[ii3 + 1];
416         const real iz    = shZ + x[ii3 + 2];
417         const real iqA   = facel * chargeA[ii];
418         const real iqB   = facel * chargeB[ii];
419         const int  ntiA  = 2 * ntype * typeA[ii];
420         const int  ntiB  = 2 * ntype * typeB[ii];
421         real       vctot = 0;
422         real       vvtot = 0;
423         real       fix   = 0;
424         real       fiy   = 0;
425         real       fiz   = 0;
426
427         for (int k = nj0; k < nj1; k++)
428         {
429             int        tj[NSTATES];
430             const int  jnr = jjnr[k];
431             const int  j3  = 3 * jnr;
432             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
433             real       r, rinv, rp, rpm2;
434             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
435             const real dx  = ix - x[j3];
436             const real dy  = iy - x[j3 + 1];
437             const real dz  = iz - x[j3 + 2];
438             const real rsq = dx * dx + dy * dy + dz * dz;
439             SCReal     FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
440
441             if (rsq >= rcutoff_max2)
442             {
443                 /* We save significant time by skipping all code below.
444                  * Note that with soft-core interactions, the actual cut-off
445                  * check might be different. But since the soft-core distance
446                  * is always larger than r, checking on r here is safe.
447                  */
448                 continue;
449             }
450             npair_within_cutoff++;
451
452             if (rsq > 0)
453             {
454                 /* Note that unlike in the nbnxn kernels, we do not need
455                  * to clamp the value of rsq before taking the invsqrt
456                  * to avoid NaN in the LJ calculation, since here we do
457                  * not calculate LJ interactions when C6 and C12 are zero.
458                  */
459
460                 rinv = gmx::invsqrt(rsq);
461                 r    = rsq * rinv;
462             }
463             else
464             {
465                 /* The force at r=0 is zero, because of symmetry.
466                  * But note that the potential is in general non-zero,
467                  * since the soft-cored r will be non-zero.
468                  */
469                 rinv = 0;
470                 r    = 0;
471             }
472
473             if (softCoreTreatment == SoftCoreTreatment::None)
474             {
475                 /* The soft-core power p will not affect the results
476                  * with not using soft-core, so we use power of 0 which gives
477                  * the simplest math and cheapest code.
478                  */
479                 rpm2 = rinv * rinv;
480                 rp   = 1;
481             }
482             if (softCoreTreatment == SoftCoreTreatment::RPower6)
483             {
484                 rpm2 = rsq * rsq;  /* r4 */
485                 rp   = rpm2 * rsq; /* r6 */
486             }
487             if (softCoreTreatment == SoftCoreTreatment::RPower48)
488             {
489                 rp   = rsq * rsq * rsq; /* r6 */
490                 rp   = rp * rp;         /* r12 */
491                 rp   = rp * rp;         /* r24 */
492                 rp   = rp * rp;         /* r48 */
493                 rpm2 = rp / rsq;        /* r46 */
494             }
495
496             real Fscal = 0;
497
498             qq[STATE_A] = iqA * chargeA[jnr];
499             qq[STATE_B] = iqB * chargeB[jnr];
500
501             tj[STATE_A] = ntiA + 2 * typeA[jnr];
502             tj[STATE_B] = ntiB + 2 * typeB[jnr];
503
504             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
505             {
506                 c6[STATE_A] = nbfp[tj[STATE_A]];
507                 c6[STATE_B] = nbfp[tj[STATE_B]];
508
509                 for (int i = 0; i < NSTATES; i++)
510                 {
511                     c12[i] = nbfp[tj[i] + 1];
512                     if (useSoftCore)
513                     {
514                         real sigma6[NSTATES];
515                         if ((c6[i] > 0) && (c12[i] > 0))
516                         {
517                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
518                             sigma6[i] = half * c12[i] / c6[i];
519                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
520                             {
521                                 sigma6[i] = sigma6_min;
522                             }
523                         }
524                         else
525                         {
526                             sigma6[i] = sigma6_def;
527                         }
528                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
529                     }
530                 }
531
532                 if (useSoftCore)
533                 {
534                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
535                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
536                     {
537                         alpha_vdw_eff  = 0;
538                         alpha_coul_eff = 0;
539                     }
540                     else
541                     {
542                         alpha_vdw_eff  = alpha_vdw;
543                         alpha_coul_eff = alpha_coul;
544                     }
545                 }
546
547                 for (int i = 0; i < NSTATES; i++)
548                 {
549                     FscalC[i] = 0;
550                     FscalV[i] = 0;
551                     Vcoul[i]  = 0;
552                     Vvdw[i]   = 0;
553
554                     real   rinvC, rinvV;
555                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
556
557                     /* Only spend time on A or B state if it is non-zero */
558                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
559                     {
560                         /* this section has to be inside the loop because of the dependence on sigma_pow */
561                         if (useSoftCore)
562                         {
563                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
564                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
565                             if (scLambdasOrAlphasDiffer)
566                             {
567                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
568                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
569                             }
570                             else
571                             {
572                                 /* We can avoid one expensive pow and one / operation */
573                                 rpinvV = rpinvC;
574                                 rinvV  = rinvC;
575                                 rV     = rC;
576                             }
577                         }
578                         else
579                         {
580                             rpinvC = 1;
581                             rinvC  = rinv;
582                             rC     = r;
583
584                             rpinvV = 1;
585                             rinvV  = rinv;
586                             rV     = r;
587                         }
588
589                         /* Only process the coulomb interactions if we have charges,
590                          * and if we either include all entries in the list (no cutoff
591                          * used in the kernel), or if we are within the cutoff.
592                          */
593                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
594                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
595
596                         if ((qq[i] != 0) && computeElecInteraction)
597                         {
598                             if (elecInteractionTypeIsEwald)
599                             {
600                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
601                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
602                             }
603                             else
604                             {
605                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
606                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
607                             }
608                         }
609
610                         /* Only process the VDW interactions if we have
611                          * some non-zero parameters, and if we either
612                          * include all entries in the list (no cutoff used
613                          * in the kernel), or if we are within the cutoff.
614                          */
615                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
616                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
617                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
618                         {
619                             real rinv6;
620                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
621                             {
622                                 rinv6 = calculateRinv6<softCoreTreatment>(rpinvV);
623                             }
624                             else
625                             {
626                                 rinv6 = calculateRinv6<softCoreTreatment>(rinvV);
627                             }
628                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
629                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
630
631                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
632                                                             dispersionShift, onesixth, onetwelfth);
633                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
634
635                             if (vdwInteractionTypeIsEwald)
636                             {
637                                 /* Subtract the grid potential at the cut-off */
638                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
639                                                                          sh_lj_ewald, onesixth);
640                             }
641
642                             if (vdwModifierIsPotSwitch)
643                             {
644                                 real d        = rV - ic->rvdw_switch;
645                                 d             = (d > zero) ? d : zero;
646                                 const real d2 = d * d;
647                                 const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
648                                 const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
649
650                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
651                                                                     rvdw, dsw, zero);
652                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
653                             }
654                         }
655
656                         /* FscalC (and FscalV) now contain: dV/drC * rC
657                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
658                          * Further down we first multiply by r^p-2 and then by
659                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
660                          */
661                         FscalC[i] *= rpinvC;
662                         FscalV[i] *= rpinvV;
663                     }
664                 }
665
666                 /* Assemble A and B states */
667                 for (int i = 0; i < NSTATES; i++)
668                 {
669                     vctot += LFC[i] * Vcoul[i];
670                     vvtot += LFV[i] * Vvdw[i];
671
672                     Fscal += LFC[i] * FscalC[i] * rpm2;
673                     Fscal += LFV[i] * FscalV[i] * rpm2;
674
675                     if (useSoftCore)
676                     {
677                         dvdl_coul +=
678                                 Vcoul[i] * DLF[i]
679                                 + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma_pow[i];
680                         dvdl_vdw += Vvdw[i] * DLF[i]
681                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma_pow[i];
682                     }
683                     else
684                     {
685                         dvdl_coul += Vcoul[i] * DLF[i];
686                         dvdl_vdw += Vvdw[i] * DLF[i];
687                     }
688                 }
689             }
690             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
691             {
692                 /* For excluded pairs, which are only in this pair list when
693                  * using the Verlet scheme, we don't use soft-core.
694                  * As there is no singularity, there is no need for soft-core.
695                  */
696                 const real FF = -two * krf;
697                 real       VV = krf * rsq - crf;
698
699                 if (ii == jnr)
700                 {
701                     VV *= half;
702                 }
703
704                 for (int i = 0; i < NSTATES; i++)
705                 {
706                     vctot += LFC[i] * qq[i] * VV;
707                     Fscal += LFC[i] * qq[i] * FF;
708                     dvdl_coul += DLF[i] * qq[i] * VV;
709                 }
710             }
711
712             if (elecInteractionTypeIsEwald && r < rcoulomb)
713             {
714                 /* See comment in the preamble. When using Ewald interactions
715                  * (unless we use a switch modifier) we subtract the reciprocal-space
716                  * Ewald component here which made it possible to apply the free
717                  * energy interaction to 1/r (vanilla coulomb short-range part)
718                  * above. This gets us closer to the ideal case of applying
719                  * the softcore to the entire electrostatic interaction,
720                  * including the reciprocal-space component.
721                  */
722                 real v_lr, f_lr;
723
724                 const real ewrt   = r * coulombTableScale;
725                 int        ewitab = static_cast<int>(ewrt);
726                 const real eweps  = ewrt - ewitab;
727                 ewitab            = 4 * ewitab;
728                 f_lr              = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
729                 v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
730                 f_lr *= rinv;
731
732                 /* Note that any possible Ewald shift has already been applied in
733                  * the normal interaction part above.
734                  */
735
736                 if (ii == jnr)
737                 {
738                     /* If we get here, the i particle (ii) has itself (jnr)
739                      * in its neighborlist. This can only happen with the Verlet
740                      * scheme, and corresponds to a self-interaction that will
741                      * occur twice. Scale it down by 50% to only include it once.
742                      */
743                     v_lr *= half;
744                 }
745
746                 for (int i = 0; i < NSTATES; i++)
747                 {
748                     vctot -= LFC[i] * qq[i] * v_lr;
749                     Fscal -= LFC[i] * qq[i] * f_lr;
750                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
751                 }
752             }
753
754             if (vdwInteractionTypeIsEwald && r < rvdw)
755             {
756                 /* See comment in the preamble. When using LJ-Ewald interactions
757                  * (unless we use a switch modifier) we subtract the reciprocal-space
758                  * Ewald component here which made it possible to apply the free
759                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
760                  * above. This gets us closer to the ideal case of applying
761                  * the softcore to the entire VdW interaction,
762                  * including the reciprocal-space component.
763                  */
764                 /* We could also use the analytical form here
765                  * iso a table, but that can cause issues for
766                  * r close to 0 for non-interacting pairs.
767                  */
768
769                 const real rs   = rsq * rinv * vdwTableScale;
770                 const int  ri   = static_cast<int>(rs);
771                 const real frac = rs - ri;
772                 const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
773                 /* TODO: Currently the Ewald LJ table does not contain
774                  * the factor 1/6, we should add this.
775                  */
776                 const real FF = f_lr * rinv / six;
777                 real VV = (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
778                           / six;
779
780                 if (ii == jnr)
781                 {
782                     /* If we get here, the i particle (ii) has itself (jnr)
783                      * in its neighborlist. This can only happen with the Verlet
784                      * scheme, and corresponds to a self-interaction that will
785                      * occur twice. Scale it down by 50% to only include it once.
786                      */
787                     VV *= half;
788                 }
789
790                 for (int i = 0; i < NSTATES; i++)
791                 {
792                     const real c6grid = nbfp_grid[tj[i]];
793                     vvtot += LFV[i] * c6grid * VV;
794                     Fscal += LFV[i] * c6grid * FF;
795                     dvdl_vdw += (DLF[i] * c6grid) * VV;
796                 }
797             }
798
799             if (doForces)
800             {
801                 const real tx = Fscal * dx;
802                 const real ty = Fscal * dy;
803                 const real tz = Fscal * dz;
804                 fix           = fix + tx;
805                 fiy           = fiy + ty;
806                 fiz           = fiz + tz;
807                 /* OpenMP atomics are expensive, but this kernels is also
808                  * expensive, so we can take this hit, instead of using
809                  * thread-local output buffers and extra reduction.
810                  *
811                  * All the OpenMP regions in this file are trivial and should
812                  * not throw, so no need for try/catch.
813                  */
814 #pragma omp atomic
815                 f[j3] -= tx;
816 #pragma omp atomic
817                 f[j3 + 1] -= ty;
818 #pragma omp atomic
819                 f[j3 + 2] -= tz;
820             }
821         }
822
823         /* The atomics below are expensive with many OpenMP threads.
824          * Here unperturbed i-particles will usually only have a few
825          * (perturbed) j-particles in the list. Thus with a buffered list
826          * we can skip a significant number of i-reductions with a check.
827          */
828         if (npair_within_cutoff > 0)
829         {
830             if (doForces)
831             {
832 #pragma omp atomic
833                 f[ii3] += fix;
834 #pragma omp atomic
835                 f[ii3 + 1] += fiy;
836 #pragma omp atomic
837                 f[ii3 + 2] += fiz;
838             }
839             if (doShiftForces)
840             {
841 #pragma omp atomic
842                 fshift[is3] += fix;
843 #pragma omp atomic
844                 fshift[is3 + 1] += fiy;
845 #pragma omp atomic
846                 fshift[is3 + 2] += fiz;
847             }
848             if (doPotential)
849             {
850                 int ggid = gid[n];
851 #pragma omp atomic
852                 Vc[ggid] += vctot;
853 #pragma omp atomic
854                 Vv[ggid] += vvtot;
855             }
856         }
857     }
858
859 #pragma omp atomic
860     dvdl[efptCOUL] += dvdl_coul;
861 #pragma omp atomic
862     dvdl[efptVDW] += dvdl_vdw;
863
864     /* Estimate flops, average for free energy stuff:
865      * 12  flops per outer iteration
866      * 150 flops per inner iteration
867      */
868 #pragma omp atomic
869     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
870 }
871
872 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
873                                rvec* gmx_restrict         xx,
874                                gmx::ForceWithShiftForces* forceWithShiftForces,
875                                const t_forcerec* gmx_restrict fr,
876                                const t_mdatoms* gmx_restrict mdatoms,
877                                nb_kernel_data_t* gmx_restrict kernel_data,
878                                t_nrnb* gmx_restrict nrnb);
879
880 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
881 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
882 {
883     if (vdwModifierIsPotSwitch)
884     {
885         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
886                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
887     }
888     else
889     {
890         return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
891                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
892     }
893 }
894
895 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
896 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
897                                                           const bool vdwModifierIsPotSwitch)
898 {
899     if (elecInteractionTypeIsEwald)
900     {
901         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
902                 vdwModifierIsPotSwitch));
903     }
904     else
905     {
906         return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
907                 vdwModifierIsPotSwitch));
908     }
909 }
910
911 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
912 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
913                                                          const bool elecInteractionTypeIsEwald,
914                                                          const bool vdwModifierIsPotSwitch)
915 {
916     if (vdwInteractionTypeIsEwald)
917     {
918         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, true>(
919                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
920     }
921     else
922     {
923         return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, false>(
924                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
925     }
926 }
927
928 template<SoftCoreTreatment softCoreTreatment>
929 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
930                                                                   const bool vdwInteractionTypeIsEwald,
931                                                                   const bool elecInteractionTypeIsEwald,
932                                                                   const bool vdwModifierIsPotSwitch)
933 {
934     if (scLambdasOrAlphasDiffer)
935     {
936         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(
937                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
938     }
939     else
940     {
941         return (dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(
942                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
943     }
944 }
945
946 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
947                                      const bool        vdwInteractionTypeIsEwald,
948                                      const bool        elecInteractionTypeIsEwald,
949                                      const bool        vdwModifierIsPotSwitch,
950                                      const t_forcerec* fr)
951 {
952     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
953     {
954         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(
955                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
956                 vdwModifierIsPotSwitch));
957     }
958     else if (fr->sc_r_power == 6.0_real)
959     {
960         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(
961                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
962                 vdwModifierIsPotSwitch));
963     }
964     else
965     {
966         return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(
967                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
968                 vdwModifierIsPotSwitch));
969     }
970 }
971
972
973 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
974                                rvec*                      xx,
975                                gmx::ForceWithShiftForces* ff,
976                                const t_forcerec*          fr,
977                                const t_mdatoms*           mdatoms,
978                                nb_kernel_data_t*          kernel_data,
979                                t_nrnb*                    nrnb)
980 {
981     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
982                "Unsupported eeltype with free energy");
983
984     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
985     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
986     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
987     bool       scLambdasOrAlphasDiffer    = true;
988
989     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
990     {
991         scLambdasOrAlphasDiffer = false;
992     }
993     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
994     {
995         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
996         {
997             scLambdasOrAlphasDiffer = false;
998         }
999     }
1000     else
1001     {
1002         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1003     }
1004     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
1005                                                elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
1006     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1007 }