Remove or update comments containing 'group scheme'
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forceoutput.h"
51 #include "gromacs/mdtypes/forcerec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/utility/fatalerror.h"
54
55
56 //! Enum for templating the soft-core treatment in the kernel
57 enum class SoftCoreTreatment
58 {
59     None,    //!< No soft-core
60     RPower6, //!< Soft-core with r-power = 6
61     RPower48 //!< Soft-core with r-power = 48
62 };
63
64 //! Most treatments are fine with float in mixed-precision mode.
65 template <SoftCoreTreatment softCoreTreatment>
66 struct SoftCoreReal
67 {
68     //! Real type for soft-core calculations
69     using Real = real;
70 };
71
72 //! This treatment requires double precision for some computations.
73 template <>
74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
75 {
76     //! Real type for soft-core calculations
77     using Real = double;
78 };
79
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template <SoftCoreTreatment softCoreTreatment>
82 static inline void pthRoot(const real  r,
83                            real       *pthRoot,
84                            real       *invPthRoot)
85 {
86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
87     *pthRoot    = 1/(*invPthRoot);
88 }
89
90 // We need a double version to make the specialization below work
91 #if !GMX_DOUBLE
92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
93 template <SoftCoreTreatment softCoreTreatment>
94 static inline void pthRoot(const double  r,
95                            real         *pthRoot,
96                            double       *invPthRoot)
97 {
98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
99     *pthRoot    = 1/(*invPthRoot);
100 }
101 #endif
102
103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
104 template <>
105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
106                                                  real         *pthRoot,
107                                                  double       *invPthRoot)
108 {
109     *pthRoot    = std::pow(r, 1.0/48.0);
110     *invPthRoot = 1/(*pthRoot);
111 }
112
113 template<SoftCoreTreatment softCoreTreatment>
114 static inline real calculateSigmaPow(const real sigma6)
115 {
116     if (softCoreTreatment == SoftCoreTreatment::RPower6)
117     {
118         return sigma6;
119     }
120     else
121     {
122         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
123         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
124         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
125         return (sigmaPow);
126     }
127 }
128
129 template<SoftCoreTreatment softCoreTreatment, class SCReal>
130 static inline real calculateRinv6(const SCReal rinvV)
131 {
132     if (softCoreTreatment == SoftCoreTreatment::RPower6)
133     {
134         return rinvV;
135     }
136     else
137     {
138         real rinv6 = rinvV * rinvV;
139         return (rinv6 * rinv6 * rinv6);
140     }
141 }
142
143 static inline real calculateVdw6(const real c6, const real rinv6)
144 {
145     return (c6 * rinv6);
146 }
147
148 static inline real calculateVdw12(const real c12, const real rinv6)
149 {
150     return (c12 * rinv6 * rinv6);
151 }
152
153 /* reaction-field electrostatics */
154 template<class SCReal>
155 static inline SCReal
156 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
157                          const real two)
158 {
159     return (qq*(rinv - two*krf*r*r));
160 }
161 template<class SCReal>
162 static inline real
163 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
164                        const real potentialShift)
165 {
166     return (qq*(rinv + krf*r*r-potentialShift));
167 }
168
169 /* Ewald electrostatics */
170 static inline real
171 ewaldScalarForce(const real coulomb, const real rinv)
172 {
173     return (coulomb*rinv);
174 }
175 static inline real
176 ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
177 {
178     return (coulomb*(rinv-potentialShift));
179 }
180
181 /* cutoff LJ */
182 static inline real
183 lennardJonesScalarForce(const real v6, const real v12)
184 {
185     return (v12 - v6);
186 }
187 static inline real
188 lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
189                       const real dispersionShift, const real onesixth, const real onetwelfth)
190 {
191     return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
192 }
193
194 /* Ewald LJ */
195 static inline real
196 ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
197 {
198     return (c6grid * potentialShift * onesixth);
199 }
200
201 /* LJ Potential switch */
202 template<class SCReal>
203 static inline SCReal
204 potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
205 {
206     if (r < rVdw)
207     {
208         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
209         return (fScalar);
210     }
211     return (zero);
212 }
213 template<class SCReal>
214 static inline real
215 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
216 {
217     if (r < rVdw)
218     {
219         real potential = potentialInp * sw;
220         return (potential);
221     }
222     return (zero);
223 }
224
225
226 //! Templated free-energy non-bonded kernel
227 template<SoftCoreTreatment softCoreTreatment,
228          bool              scLambdasOrAlphasDiffer,
229          bool              vdwInteractionTypeIsEwald,
230          bool              elecInteractionTypeIsEwald,
231          bool              vdwModifierIsPotSwitch>
232 static void
233 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
234                       rvec * gmx_restrict              xx,
235                       gmx::ForceWithShiftForces *      forceWithShiftForces,
236                       const t_forcerec * gmx_restrict  fr,
237                       const t_mdatoms * gmx_restrict   mdatoms,
238                       nb_kernel_data_t * gmx_restrict  kernel_data,
239                       t_nrnb * gmx_restrict            nrnb)
240 {
241     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
242
243     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
244
245 #define  STATE_A  0
246 #define  STATE_B  1
247 #define  NSTATES  2
248
249     constexpr real  onetwelfth  = 1.0/12.0;
250     constexpr real  onesixth    = 1.0/6.0;
251     constexpr real  zero        = 0.0;
252     constexpr real  half        = 0.5;
253     constexpr real  one         = 1.0;
254     constexpr real  two         = 2.0;
255     constexpr real  six         = 6.0;
256
257     /* Extract pointer to non-bonded interaction constants */
258     const interaction_const_t *ic = fr->ic;
259
260     // Extract pair list data
261     const int   nri             = nlist->nri;
262     const int  *iinr            = nlist->iinr;
263     const int  *jindex          = nlist->jindex;
264     const int  *jjnr            = nlist->jjnr;
265     const int  *shift           = nlist->shift;
266     const int  *gid             = nlist->gid;
267
268     const real *shiftvec        = fr->shift_vec[0];
269     const real *chargeA         = mdatoms->chargeA;
270     const real *chargeB         = mdatoms->chargeB;
271     real       *Vc              = kernel_data->energygrp_elec;
272     const int  *typeA           = mdatoms->typeA;
273     const int  *typeB           = mdatoms->typeB;
274     const int   ntype           = fr->ntype;
275     const real *nbfp            = fr->nbfp;
276     const real *nbfp_grid       = fr->ljpme_c6grid;
277     real       *Vv              = kernel_data->energygrp_vdw;
278     const real  lambda_coul     = kernel_data->lambda[efptCOUL];
279     const real  lambda_vdw      = kernel_data->lambda[efptVDW];
280     real       *dvdl            = kernel_data->dvdl;
281     const real  alpha_coul      = fr->sc_alphacoul;
282     const real  alpha_vdw       = fr->sc_alphavdw;
283     const real  lam_power       = fr->sc_power;
284     const real  sigma6_def      = fr->sc_sigma6_def;
285     const real  sigma6_min      = fr->sc_sigma6_min;
286     const bool  doForces        = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
287     const bool  doShiftForces   = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
288     const bool  doPotential     = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
289
290     // Extract data from interaction_const_t
291     const real  facel           = ic->epsfac;
292     const real  rcoulomb        = ic->rcoulomb;
293     const real  krf             = ic->k_rf;
294     const real  crf             = ic->c_rf;
295     const real  sh_lj_ewald     = ic->sh_lj_ewald;
296     const real  rvdw            = ic->rvdw;
297     const real  dispersionShift = ic->dispersion_shift.cpot;
298     const real  repulsionShift  = ic->repulsion_shift.cpot;
299
300     // Note that the nbnxm kernels do not support Coulomb potential switching at all
301     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
302                "Potential switching is not supported for Coulomb with FEP");
303
304     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
305     if (vdwModifierIsPotSwitch)
306     {
307         const real d = ic->rvdw - ic->rvdw_switch;
308         vdw_swV3     = -10.0/(d*d*d);
309         vdw_swV4     =  15.0/(d*d*d*d);
310         vdw_swV5     =  -6.0/(d*d*d*d*d);
311         vdw_swF2     = -30.0/(d*d*d);
312         vdw_swF3     =  60.0/(d*d*d*d);
313         vdw_swF4     = -30.0/(d*d*d*d*d);
314     }
315     else
316     {
317         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
318         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
319     }
320
321     int icoul;
322     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
323     {
324         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
325     }
326     else
327     {
328         icoul = GMX_NBKERNEL_ELEC_NONE;
329     }
330
331     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
332     rcutoff_max2      = rcutoff_max2*rcutoff_max2;
333
334     const real   *tab_ewald_F_lj = nullptr;
335     const real   *tab_ewald_V_lj = nullptr;
336     const real   *ewtab          = nullptr;
337     real          ewtabscale     = 0;
338     real          ewtabhalfspace = 0;
339     real          sh_ewald       = 0;
340     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
341     {
342         const auto &tables = *ic->coulombEwaldTables;
343         sh_ewald           = ic->sh_ewald;
344         ewtab              = tables.tableFDV0.data();
345         ewtabscale         = tables.scale;
346         ewtabhalfspace     = half/ewtabscale;
347         tab_ewald_F_lj     = tables.tableF.data();
348         tab_ewald_V_lj     = tables.tableV.data();
349     }
350
351     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
352      * reciprocal space. When we use non-switched Ewald interactions, we
353      * assume the soft-coring does not significantly affect the grid contribution
354      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
355      *
356      * However, we cannot use this approach for switch-modified since we would then
357      * effectively end up evaluating a significantly different interaction here compared to the
358      * normal (non-free-energy) kernels, either by applying a cutoff at a different
359      * position than what the user requested, or by switching different
360      * things (1/r rather than short-range Ewald). For these settings, we just
361      * use the traditional short-range Ewald interaction in that case.
362      */
363     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald  && vdwModifierIsPotSwitch),
364                        "Can not apply soft-core to switched Ewald potentials");
365
366     SCReal dvdl_coul  = 0; /* Needs double for sc_power==48 */
367     SCReal dvdl_vdw   = 0; /* Needs double for sc_power==48 */
368
369     /* Lambda factor for state A, 1-lambda*/
370     real LFC[NSTATES], LFV[NSTATES];
371     LFC[STATE_A] = one - lambda_coul;
372     LFV[STATE_A] = one - lambda_vdw;
373
374     /* Lambda factor for state B, lambda*/
375     LFC[STATE_B] = lambda_coul;
376     LFV[STATE_B] = lambda_vdw;
377
378     /*derivative of the lambda factor for state A and B */
379     real DLF[NSTATES];
380     DLF[STATE_A] = -1;
381     DLF[STATE_B] = 1;
382
383     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
384     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
385     for (int i = 0; i < NSTATES; i++)
386     {
387         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
388         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
389         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
390         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
391     }
392
393     // TODO: We should get rid of using pointers to real
394     const real          *x      = xx[0];
395     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
396     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
397
398     for (int n = 0; n < nri; n++)
399     {
400         int        npair_within_cutoff = 0;
401
402         const int  is3   = 3*shift[n];
403         const real shX   = shiftvec[is3];
404         const real shY   = shiftvec[is3+1];
405         const real shZ   = shiftvec[is3+2];
406         const int  nj0   = jindex[n];
407         const int  nj1   = jindex[n+1];
408         const int  ii    = iinr[n];
409         const int  ii3   = 3*ii;
410         const real ix    = shX + x[ii3+0];
411         const real iy    = shY + x[ii3+1];
412         const real iz    = shZ + x[ii3+2];
413         const real iqA   = facel*chargeA[ii];
414         const real iqB   = facel*chargeB[ii];
415         const int  ntiA  = 2*ntype*typeA[ii];
416         const int  ntiB  = 2*ntype*typeB[ii];
417         real       vctot = 0;
418         real       vvtot = 0;
419         real       fix   = 0;
420         real       fiy   = 0;
421         real       fiz   = 0;
422
423         for (int k = nj0; k < nj1; k++)
424         {
425             int        tj[NSTATES];
426             const int  jnr             = jjnr[k];
427             const int  j3              = 3*jnr;
428             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
429             real       r, rinv, rp, rpm2;
430             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
431             const real dx             = ix - x[j3];
432             const real dy             = iy - x[j3+1];
433             const real dz             = iz - x[j3+2];
434             const real rsq            = dx*dx + dy*dy + dz*dz;
435             SCReal     FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
436
437             if (rsq >= rcutoff_max2)
438             {
439                 /* We save significant time by skipping all code below.
440                  * Note that with soft-core interactions, the actual cut-off
441                  * check might be different. But since the soft-core distance
442                  * is always larger than r, checking on r here is safe.
443                  */
444                 continue;
445             }
446             npair_within_cutoff++;
447
448             if (rsq > 0)
449             {
450                 /* Note that unlike in the nbnxn kernels, we do not need
451                  * to clamp the value of rsq before taking the invsqrt
452                  * to avoid NaN in the LJ calculation, since here we do
453                  * not calculate LJ interactions when C6 and C12 are zero.
454                  */
455
456                 rinv         = gmx::invsqrt(rsq);
457                 r            = rsq*rinv;
458             }
459             else
460             {
461                 /* The force at r=0 is zero, because of symmetry.
462                  * But note that the potential is in general non-zero,
463                  * since the soft-cored r will be non-zero.
464                  */
465                 rinv         = 0;
466                 r            = 0;
467             }
468
469             if (softCoreTreatment == SoftCoreTreatment::None)
470             {
471                 /* The soft-core power p will not affect the results
472                  * with not using soft-core, so we use power of 0 which gives
473                  * the simplest math and cheapest code.
474                  */
475                 rpm2         = rinv*rinv;
476                 rp           = 1;
477             }
478             if (softCoreTreatment == SoftCoreTreatment::RPower6)
479             {
480                 rpm2         = rsq*rsq;  /* r4 */
481                 rp           = rpm2*rsq; /* r6 */
482             }
483             if (softCoreTreatment == SoftCoreTreatment::RPower48)
484             {
485                 rp           = rsq*rsq*rsq; /* r6 */
486                 rp           = rp*rp;       /* r12 */
487                 rp           = rp*rp;       /* r24 */
488                 rp           = rp*rp;       /* r48 */
489                 rpm2         = rp/rsq;      /* r46 */
490             }
491
492             real Fscal = 0;
493
494             qq[STATE_A]      = iqA*chargeA[jnr];
495             qq[STATE_B]      = iqB*chargeB[jnr];
496
497             tj[STATE_A]      = ntiA+2*typeA[jnr];
498             tj[STATE_B]      = ntiB+2*typeB[jnr];
499
500             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
501             {
502                 c6[STATE_A]      = nbfp[tj[STATE_A]];
503                 c6[STATE_B]      = nbfp[tj[STATE_B]];
504
505                 for (int i = 0; i < NSTATES; i++)
506                 {
507                     c12[i]             = nbfp[tj[i]+1];
508                     if (useSoftCore)
509                     {
510                         real sigma6[NSTATES];
511                         if ((c6[i] > 0) && (c12[i] > 0))
512                         {
513                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
514                             sigma6[i]       = half*c12[i]/c6[i];
515                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
516                             {
517                                 sigma6[i] = sigma6_min;
518                             }
519                         }
520                         else
521                         {
522                             sigma6[i]       = sigma6_def;
523                         }
524                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
525                     }
526                 }
527
528                 if (useSoftCore)
529                 {
530                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
531                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
532                     {
533                         alpha_vdw_eff    = 0;
534                         alpha_coul_eff   = 0;
535                     }
536                     else
537                     {
538                         alpha_vdw_eff    = alpha_vdw;
539                         alpha_coul_eff   = alpha_coul;
540                     }
541                 }
542
543                 for (int i = 0; i < NSTATES; i++)
544                 {
545                     FscalC[i] = 0;
546                     FscalV[i] = 0;
547                     Vcoul[i]  = 0;
548                     Vvdw[i]   = 0;
549
550                     real   rinvC, rinvV;
551                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
552
553                     /* Only spend time on A or B state if it is non-zero */
554                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
555                     {
556                         /* this section has to be inside the loop because of the dependence on sigma_pow */
557                         if (useSoftCore)
558                         {
559                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
560                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
561                             if (scLambdasOrAlphasDiffer)
562                             {
563                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
564                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
565                             }
566                             else
567                             {
568                                 /* We can avoid one expensive pow and one / operation */
569                                 rpinvV = rpinvC;
570                                 rinvV  = rinvC;
571                                 rV     = rC;
572                             }
573                         }
574                         else
575                         {
576                             rpinvC     = 1;
577                             rinvC      = rinv;
578                             rC         = r;
579
580                             rpinvV     = 1;
581                             rinvV      = rinv;
582                             rV         = r;
583                         }
584
585                         /* Only process the coulomb interactions if we have charges,
586                          * and if we either include all entries in the list (no cutoff
587                          * used in the kernel), or if we are within the cutoff.
588                          */
589                         bool computeElecInteraction =
590                             ( elecInteractionTypeIsEwald && r < rcoulomb) ||
591                             (!elecInteractionTypeIsEwald && rC < rcoulomb);
592
593                         if ( (qq[i] != 0) && computeElecInteraction)
594                         {
595                             if (elecInteractionTypeIsEwald)
596                             {
597                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
598                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
599                             }
600                             else
601                             {
602                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
603                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
604                             }
605                         }
606
607                         /* Only process the VDW interactions if we have
608                          * some non-zero parameters, and if we either
609                          * include all entries in the list (no cutoff used
610                          * in the kernel), or if we are within the cutoff.
611                          */
612                         bool computeVdwInteraction =
613                             ( vdwInteractionTypeIsEwald && r < rvdw) ||
614                             (!vdwInteractionTypeIsEwald && rV < rvdw);
615                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
616                         {
617                             real rinv6;
618                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
619                             {
620                                 rinv6    = calculateRinv6<softCoreTreatment>(rpinvV);
621                             }
622                             else
623                             {
624                                 rinv6    = calculateRinv6<softCoreTreatment>(rinvV);
625                             }
626                             real Vvdw6   = calculateVdw6(c6[i], rinv6);
627                             real Vvdw12  = calculateVdw12(c12[i], rinv6);
628
629                             Vvdw[i]      = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
630                             FscalV[i]    = lennardJonesScalarForce(Vvdw6, Vvdw12);
631
632                             if (vdwInteractionTypeIsEwald)
633                             {
634                                 /* Subtract the grid potential at the cut-off */
635                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
636                             }
637
638                             if (vdwModifierIsPotSwitch)
639                             {
640                                 real       d   = rV - ic->rvdw_switch;
641                                 d              = (d > zero) ? d : zero;
642                                 const real d2  = d*d;
643                                 const real sw  = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
644                                 const real dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
645
646                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
647                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
648                             }
649                         }
650
651                         /* FscalC (and FscalV) now contain: dV/drC * rC
652                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
653                          * Further down we first multiply by r^p-2 and then by
654                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
655                          */
656                         FscalC[i] *= rpinvC;
657                         FscalV[i] *= rpinvV;
658                     }
659                 }
660
661                 /* Assemble A and B states */
662                 for (int i = 0; i < NSTATES; i++)
663                 {
664                     vctot += LFC[i]*Vcoul[i];
665                     vvtot += LFV[i]*Vvdw[i];
666
667                     Fscal += LFC[i]*FscalC[i]*rpm2;
668                     Fscal += LFV[i]*FscalV[i]*rpm2;
669
670                     if (useSoftCore)
671                     {
672                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
673                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
674                     }
675                     else
676                     {
677                         dvdl_coul += Vcoul[i]*DLF[i];
678                         dvdl_vdw  += Vvdw[i]*DLF[i];
679                     }
680                 }
681             }
682             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
683             {
684                 /* For excluded pairs, which are only in this pair list when
685                  * using the Verlet scheme, we don't use soft-core.
686                  * As there is no singularity, there is no need for soft-core.
687                  */
688                 const real FF = -two*krf;
689                 real       VV = krf*rsq - crf;
690
691                 if (ii == jnr)
692                 {
693                     VV *= half;
694                 }
695
696                 for (int i = 0; i < NSTATES; i++)
697                 {
698                     vctot      += LFC[i]*qq[i]*VV;
699                     Fscal      += LFC[i]*qq[i]*FF;
700                     dvdl_coul  += DLF[i]*qq[i]*VV;
701                 }
702             }
703
704             if (elecInteractionTypeIsEwald && r < rcoulomb)
705             {
706                 /* See comment in the preamble. When using Ewald interactions
707                  * (unless we use a switch modifier) we subtract the reciprocal-space
708                  * Ewald component here which made it possible to apply the free
709                  * energy interaction to 1/r (vanilla coulomb short-range part)
710                  * above. This gets us closer to the ideal case of applying
711                  * the softcore to the entire electrostatic interaction,
712                  * including the reciprocal-space component.
713                  */
714                 real       v_lr, f_lr;
715
716                 const real ewrt   = r*ewtabscale;
717                 int        ewitab = static_cast<int>(ewrt);
718                 const real eweps  = ewrt - ewitab;
719                 ewitab            = 4*ewitab;
720                 f_lr              = ewtab[ewitab] + eweps*ewtab[ewitab+1];
721                 v_lr              = (ewtab[ewitab + 2] - ewtabhalfspace*eweps*(ewtab[ewitab] + f_lr));
722                 f_lr             *= rinv;
723
724                 /* Note that any possible Ewald shift has already been applied in
725                  * the normal interaction part above.
726                  */
727
728                 if (ii == jnr)
729                 {
730                     /* If we get here, the i particle (ii) has itself (jnr)
731                      * in its neighborlist. This can only happen with the Verlet
732                      * scheme, and corresponds to a self-interaction that will
733                      * occur twice. Scale it down by 50% to only include it once.
734                      */
735                     v_lr *= half;
736                 }
737
738                 for (int i = 0; i < NSTATES; i++)
739                 {
740                     vctot     -= LFC[i]*qq[i]*v_lr;
741                     Fscal     -= LFC[i]*qq[i]*f_lr;
742                     dvdl_coul -= (DLF[i]*qq[i])*v_lr;
743                 }
744             }
745
746             if (vdwInteractionTypeIsEwald && r < rvdw)
747             {
748                 /* See comment in the preamble. When using LJ-Ewald interactions
749                  * (unless we use a switch modifier) we subtract the reciprocal-space
750                  * Ewald component here which made it possible to apply the free
751                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
752                  * above. This gets us closer to the ideal case of applying
753                  * the softcore to the entire VdW interaction,
754                  * including the reciprocal-space component.
755                  */
756                 /* We could also use the analytical form here
757                  * iso a table, but that can cause issues for
758                  * r close to 0 for non-interacting pairs.
759                  */
760
761                 const real rs   = rsq*rinv*ewtabscale;
762                 const int  ri   = static_cast<int>(rs);
763                 const real frac = rs - ri;
764                 const real f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
765                 /* TODO: Currently the Ewald LJ table does not contain
766                  * the factor 1/6, we should add this.
767                  */
768                 const real FF = f_lr*rinv/six;
769                 real       VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
770
771                 if (ii == jnr)
772                 {
773                     /* If we get here, the i particle (ii) has itself (jnr)
774                      * in its neighborlist. This can only happen with the Verlet
775                      * scheme, and corresponds to a self-interaction that will
776                      * occur twice. Scale it down by 50% to only include it once.
777                      */
778                     VV *= half;
779                 }
780
781                 for (int i = 0; i < NSTATES; i++)
782                 {
783                     const real c6grid = nbfp_grid[tj[i]];
784                     vvtot            += LFV[i]*c6grid*VV;
785                     Fscal            += LFV[i]*c6grid*FF;
786                     dvdl_vdw         += (DLF[i]*c6grid)*VV;
787                 }
788             }
789
790             if (doForces)
791             {
792                 const real tx  = Fscal*dx;
793                 const real ty  = Fscal*dy;
794                 const real tz  = Fscal*dz;
795                 fix            = fix + tx;
796                 fiy            = fiy + ty;
797                 fiz            = fiz + tz;
798                 /* OpenMP atomics are expensive, but this kernels is also
799                  * expensive, so we can take this hit, instead of using
800                  * thread-local output buffers and extra reduction.
801                  *
802                  * All the OpenMP regions in this file are trivial and should
803                  * not throw, so no need for try/catch.
804                  */
805 #pragma omp atomic
806                 f[j3]     -= tx;
807 #pragma omp atomic
808                 f[j3+1]   -= ty;
809 #pragma omp atomic
810                 f[j3+2]   -= tz;
811             }
812         }
813
814         /* The atomics below are expensive with many OpenMP threads.
815          * Here unperturbed i-particles will usually only have a few
816          * (perturbed) j-particles in the list. Thus with a buffered list
817          * we can skip a significant number of i-reductions with a check.
818          */
819         if (npair_within_cutoff > 0)
820         {
821             if (doForces)
822             {
823 #pragma omp atomic
824                 f[ii3]        += fix;
825 #pragma omp atomic
826                 f[ii3+1]      += fiy;
827 #pragma omp atomic
828                 f[ii3+2]      += fiz;
829             }
830             if (doShiftForces)
831             {
832 #pragma omp atomic
833                 fshift[is3]   += fix;
834 #pragma omp atomic
835                 fshift[is3+1] += fiy;
836 #pragma omp atomic
837                 fshift[is3+2] += fiz;
838             }
839             if (doPotential)
840             {
841                 int ggid  = gid[n];
842 #pragma omp atomic
843                 Vc[ggid] += vctot;
844 #pragma omp atomic
845                 Vv[ggid] += vvtot;
846             }
847         }
848     }
849
850 #pragma omp atomic
851     dvdl[efptCOUL]     += dvdl_coul;
852  #pragma omp atomic
853     dvdl[efptVDW]      += dvdl_vdw;
854
855     /* Estimate flops, average for free energy stuff:
856      * 12  flops per outer iteration
857      * 150 flops per inner iteration
858      */
859 #pragma omp atomic
860     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[nri]*150);
861 }
862
863 typedef void (* KernelFunction)(const t_nblist * gmx_restrict    nlist,
864                                 rvec * gmx_restrict              xx,
865                                 gmx::ForceWithShiftForces *      forceWithShiftForces,
866                                 const t_forcerec * gmx_restrict  fr,
867                                 const t_mdatoms * gmx_restrict   mdatoms,
868                                 nb_kernel_data_t * gmx_restrict  kernel_data,
869                                 t_nrnb * gmx_restrict            nrnb);
870
871 template<SoftCoreTreatment softCoreTreatment,
872          bool scLambdasOrAlphasDiffer,
873          bool vdwInteractionTypeIsEwald,
874          bool elecInteractionTypeIsEwald>
875 static KernelFunction
876 dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
877 {
878     if (vdwModifierIsPotSwitch)
879     {
880         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
881                                      elecInteractionTypeIsEwald, true>);
882     }
883     else
884     {
885         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
886                                      elecInteractionTypeIsEwald, false>);
887     }
888 }
889
890 template<SoftCoreTreatment softCoreTreatment,
891          bool scLambdasOrAlphasDiffer,
892          bool vdwInteractionTypeIsEwald>
893 static KernelFunction
894 dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
895                                     const bool vdwModifierIsPotSwitch)
896 {
897     if (elecInteractionTypeIsEwald)
898     {
899         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
900                                            true>(vdwModifierIsPotSwitch));
901     }
902     else
903     {
904         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
905                                            false>(vdwModifierIsPotSwitch));
906     }
907 }
908
909 template<SoftCoreTreatment softCoreTreatment,
910          bool scLambdasOrAlphasDiffer>
911 static KernelFunction
912 dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
913                                    const bool elecInteractionTypeIsEwald,
914                                    const bool vdwModifierIsPotSwitch)
915 {
916     if (vdwInteractionTypeIsEwald)
917     {
918         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
919                                                    true>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
920     }
921     else
922     {
923         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
924                                                    false>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
925     }
926 }
927
928 template<SoftCoreTreatment softCoreTreatment>
929 static KernelFunction
930 dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
931                                             const bool vdwInteractionTypeIsEwald,
932                                             const bool elecInteractionTypeIsEwald,
933                                             const bool vdwModifierIsPotSwitch)
934 {
935     if (scLambdasOrAlphasDiffer)
936     {
937         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
938                                                                            vdwModifierIsPotSwitch));
939     }
940     else
941     {
942         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
943                                                                             vdwModifierIsPotSwitch));
944     }
945 }
946
947 static KernelFunction
948 dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
949                const bool                 vdwInteractionTypeIsEwald,
950                const bool                 elecInteractionTypeIsEwald,
951                const bool                 vdwModifierIsPotSwitch,
952                const t_forcerec          *fr)
953 {
954     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
955     {
956         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(scLambdasOrAlphasDiffer,
957                                                                                     vdwInteractionTypeIsEwald,
958                                                                                     elecInteractionTypeIsEwald,
959                                                                                     vdwModifierIsPotSwitch));
960     }
961     else if (fr->sc_r_power == 6.0_real)
962     {
963         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(scLambdasOrAlphasDiffer,
964                                                                                        vdwInteractionTypeIsEwald,
965                                                                                        elecInteractionTypeIsEwald,
966                                                                                        vdwModifierIsPotSwitch));
967     }
968     else
969     {
970         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(scLambdasOrAlphasDiffer,
971                                                                                         vdwInteractionTypeIsEwald,
972                                                                                         elecInteractionTypeIsEwald,
973                                                                                         vdwModifierIsPotSwitch));
974     }
975 }
976
977
978 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
979                                rvec                      *xx,
980                                gmx::ForceWithShiftForces *ff,
981                                const t_forcerec          *fr,
982                                const t_mdatoms           *mdatoms,
983                                nb_kernel_data_t          *kernel_data,
984                                t_nrnb                    *nrnb)
985 {
986     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype)  || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
987                "Unsupported eeltype with free energy");
988
989     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
990     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
991     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
992     bool       scLambdasOrAlphasDiffer    = true;
993
994     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
995     {
996         scLambdasOrAlphasDiffer = false;
997     }
998     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
999     {
1000         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
1001             fr->sc_alphacoul == fr->sc_alphavdw)
1002         {
1003             scLambdasOrAlphasDiffer = false;
1004         }
1005     }
1006     else
1007     {
1008         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1009     }
1010     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
1011                                                vdwModifierIsPotSwitch, fr);
1012     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1013 }