7b1c8453ebd15ebae718cd78009e0e28df4887e4
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forceoutput.h"
51 #include "gromacs/mdtypes/forcerec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/utility/fatalerror.h"
54
55
56 //! Enum for templating the soft-core treatment in the kernel
57 enum class SoftCoreTreatment
58 {
59     None,    //!< No soft-core
60     RPower6, //!< Soft-core with r-power = 6
61     RPower48 //!< Soft-core with r-power = 48
62 };
63
64 //! Most treatments are fine with float in mixed-precision mode.
65 template <SoftCoreTreatment softCoreTreatment>
66 struct SoftCoreReal
67 {
68     //! Real type for soft-core calculations
69     using Real = real;
70 };
71
72 //! This treatment requires double precision for some computations.
73 template <>
74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
75 {
76     //! Real type for soft-core calculations
77     using Real = double;
78 };
79
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template <SoftCoreTreatment softCoreTreatment>
82 static inline void pthRoot(const real  r,
83                            real       *pthRoot,
84                            real       *invPthRoot)
85 {
86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
87     *pthRoot    = 1/(*invPthRoot);
88 }
89
90 // We need a double version to make the specialization below work
91 #if !GMX_DOUBLE
92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
93 template <SoftCoreTreatment softCoreTreatment>
94 static inline void pthRoot(const double  r,
95                            real         *pthRoot,
96                            double       *invPthRoot)
97 {
98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
99     *pthRoot    = 1/(*invPthRoot);
100 }
101 #endif
102
103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
104 template <>
105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
106                                                  real         *pthRoot,
107                                                  double       *invPthRoot)
108 {
109     *pthRoot    = std::pow(r, 1.0/48.0);
110     *invPthRoot = 1/(*pthRoot);
111 }
112
113 template<SoftCoreTreatment softCoreTreatment>
114 static inline real calculateSigmaPow(const real sigma6)
115 {
116     if (softCoreTreatment == SoftCoreTreatment::RPower6)
117     {
118         return sigma6;
119     }
120     else
121     {
122         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
123         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
124         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
125         return (sigmaPow);
126     }
127 }
128
129 template<SoftCoreTreatment softCoreTreatment, class SCReal>
130 static inline real calculateRinv6(const SCReal rinvV)
131 {
132     if (softCoreTreatment == SoftCoreTreatment::RPower6)
133     {
134         return rinvV;
135     }
136     else
137     {
138         real rinv6 = rinvV * rinvV;
139         return (rinv6 * rinv6 * rinv6);
140     }
141 }
142
143 static inline real calculateVdw6(const real c6, const real rinv6)
144 {
145     return (c6 * rinv6);
146 }
147
148 static inline real calculateVdw12(const real c12, const real rinv6)
149 {
150     return (c12 * rinv6 * rinv6);
151 }
152
153 /* reaction-field electrostatics */
154 template<class SCReal>
155 static inline SCReal
156 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
157                          const real two)
158 {
159     return (qq*(rinv - two*krf*r*r));
160 }
161 template<class SCReal>
162 static inline real
163 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
164                        const real potentialShift)
165 {
166     return (qq*(rinv + krf*r*r-potentialShift));
167 }
168
169 /* Ewald electrostatics */
170 static inline real
171 ewaldScalarForce(const real coulomb, const real rinv)
172 {
173     return (coulomb*rinv);
174 }
175 static inline real
176 ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
177 {
178     return (coulomb*(rinv-potentialShift));
179 }
180
181 /* cutoff LJ */
182 static inline real
183 lennardJonesScalarForce(const real v6, const real v12)
184 {
185     return (v12 - v6);
186 }
187 static inline real
188 lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
189                       const real dispersionShift, const real onesixth, const real onetwelfth)
190 {
191     return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
192 }
193
194 /* Ewald LJ */
195 static inline real
196 ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
197 {
198     return (c6grid * potentialShift * onesixth);
199 }
200
201 /* LJ Potential switch */
202 template<class SCReal>
203 static inline SCReal
204 potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
205 {
206     if (r < rVdw)
207     {
208         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
209         return (fScalar);
210     }
211     return (zero);
212 }
213 template<class SCReal>
214 static inline real
215 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
216 {
217     if (r < rVdw)
218     {
219         real potential = potentialInp * sw;
220         return (potential);
221     }
222     return (zero);
223 }
224
225
226 //! Templated free-energy non-bonded kernel
227 template<SoftCoreTreatment softCoreTreatment,
228          bool              scLambdasOrAlphasDiffer,
229          bool              vdwInteractionTypeIsEwald,
230          bool              elecInteractionTypeIsEwald,
231          bool              vdwModifierIsPotSwitch>
232 static void
233 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
234                       rvec * gmx_restrict              xx,
235                       gmx::ForceWithShiftForces *      forceWithShiftForces,
236                       const t_forcerec * gmx_restrict  fr,
237                       const t_mdatoms * gmx_restrict   mdatoms,
238                       nb_kernel_data_t * gmx_restrict  kernel_data,
239                       t_nrnb * gmx_restrict            nrnb)
240 {
241     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
242
243     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
244
245 #define  STATE_A  0
246 #define  STATE_B  1
247 #define  NSTATES  2
248
249     constexpr real  onetwelfth  = 1.0/12.0;
250     constexpr real  onesixth    = 1.0/6.0;
251     constexpr real  zero        = 0.0;
252     constexpr real  half        = 0.5;
253     constexpr real  one         = 1.0;
254     constexpr real  two         = 2.0;
255     constexpr real  six         = 6.0;
256
257     /* Extract pointer to non-bonded interaction constants */
258     const interaction_const_t *ic = fr->ic;
259
260     // Extract pair list data
261     const int   nri             = nlist->nri;
262     const int  *iinr            = nlist->iinr;
263     const int  *jindex          = nlist->jindex;
264     const int  *jjnr            = nlist->jjnr;
265     const int  *shift           = nlist->shift;
266     const int  *gid             = nlist->gid;
267
268     const real *shiftvec        = fr->shift_vec[0];
269     const real *chargeA         = mdatoms->chargeA;
270     const real *chargeB         = mdatoms->chargeB;
271     real       *Vc              = kernel_data->energygrp_elec;
272     const int  *typeA           = mdatoms->typeA;
273     const int  *typeB           = mdatoms->typeB;
274     const int   ntype           = fr->ntype;
275     const real *nbfp            = fr->nbfp;
276     const real *nbfp_grid       = fr->ljpme_c6grid;
277     real       *Vv              = kernel_data->energygrp_vdw;
278     const real  lambda_coul     = kernel_data->lambda[efptCOUL];
279     const real  lambda_vdw      = kernel_data->lambda[efptVDW];
280     real       *dvdl            = kernel_data->dvdl;
281     const real  alpha_coul      = fr->sc_alphacoul;
282     const real  alpha_vdw       = fr->sc_alphavdw;
283     const real  lam_power       = fr->sc_power;
284     const real  sigma6_def      = fr->sc_sigma6_def;
285     const real  sigma6_min      = fr->sc_sigma6_min;
286     const bool  doForces        = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
287     const bool  doShiftForces   = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
288     const bool  doPotential     = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
289
290     // Extract data from interaction_const_t
291     const real  facel           = ic->epsfac;
292     const real  rcoulomb        = ic->rcoulomb;
293     const real  krf             = ic->k_rf;
294     const real  crf             = ic->c_rf;
295     const real  sh_lj_ewald     = ic->sh_lj_ewald;
296     const real  rvdw            = ic->rvdw;
297     const real  dispersionShift = ic->dispersion_shift.cpot;
298     const real  repulsionShift  = ic->repulsion_shift.cpot;
299
300     // Note that the nbnxm kernels do not support Coulomb potential switching at all
301     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
302                "Potential switching is not supported for Coulomb with FEP");
303
304     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
305     if (vdwModifierIsPotSwitch)
306     {
307         const real d = ic->rvdw - ic->rvdw_switch;
308         vdw_swV3     = -10.0/(d*d*d);
309         vdw_swV4     =  15.0/(d*d*d*d);
310         vdw_swV5     =  -6.0/(d*d*d*d*d);
311         vdw_swF2     = -30.0/(d*d*d);
312         vdw_swF3     =  60.0/(d*d*d*d);
313         vdw_swF4     = -30.0/(d*d*d*d*d);
314     }
315     else
316     {
317         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
318         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
319     }
320
321     int icoul;
322     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
323     {
324         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
325     }
326     else
327     {
328         icoul = GMX_NBKERNEL_ELEC_NONE;
329     }
330
331     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
332     rcutoff_max2      = rcutoff_max2*rcutoff_max2;
333
334     const real   *tab_ewald_F_lj = nullptr;
335     const real   *tab_ewald_V_lj = nullptr;
336     const real   *ewtab          = nullptr;
337     real          ewtabscale     = 0;
338     real          ewtabhalfspace = 0;
339     real          sh_ewald       = 0;
340     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
341     {
342         const auto &tables = *ic->coulombEwaldTables;
343         sh_ewald           = ic->sh_ewald;
344         ewtab              = tables.tableFDV0.data();
345         ewtabscale         = tables.scale;
346         ewtabhalfspace     = half/ewtabscale;
347         tab_ewald_F_lj     = tables.tableF.data();
348         tab_ewald_V_lj     = tables.tableV.data();
349     }
350
351     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
352      * reciprocal space. When we use non-switched Ewald interactions, we
353      * assume the soft-coring does not significantly affect the grid contribution
354      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
355      *
356      * However, we cannot use this approach for switch-modified since we would then
357      * effectively end up evaluating a significantly different interaction here compared to the
358      * normal (non-free-energy) kernels, either by applying a cutoff at a different
359      * position than what the user requested, or by switching different
360      * things (1/r rather than short-range Ewald). For these settings, we just
361      * use the traditional short-range Ewald interaction in that case.
362      */
363     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald  && vdwModifierIsPotSwitch),
364                        "Can not apply soft-core to switched Ewald potentials");
365
366     SCReal dvdl_coul  = 0; /* Needs double for sc_power==48 */
367     SCReal dvdl_vdw   = 0; /* Needs double for sc_power==48 */
368
369     /* Lambda factor for state A, 1-lambda*/
370     real LFC[NSTATES], LFV[NSTATES];
371     LFC[STATE_A] = one - lambda_coul;
372     LFV[STATE_A] = one - lambda_vdw;
373
374     /* Lambda factor for state B, lambda*/
375     LFC[STATE_B] = lambda_coul;
376     LFV[STATE_B] = lambda_vdw;
377
378     /*derivative of the lambda factor for state A and B */
379     real DLF[NSTATES];
380     DLF[STATE_A] = -1;
381     DLF[STATE_B] = 1;
382
383     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
384     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
385     for (int i = 0; i < NSTATES; i++)
386     {
387         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
388         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
389         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
390         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
391     }
392
393     // TODO: We should get rid of using pointers to real
394     const real          *x      = xx[0];
395     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
396     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
397
398     for (int n = 0; n < nri; n++)
399     {
400         int        npair_within_cutoff = 0;
401
402         const int  is3   = 3*shift[n];
403         const real shX   = shiftvec[is3];
404         const real shY   = shiftvec[is3+1];
405         const real shZ   = shiftvec[is3+2];
406         const int  nj0   = jindex[n];
407         const int  nj1   = jindex[n+1];
408         const int  ii    = iinr[n];
409         const int  ii3   = 3*ii;
410         const real ix    = shX + x[ii3+0];
411         const real iy    = shY + x[ii3+1];
412         const real iz    = shZ + x[ii3+2];
413         const real iqA   = facel*chargeA[ii];
414         const real iqB   = facel*chargeB[ii];
415         const int  ntiA  = 2*ntype*typeA[ii];
416         const int  ntiB  = 2*ntype*typeB[ii];
417         real       vctot = 0;
418         real       vvtot = 0;
419         real       fix   = 0;
420         real       fiy   = 0;
421         real       fiz   = 0;
422
423         for (int k = nj0; k < nj1; k++)
424         {
425             int        tj[NSTATES];
426             const int  jnr             = jjnr[k];
427             const int  j3              = 3*jnr;
428             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
429             real       r, rinv, rp, rpm2;
430             real       alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
431             const real dx             = ix - x[j3];
432             const real dy             = iy - x[j3+1];
433             const real dz             = iz - x[j3+2];
434             const real rsq            = dx*dx + dy*dy + dz*dz;
435             SCReal     FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
436
437             if (rsq >= rcutoff_max2)
438             {
439                 /* We save significant time by skipping all code below.
440                  * Note that with soft-core interactions, the actual cut-off
441                  * check might be different. But since the soft-core distance
442                  * is always larger than r, checking on r here is safe.
443                  */
444                 continue;
445             }
446             npair_within_cutoff++;
447
448             if (rsq > 0)
449             {
450                 /* Note that unlike in the nbnxn kernels, we do not need
451                  * to clamp the value of rsq before taking the invsqrt
452                  * to avoid NaN in the LJ calculation, since here we do
453                  * not calculate LJ interactions when C6 and C12 are zero.
454                  */
455
456                 rinv         = gmx::invsqrt(rsq);
457                 r            = rsq*rinv;
458             }
459             else
460             {
461                 /* The force at r=0 is zero, because of symmetry.
462                  * But note that the potential is in general non-zero,
463                  * since the soft-cored r will be non-zero.
464                  */
465                 rinv         = 0;
466                 r            = 0;
467             }
468
469             if (softCoreTreatment == SoftCoreTreatment::None)
470             {
471                 /* The soft-core power p will not affect the results
472                  * with not using soft-core, so we use power of 0 which gives
473                  * the simplest math and cheapest code.
474                  */
475                 rpm2         = rinv*rinv;
476                 rp           = 1;
477             }
478             if (softCoreTreatment == SoftCoreTreatment::RPower6)
479             {
480                 rpm2         = rsq*rsq;  /* r4 */
481                 rp           = rpm2*rsq; /* r6 */
482             }
483             if (softCoreTreatment == SoftCoreTreatment::RPower48)
484             {
485                 rp           = rsq*rsq*rsq; /* r6 */
486                 rp           = rp*rp;       /* r12 */
487                 rp           = rp*rp;       /* r24 */
488                 rp           = rp*rp;       /* r48 */
489                 rpm2         = rp/rsq;      /* r46 */
490             }
491
492             real Fscal = 0;
493
494             qq[STATE_A]      = iqA*chargeA[jnr];
495             qq[STATE_B]      = iqB*chargeB[jnr];
496
497             tj[STATE_A]      = ntiA+2*typeA[jnr];
498             tj[STATE_B]      = ntiB+2*typeB[jnr];
499
500             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
501             {
502                 c6[STATE_A]      = nbfp[tj[STATE_A]];
503                 c6[STATE_B]      = nbfp[tj[STATE_B]];
504
505                 for (int i = 0; i < NSTATES; i++)
506                 {
507                     c12[i]             = nbfp[tj[i]+1];
508                     if (useSoftCore)
509                     {
510                         real sigma6[NSTATES];
511                         if ((c6[i] > 0) && (c12[i] > 0))
512                         {
513                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
514                             sigma6[i]       = half*c12[i]/c6[i];
515                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
516                             {
517                                 sigma6[i] = sigma6_min;
518                             }
519                         }
520                         else
521                         {
522                             sigma6[i]       = sigma6_def;
523                         }
524                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
525                     }
526                 }
527
528                 if (useSoftCore)
529                 {
530                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
531                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
532                     {
533                         alpha_vdw_eff    = 0;
534                         alpha_coul_eff   = 0;
535                     }
536                     else
537                     {
538                         alpha_vdw_eff    = alpha_vdw;
539                         alpha_coul_eff   = alpha_coul;
540                     }
541                 }
542
543                 for (int i = 0; i < NSTATES; i++)
544                 {
545                     FscalC[i] = 0;
546                     FscalV[i] = 0;
547                     Vcoul[i]  = 0;
548                     Vvdw[i]   = 0;
549
550                     real   rinvC, rinvV;
551                     SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
552
553                     /* Only spend time on A or B state if it is non-zero */
554                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
555                     {
556                         /* this section has to be inside the loop because of the dependence on sigma_pow */
557                         if (useSoftCore)
558                         {
559                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
560                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
561                             if (scLambdasOrAlphasDiffer)
562                             {
563                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
564                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
565                             }
566                             else
567                             {
568                                 /* We can avoid one expensive pow and one / operation */
569                                 rpinvV = rpinvC;
570                                 rinvV  = rinvC;
571                                 rV     = rC;
572                             }
573                         }
574                         else
575                         {
576                             rpinvC     = 1;
577                             rinvC      = rinv;
578                             rC         = r;
579
580                             rpinvV     = 1;
581                             rinvV      = rinv;
582                             rV         = r;
583                         }
584
585                         /* Only process the coulomb interactions if we have charges,
586                          * and if we either include all entries in the list (no cutoff
587                          * used in the kernel), or if we are within the cutoff.
588                          */
589                         bool computeElecInteraction =
590                             ( elecInteractionTypeIsEwald && r < rcoulomb) ||
591                             (!elecInteractionTypeIsEwald && rC < rcoulomb);
592
593                         if ( (qq[i] != 0) && computeElecInteraction)
594                         {
595                             if (elecInteractionTypeIsEwald)
596                             {
597                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
598                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
599                             }
600                             else
601                             {
602                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
603                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
604                             }
605                         }
606
607                         /* Only process the VDW interactions if we have
608                          * some non-zero parameters, and if we either
609                          * include all entries in the list (no cutoff used
610                          * in the kernel), or if we are within the cutoff.
611                          */
612                         bool computeVdwInteraction =
613                             ( vdwInteractionTypeIsEwald && r < rvdw) ||
614                             (!vdwInteractionTypeIsEwald && rV < rvdw);
615                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
616                         {
617                             real rinv6;
618                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
619                             {
620                                 rinv6    = calculateRinv6<softCoreTreatment>(rpinvV);
621                             }
622                             else
623                             {
624                                 rinv6    = calculateRinv6<softCoreTreatment>(rinvV);
625                             }
626                             real Vvdw6   = calculateVdw6(c6[i], rinv6);
627                             real Vvdw12  = calculateVdw12(c12[i], rinv6);
628
629                             Vvdw[i]      = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
630                             FscalV[i]    = lennardJonesScalarForce(Vvdw6, Vvdw12);
631
632                             if (vdwInteractionTypeIsEwald)
633                             {
634                                 /* Subtract the grid potential at the cut-off */
635                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
636                             }
637
638                             if (vdwModifierIsPotSwitch)
639                             {
640                                 real       d   = rV - ic->rvdw_switch;
641                                 d              = (d > zero) ? d : zero;
642                                 const real d2  = d*d;
643                                 const real sw  = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
644                                 const real dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
645
646                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
647                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
648                             }
649                         }
650
651                         /* FscalC (and FscalV) now contain: dV/drC * rC
652                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
653                          * Further down we first multiply by r^p-2 and then by
654                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
655                          */
656                         FscalC[i] *= rpinvC;
657                         FscalV[i] *= rpinvV;
658                     }
659                 }
660
661                 /* Assemble A and B states */
662                 for (int i = 0; i < NSTATES; i++)
663                 {
664                     vctot += LFC[i]*Vcoul[i];
665                     vvtot += LFV[i]*Vvdw[i];
666
667                     Fscal += LFC[i]*FscalC[i]*rpm2;
668                     Fscal += LFV[i]*FscalV[i]*rpm2;
669
670                     if (useSoftCore)
671                     {
672                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
673                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
674                     }
675                     else
676                     {
677                         dvdl_coul += Vcoul[i]*DLF[i];
678                         dvdl_vdw  += Vvdw[i]*DLF[i];
679                     }
680                 }
681             }
682             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
683             {
684                 /* For excluded pairs, which are only in this pair list when
685                  * using the Verlet scheme, we don't use soft-core.
686                  * The group scheme also doesn't soft-core for these.
687                  * As there is no singularity, there is no need for soft-core.
688                  */
689                 const real FF = -two*krf;
690                 real       VV = krf*rsq - crf;
691
692                 if (ii == jnr)
693                 {
694                     VV *= half;
695                 }
696
697                 for (int i = 0; i < NSTATES; i++)
698                 {
699                     vctot      += LFC[i]*qq[i]*VV;
700                     Fscal      += LFC[i]*qq[i]*FF;
701                     dvdl_coul  += DLF[i]*qq[i]*VV;
702                 }
703             }
704
705             if (elecInteractionTypeIsEwald && r < rcoulomb)
706             {
707                 /* See comment in the preamble. When using Ewald interactions
708                  * (unless we use a switch modifier) we subtract the reciprocal-space
709                  * Ewald component here which made it possible to apply the free
710                  * energy interaction to 1/r (vanilla coulomb short-range part)
711                  * above. This gets us closer to the ideal case of applying
712                  * the softcore to the entire electrostatic interaction,
713                  * including the reciprocal-space component.
714                  */
715                 real       v_lr, f_lr;
716
717                 const real ewrt   = r*ewtabscale;
718                 int        ewitab = static_cast<int>(ewrt);
719                 const real eweps  = ewrt - ewitab;
720                 ewitab            = 4*ewitab;
721                 f_lr              = ewtab[ewitab] + eweps*ewtab[ewitab+1];
722                 v_lr              = (ewtab[ewitab + 2] - ewtabhalfspace*eweps*(ewtab[ewitab] + f_lr));
723                 f_lr             *= rinv;
724
725                 /* Note that any possible Ewald shift has already been applied in
726                  * the normal interaction part above.
727                  */
728
729                 if (ii == jnr)
730                 {
731                     /* If we get here, the i particle (ii) has itself (jnr)
732                      * in its neighborlist. This can only happen with the Verlet
733                      * scheme, and corresponds to a self-interaction that will
734                      * occur twice. Scale it down by 50% to only include it once.
735                      */
736                     v_lr *= half;
737                 }
738
739                 for (int i = 0; i < NSTATES; i++)
740                 {
741                     vctot     -= LFC[i]*qq[i]*v_lr;
742                     Fscal     -= LFC[i]*qq[i]*f_lr;
743                     dvdl_coul -= (DLF[i]*qq[i])*v_lr;
744                 }
745             }
746
747             if (vdwInteractionTypeIsEwald && r < rvdw)
748             {
749                 /* See comment in the preamble. When using LJ-Ewald interactions
750                  * (unless we use a switch modifier) we subtract the reciprocal-space
751                  * Ewald component here which made it possible to apply the free
752                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
753                  * above. This gets us closer to the ideal case of applying
754                  * the softcore to the entire VdW interaction,
755                  * including the reciprocal-space component.
756                  */
757                 /* We could also use the analytical form here
758                  * iso a table, but that can cause issues for
759                  * r close to 0 for non-interacting pairs.
760                  */
761
762                 const real rs   = rsq*rinv*ewtabscale;
763                 const int  ri   = static_cast<int>(rs);
764                 const real frac = rs - ri;
765                 const real f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
766                 /* TODO: Currently the Ewald LJ table does not contain
767                  * the factor 1/6, we should add this.
768                  */
769                 const real FF = f_lr*rinv/six;
770                 real       VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
771
772                 if (ii == jnr)
773                 {
774                     /* If we get here, the i particle (ii) has itself (jnr)
775                      * in its neighborlist. This can only happen with the Verlet
776                      * scheme, and corresponds to a self-interaction that will
777                      * occur twice. Scale it down by 50% to only include it once.
778                      */
779                     VV *= half;
780                 }
781
782                 for (int i = 0; i < NSTATES; i++)
783                 {
784                     const real c6grid = nbfp_grid[tj[i]];
785                     vvtot            += LFV[i]*c6grid*VV;
786                     Fscal            += LFV[i]*c6grid*FF;
787                     dvdl_vdw         += (DLF[i]*c6grid)*VV;
788                 }
789             }
790
791             if (doForces)
792             {
793                 const real tx  = Fscal*dx;
794                 const real ty  = Fscal*dy;
795                 const real tz  = Fscal*dz;
796                 fix            = fix + tx;
797                 fiy            = fiy + ty;
798                 fiz            = fiz + tz;
799                 /* OpenMP atomics are expensive, but this kernels is also
800                  * expensive, so we can take this hit, instead of using
801                  * thread-local output buffers and extra reduction.
802                  *
803                  * All the OpenMP regions in this file are trivial and should
804                  * not throw, so no need for try/catch.
805                  */
806 #pragma omp atomic
807                 f[j3]     -= tx;
808 #pragma omp atomic
809                 f[j3+1]   -= ty;
810 #pragma omp atomic
811                 f[j3+2]   -= tz;
812             }
813         }
814
815         /* The atomics below are expensive with many OpenMP threads.
816          * Here unperturbed i-particles will usually only have a few
817          * (perturbed) j-particles in the list. Thus with a buffered list
818          * we can skip a significant number of i-reductions with a check.
819          */
820         if (npair_within_cutoff > 0)
821         {
822             if (doForces)
823             {
824 #pragma omp atomic
825                 f[ii3]        += fix;
826 #pragma omp atomic
827                 f[ii3+1]      += fiy;
828 #pragma omp atomic
829                 f[ii3+2]      += fiz;
830             }
831             if (doShiftForces)
832             {
833 #pragma omp atomic
834                 fshift[is3]   += fix;
835 #pragma omp atomic
836                 fshift[is3+1] += fiy;
837 #pragma omp atomic
838                 fshift[is3+2] += fiz;
839             }
840             if (doPotential)
841             {
842                 int ggid  = gid[n];
843 #pragma omp atomic
844                 Vc[ggid] += vctot;
845 #pragma omp atomic
846                 Vv[ggid] += vvtot;
847             }
848         }
849     }
850
851 #pragma omp atomic
852     dvdl[efptCOUL]     += dvdl_coul;
853  #pragma omp atomic
854     dvdl[efptVDW]      += dvdl_vdw;
855
856     /* Estimate flops, average for free energy stuff:
857      * 12  flops per outer iteration
858      * 150 flops per inner iteration
859      */
860 #pragma omp atomic
861     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[nri]*150);
862 }
863
864 typedef void (* KernelFunction)(const t_nblist * gmx_restrict    nlist,
865                                 rvec * gmx_restrict              xx,
866                                 gmx::ForceWithShiftForces *      forceWithShiftForces,
867                                 const t_forcerec * gmx_restrict  fr,
868                                 const t_mdatoms * gmx_restrict   mdatoms,
869                                 nb_kernel_data_t * gmx_restrict  kernel_data,
870                                 t_nrnb * gmx_restrict            nrnb);
871
872 template<SoftCoreTreatment softCoreTreatment,
873          bool scLambdasOrAlphasDiffer,
874          bool vdwInteractionTypeIsEwald,
875          bool elecInteractionTypeIsEwald>
876 static KernelFunction
877 dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
878 {
879     if (vdwModifierIsPotSwitch)
880     {
881         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
882                                      elecInteractionTypeIsEwald, true>);
883     }
884     else
885     {
886         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
887                                      elecInteractionTypeIsEwald, false>);
888     }
889 }
890
891 template<SoftCoreTreatment softCoreTreatment,
892          bool scLambdasOrAlphasDiffer,
893          bool vdwInteractionTypeIsEwald>
894 static KernelFunction
895 dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
896                                     const bool vdwModifierIsPotSwitch)
897 {
898     if (elecInteractionTypeIsEwald)
899     {
900         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
901                                            true>(vdwModifierIsPotSwitch));
902     }
903     else
904     {
905         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
906                                            false>(vdwModifierIsPotSwitch));
907     }
908 }
909
910 template<SoftCoreTreatment softCoreTreatment,
911          bool scLambdasOrAlphasDiffer>
912 static KernelFunction
913 dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
914                                    const bool elecInteractionTypeIsEwald,
915                                    const bool vdwModifierIsPotSwitch)
916 {
917     if (vdwInteractionTypeIsEwald)
918     {
919         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
920                                                    true>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
921     }
922     else
923     {
924         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
925                                                    false>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
926     }
927 }
928
929 template<SoftCoreTreatment softCoreTreatment>
930 static KernelFunction
931 dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
932                                             const bool vdwInteractionTypeIsEwald,
933                                             const bool elecInteractionTypeIsEwald,
934                                             const bool vdwModifierIsPotSwitch)
935 {
936     if (scLambdasOrAlphasDiffer)
937     {
938         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
939                                                                            vdwModifierIsPotSwitch));
940     }
941     else
942     {
943         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
944                                                                             vdwModifierIsPotSwitch));
945     }
946 }
947
948 static KernelFunction
949 dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
950                const bool                 vdwInteractionTypeIsEwald,
951                const bool                 elecInteractionTypeIsEwald,
952                const bool                 vdwModifierIsPotSwitch,
953                const t_forcerec          *fr)
954 {
955     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
956     {
957         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(scLambdasOrAlphasDiffer,
958                                                                                     vdwInteractionTypeIsEwald,
959                                                                                     elecInteractionTypeIsEwald,
960                                                                                     vdwModifierIsPotSwitch));
961     }
962     else if (fr->sc_r_power == 6.0_real)
963     {
964         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(scLambdasOrAlphasDiffer,
965                                                                                        vdwInteractionTypeIsEwald,
966                                                                                        elecInteractionTypeIsEwald,
967                                                                                        vdwModifierIsPotSwitch));
968     }
969     else
970     {
971         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(scLambdasOrAlphasDiffer,
972                                                                                         vdwInteractionTypeIsEwald,
973                                                                                         elecInteractionTypeIsEwald,
974                                                                                         vdwModifierIsPotSwitch));
975     }
976 }
977
978
979 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
980                                rvec                      *xx,
981                                gmx::ForceWithShiftForces *ff,
982                                const t_forcerec          *fr,
983                                const t_mdatoms           *mdatoms,
984                                nb_kernel_data_t          *kernel_data,
985                                t_nrnb                    *nrnb)
986 {
987     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype)  || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
988                "Unsupported eeltype with free energy");
989
990     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
991     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
992     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
993     bool       scLambdasOrAlphasDiffer    = true;
994
995     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
996     {
997         scLambdasOrAlphasDiffer = false;
998     }
999     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
1000     {
1001         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
1002             fr->sc_alphacoul == fr->sc_alphavdw)
1003         {
1004             scLambdasOrAlphasDiffer = false;
1005         }
1006     }
1007     else
1008     {
1009         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1010     }
1011     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
1012                                                vdwModifierIsPotSwitch, fr);
1013     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1014 }