/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gmxpre.h"

#include "nb_free_energy.h"

#include <cmath>

#include <algorithm>

#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
#include "gromacs/gmxlib/nonbonded/nonbonded.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/fatalerror.h"


//! Enum for templating the soft-core treatment in the kernel
enum class SoftCoreTreatment
{
    None,    //!< No soft-core
    RPower6, //!< Soft-core with r-power = 6
    RPower48 //!< Soft-core with r-power = 48
};
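
/* With soft-core, the singular 1/r^p pair interactions of a vanishing end state
 * are evaluated at a modified distance rSC instead of r, defined through
 *
 *   rSC^p = alpha * lambdaFactor * sigma^p + r^p
 *
 * with p = 6 (RPower6, the standard soft-core) or p = 48 (RPower48).
 * The kernel below computes 1/rSC^p directly (the rpinvC/rpinvV variables)
 * and obtains rSC and 1/rSC from it via pthRoot().
 */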

//! Most treatments are fine with float in mixed-precision mode.
template <SoftCoreTreatment softCoreTreatment>
struct SoftCoreReal
{
    //! Real type for soft-core calculations
    using Real = real;
};

//! This treatment requires double precision for some computations.
template <>
struct SoftCoreReal<SoftCoreTreatment::RPower48>
{
    //! Real type for soft-core calculations
    using Real = double;
};

//! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
template <SoftCoreTreatment softCoreTreatment>
static inline void pthRoot(const real  r,
                           real       *pthRoot,
                           real       *invPthRoot)
{
    *invPthRoot = gmx::invsqrt(std::cbrt(r));
    *pthRoot    = 1/(*invPthRoot);
}

// We need a double version to make the specialization below work
#if !GMX_DOUBLE
//! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
template <SoftCoreTreatment softCoreTreatment>
static inline void pthRoot(const double  r,
                           real         *pthRoot,
                           double       *invPthRoot)
{
    *invPthRoot = gmx::invsqrt(std::cbrt(r));
    *pthRoot    = 1/(*invPthRoot);
}
#endif

//! Computes r^(1/p) and 1/r^(1/p) for p=48
template <>
inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
                                                 real         *pthRoot,
                                                 double       *invPthRoot)
{
    *pthRoot    = std::pow(r, 1.0/48.0);
    *invPthRoot = 1/(*pthRoot);
}
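
/* For p = 6 the p-th root is obtained without std::pow:
 *   x^(1/6) = sqrt(cbrt(x))  and  x^(-1/6) = invsqrt(cbrt(x)),
 * so one cbrt, one invsqrt and one division provide both the root and its
 * inverse. For p = 48 no comparably cheap identity is used and the r^48 values
 * span a much larger dynamic range, which is why that specialization works on
 * a double-precision argument and calls std::pow (see SoftCoreReal above).
 */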

template<SoftCoreTreatment softCoreTreatment>
static inline real calculateSigmaPow(const real sigma6)
{
    if (softCoreTreatment == SoftCoreTreatment::RPower6)
    {
        return sigma6;
    }
    else
    {
        real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
        sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
        sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
        return (sigmaPow);
    }
}

template<SoftCoreTreatment softCoreTreatment, class SCReal>
static inline real calculateRinv6(const SCReal rinvV)
{
    if (softCoreTreatment == SoftCoreTreatment::RPower6)
    {
        return rinvV;
    }
    else
    {
        real rinv6 = rinvV * rinvV;
        return (rinv6 * rinv6 * rinv6);
    }
}

static inline real calculateVdw6(const real c6, const real rinv6)
{
    return (c6 * rinv6);
}

static inline real calculateVdw12(const real c12, const real rinv6)
{
    return (c12 * rinv6 * rinv6);
}

/* reaction-field electrostatics */
template<class SCReal>
static inline SCReal
reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
                         const real two)
{
    return (qq*(rinv - two*krf*r*r));
}
template<class SCReal>
static inline real
reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
                       const real potentialShift)
{
    return (qq*(rinv + krf*r*r-potentialShift));
}

/* Ewald electrostatics */
static inline real
ewaldScalarForce(const real coulomb, const real rinv)
{
    return (coulomb*rinv);
}
static inline real
ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
{
    return (coulomb*(rinv-potentialShift));
}

/* cutoff LJ */
static inline real
lennardJonesScalarForce(const real v6, const real v12)
{
    return (v12 - v6);
}
static inline real
lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
                      const real dispersionShift, const real onesixth, const real onetwelfth)
{
    return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
}

/* Ewald LJ */
static inline real
ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
{
    return (c6grid * potentialShift * onesixth);
}

/* LJ Potential switch */
template<class SCReal>
static inline SCReal
potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
{
    if (r < rVdw)
    {
        SCReal fScalar = fScalarInp * sw - r * potential * dsw;
        return (fScalar);
    }
    return (zero);
}
template<class SCReal>
static inline real
potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
{
    if (r < rVdw)
    {
        real potential = potentialInp * sw;
        return (potential);
    }
    return (zero);
}


//! Templated free-energy non-bonded kernel
template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
static void
nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
                      rvec * gmx_restrict              xx,
                      gmx::ForceWithShiftForces *      forceWithShiftForces,
                      const t_forcerec * gmx_restrict  fr,
                      const t_mdatoms * gmx_restrict   mdatoms,
                      nb_kernel_data_t * gmx_restrict  kernel_data,
                      t_nrnb * gmx_restrict            nrnb)
{
    using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;

    constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);

#define  STATE_A  0
#define  STATE_B  1
#define  NSTATES  2
    int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
    real          shX, shY, shZ;
    real          tx, ty, tz, Fscal;
    SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
    real          Vcoul[NSTATES], Vvdw[NSTATES];
    real          iqA, iqB;
    real          qq[NSTATES], vctot;
    int           ntiA, ntiB, tj[NSTATES];
    real          vvtot;
    real          ix, iy, iz, fix, fiy, fiz;
    real          dx, dy, dz, r, rsq, rinv;
    real          c6[NSTATES], c12[NSTATES], c6grid;
    real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
    SCReal        dvdl_coul, dvdl_vdw;
    real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
    real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
    real          rinvC, rinvV;
    SCReal        rp, rpm2, rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
    real          sigma_pow[NSTATES];
    real          VV, FF;
    int           icoul, ivdw;
    int           nri;
    const int *   iinr;
    const int *   jindex;
    const int *   jjnr;
    const int *   shift;
    const int *   gid;
    const int *   typeA;
    const int *   typeB;
    int           ntype;
    const real *  shiftvec;
    const real *  chargeA;
    const real *  chargeB;
    real          sigma6_min, sigma6_def, lam_power;
    real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
    const real *  nbfp, *nbfp_grid;
    real *        dvdl;
    real *        Vv;
    real *        Vc;
    gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
    gmx_bool      bEwald, bEwaldLJ;
    real          rcutoff_max2;
    const real *  tab_ewald_F_lj = nullptr;
    const real *  tab_ewald_V_lj = nullptr;
    real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
    gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
    const real *  ewtab = nullptr;
    int           ewitab;
    real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;

    const real    onetwelfth  = 1.0/12.0;
    const real    onesixth    = 1.0/6.0;
    const real    zero        = 0.0;
    const real    half        = 0.5;
    const real    one         = 1.0;
    const real    two         = 2.0;
    const real    six         = 6.0;

    /* Extract pointer to non-bonded interaction constants */
    const interaction_const_t *ic = fr->ic;

    // TODO: We should get rid of using pointers to real
    const real          *x      = xx[0];
    real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);

    real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);

    // Extract pair list data
    nri                 = nlist->nri;
    iinr                = nlist->iinr;
    jindex              = nlist->jindex;
    jjnr                = nlist->jjnr;
    shift               = nlist->shift;
    gid                 = nlist->gid;

    shiftvec            = fr->shift_vec[0];
    chargeA             = mdatoms->chargeA;
    chargeB             = mdatoms->chargeB;
    Vc                  = kernel_data->energygrp_elec;
    typeA               = mdatoms->typeA;
    typeB               = mdatoms->typeB;
    ntype               = fr->ntype;
    nbfp                = fr->nbfp;
    nbfp_grid           = fr->ljpme_c6grid;
    Vv                  = kernel_data->energygrp_vdw;
    lambda_coul         = kernel_data->lambda[efptCOUL];
    lambda_vdw          = kernel_data->lambda[efptVDW];
    dvdl                = kernel_data->dvdl;
    alpha_coul          = fr->sc_alphacoul;
    alpha_vdw           = fr->sc_alphavdw;
    lam_power           = fr->sc_power;
    sigma6_def          = fr->sc_sigma6_def;
    sigma6_min          = fr->sc_sigma6_min;
    bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
    bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
    bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);

    // Extract data from interaction_const_t
    const real facel           = ic->epsfac;
    const real rcoulomb        = ic->rcoulomb;
    const real krf             = ic->k_rf;
    const real crf             = ic->c_rf;
    const real sh_lj_ewald     = ic->sh_lj_ewald;
    const real rvdw            = ic->rvdw;
    const real dispersionShift = ic->dispersion_shift.cpot;
    const real repulsionShift  = ic->repulsion_shift.cpot;

    // Note that the nbnxm kernels do not support Coulomb potential switching at all
    GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
               "Potential switching is not supported for Coulomb with FEP");

    if (ic->vdw_modifier == eintmodPOTSWITCH)
    {
        real d          = ic->rvdw - ic->rvdw_switch;
        vdw_swV3        = -10.0/(d*d*d);
        vdw_swV4        =  15.0/(d*d*d*d);
        vdw_swV5        =  -6.0/(d*d*d*d*d);
        vdw_swF2        = -30.0/(d*d*d);
        vdw_swF3        =  60.0/(d*d*d*d);
        vdw_swF4        = -30.0/(d*d*d*d*d);
    }
    else
    {
        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
        vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
    }
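
    /* These coefficients define the quintic switching function
     * S = 1 - 10*t^3 + 15*t^4 - 6*t^5 with t = (r - rvdw_switch)/(rvdw - rvdw_switch):
     * vdw_swV3..vdw_swV5 are the coefficients of (r - rvdw_switch)^3..^5 in S and
     * vdw_swF2..vdw_swF4 those of its derivative with respect to r. S decays
     * smoothly from 1 at rvdw_switch to 0 at rvdw, with vanishing first and
     * second derivatives at both ends; the inner loop evaluates sw = S and
     * dsw = dS/dr and applies them through potSwitchPotentialMod() and
     * potSwitchScalarForceMod().
     */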

    if (EVDW_PME(ic->vdwtype))
    {
        ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
    }
    else
    {
        ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
    }

    if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
    {
        icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
    }
    else if (EEL_PME_EWALD(ic->eeltype))
    {
        icoul        = GMX_NBKERNEL_ELEC_EWALD;
    }
    else
    {
        gmx_incons("Unsupported eeltype with Verlet and free-energy");
    }

    rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
    rcutoff_max2    = rcutoff_max2*rcutoff_max2;

    bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
    bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);

    if (bEwald || bEwaldLJ)
    {
        const auto &tables = *ic->coulombEwaldTables;
        sh_ewald       = ic->sh_ewald;
        ewtab          = tables.tableFDV0.data();
        ewtabscale     = tables.scale;
        ewtabhalfspace = half/ewtabscale;
        tab_ewald_F_lj = tables.tableF.data();
        tab_ewald_V_lj = tables.tableV.data();
    }

    /* For Ewald/PME interactions we cannot easily apply the soft-core component to
     * reciprocal space. When we use non-switched Ewald interactions, we
     * assume the soft-coring does not significantly affect the grid contribution
     * and apply the soft-core only to the full 1/r (- shift) pair contribution.
     *
     * However, we cannot use this approach for switch-modified interactions, since
     * we would then effectively end up evaluating a significantly different
     * interaction here compared to the normal (non-free-energy) kernels, either by
     * applying a cutoff at a different position than what the user requested, or by
     * switching different things (1/r rather than short-range Ewald). For these
     * settings we just use the traditional short-range Ewald interaction.
     */
    GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
                       !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
                       "Can not apply soft-core to switched Ewald potentials");

    dvdl_coul  = 0;
    dvdl_vdw   = 0;

    /* Lambda factor for state A, 1-lambda */
    LFC[STATE_A] = one - lambda_coul;
    LFV[STATE_A] = one - lambda_vdw;

    /* Lambda factor for state B, lambda */
    LFC[STATE_B] = lambda_coul;
    LFV[STATE_B] = lambda_vdw;

    /* Derivative of the lambda factor for state A and B */
    DLF[STATE_A] = -1;
    DLF[STATE_B] = 1;

    constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
    for (i = 0; i < NSTATES; i++)
    {
        lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
        dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
        lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
        dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
    }
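
    /* lfac_coul/lfac_vdw are the soft-core lambda factors (1 - LFC)^lam_power
     * and (1 - LFV)^lam_power of each state; they scale alpha*sigma^p in the
     * soft-core denominator, so state A is softened as lambda approaches 1 and
     * state B as lambda approaches 0. dlfac_coul/dlfac_vdw carry the
     * corresponding lambda derivatives, pre-divided by the soft-core r-power,
     * so the dV/dlambda accumulation below needs only one extra multiplication
     * per state.
     */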

    for (n = 0; (n < nri); n++)
    {
        int npair_within_cutoff;

        npair_within_cutoff = 0;

        is3              = 3*shift[n];
        shX              = shiftvec[is3];
        shY              = shiftvec[is3+1];
        shZ              = shiftvec[is3+2];
        nj0              = jindex[n];
        nj1              = jindex[n+1];
        ii               = iinr[n];
        ii3              = 3*ii;
        ix               = shX + x[ii3+0];
        iy               = shY + x[ii3+1];
        iz               = shZ + x[ii3+2];
        iqA              = facel*chargeA[ii];
        iqB              = facel*chargeB[ii];
        ntiA             = 2*ntype*typeA[ii];
        ntiB             = 2*ntype*typeB[ii];
        vctot            = 0;
        vvtot            = 0;
        fix              = 0;
        fiy              = 0;
        fiz              = 0;

        for (k = nj0; (k < nj1); k++)
        {
            jnr              = jjnr[k];
            j3               = 3*jnr;
            dx               = ix - x[j3];
            dy               = iy - x[j3+1];
            dz               = iz - x[j3+2];
            rsq              = dx*dx + dy*dy + dz*dz;

            if (rsq >= rcutoff_max2)
            {
                /* We save significant time by skipping all code below.
                 * Note that with soft-core interactions, the actual cut-off
                 * check might be different. But since the soft-core distance
                 * is always larger than r, checking on r here is safe.
                 */
                continue;
            }
            npair_within_cutoff++;

            if (rsq > 0)
            {
                /* Note that unlike in the nbnxn kernels, we do not need
                 * to clamp the value of rsq before taking the invsqrt
                 * to avoid NaN in the LJ calculation, since here we do
                 * not calculate LJ interactions when C6 and C12 are zero.
                 */

                rinv         = gmx::invsqrt(rsq);
                r            = rsq*rinv;
            }
            else
            {
                /* The force at r=0 is zero, because of symmetry.
                 * But note that the potential is in general non-zero,
                 * since the soft-cored r will be non-zero.
                 */
                rinv         = 0;
                r            = 0;
            }

            if (softCoreTreatment == SoftCoreTreatment::None)
            {
                /* When not using soft-core, the soft-core power p does not
                 * affect the results, so we use p = 0, which gives the
                 * simplest math and the cheapest code.
                 */
                rpm2         = rinv*rinv;
                rp           = 1;
            }
            if (softCoreTreatment == SoftCoreTreatment::RPower6)
            {
                rpm2         = rsq*rsq;  /* r4 */
                rp           = rpm2*rsq; /* r6 */
            }
            if (softCoreTreatment == SoftCoreTreatment::RPower48)
            {
                rp           = rsq*rsq*rsq; /* r6 */
                rp           = rp*rp;       /* r12 */
                rp           = rp*rp;       /* r24 */
                rp           = rp*rp;       /* r48 */
                rpm2         = rp/rsq;      /* r46 */
            }
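
            /* rp = r^p enters the soft-core denominator alpha*lfac*sigma^p + rp
             * below, while rpm2 = r^(p-2) converts the per-state scalar forces
             * (dV/drC * rC^(1-p) after the multiplication by rpinvC/rpinvV)
             * back to a Cartesian force: the assembly loop multiplies by rpm2
             * and the force update then multiplies by the vector (dx, dy, dz).
             */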

            Fscal = 0;

            qq[STATE_A]      = iqA*chargeA[jnr];
            qq[STATE_B]      = iqB*chargeB[jnr];

            tj[STATE_A]      = ntiA+2*typeA[jnr];
            tj[STATE_B]      = ntiB+2*typeB[jnr];

            if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
            {
                c6[STATE_A]      = nbfp[tj[STATE_A]];
                c6[STATE_B]      = nbfp[tj[STATE_B]];

                for (i = 0; i < NSTATES; i++)
                {
                    c12[i]             = nbfp[tj[i]+1];
                    if (useSoftCore)
                    {
                        if ((c6[i] > 0) && (c12[i] > 0))
                        {
                            /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
                            sigma6[i]       = half*c12[i]/c6[i];
                            if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
                            {
                                sigma6[i] = sigma6_min;
                            }
                        }
                        else
                        {
                            sigma6[i]       = sigma6_def;
                        }
                        sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
                    }
                }

                if (useSoftCore)
                {
                    /* Only use soft-core when at least one end state has zero c12:
                     * soft-core exists to avoid the singularity at r = 0. */
                    if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
                    {
                        alpha_vdw_eff    = 0;
                        alpha_coul_eff   = 0;
                    }
                    else
                    {
                        alpha_vdw_eff    = alpha_vdw;
                        alpha_coul_eff   = alpha_coul;
                    }
                }

                for (i = 0; i < NSTATES; i++)
                {
                    FscalC[i]    = 0;
                    FscalV[i]    = 0;
                    Vcoul[i]     = 0;
                    Vvdw[i]      = 0;

                    /* Only spend time on A or B state if it is non-zero */
                    if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
                    {
                        /* this section has to be inside the loop because of the dependence on sigma_pow */
                        if (useSoftCore)
                        {
                            rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
                            pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);

                            if (scLambdasOrAlphasDiffer)
                            {
                                rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
                                pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
                            }
                            else
                            {
                                /* We can avoid one expensive pow and one / operation */
                                rpinvV = rpinvC;
                                rinvV  = rinvC;
                                rV     = rC;
                            }
                        }
                        else
                        {
                            rpinvC     = 1;
                            rinvC      = rinv;
                            rC         = r;

                            rpinvV     = 1;
                            rinvV      = rinv;
                            rV         = r;
                        }

                        /* Only process the coulomb interactions if we have charges,
                         * and if we either include all entries in the list (no cutoff
                         * used in the kernel), or if we are within the cutoff.
                         */
                        bComputeElecInteraction =
                            ( bEwald && r < rcoulomb) ||
                            (!bEwald && rC < rcoulomb);

                        if ( (qq[i] != 0) && bComputeElecInteraction)
                        {
                            if (bEwald)
                            {
                                Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
                                FscalC[i] = ewaldScalarForce(qq[i], rinvC);
                            }
                            else
                            {
                                Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
                                FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
                            }
                        }

                        /* Only process the VDW interactions if we have
                         * some non-zero parameters, and if we either
                         * include all entries in the list (no cutoff used
                         * in the kernel), or if we are within the cutoff.
                         */
                        bComputeVdwInteraction =
                            ( bEwaldLJ && r < rvdw) ||
                            (!bEwaldLJ && rV < rvdw);
                        if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
                        {
                            real rinv6;
                            if (softCoreTreatment == SoftCoreTreatment::RPower6)
                            {
                                rinv6  = calculateRinv6<softCoreTreatment>(rpinvV);
                            }
                            else
                            {
                                rinv6  = calculateRinv6<softCoreTreatment>(rinvV);
                            }
                            real Vvdw6  = calculateVdw6(c6[i], rinv6);
                            real Vvdw12 = calculateVdw12(c12[i], rinv6);

                            Vvdw[i]     = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
                            FscalV[i]   = lennardJonesScalarForce(Vvdw6, Vvdw12);

                            if (bEwaldLJ)
                            {
                                /* Subtract the grid potential at the cut-off */
                                Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
                            }

                            if (ic->vdw_modifier == eintmodPOTSWITCH)
                            {
                                real d    = rV - ic->rvdw_switch;
                                d         = (d > zero) ? d : zero;
                                real d2   = d*d;
                                real sw   = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
                                real dsw  = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));

                                FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
                                Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
                            }
                        }

                        /* FscalC (and FscalV) now contain: dV/drC * rC.
                         * Here we multiply by rC^-p, so they become: dV/drC * rC^(1-p).
                         * Further down we first multiply by r^(p-2) and then by
                         * the vector r, which in total gives: dV/drC * (r/rC)^(1-p).
                         */
                        FscalC[i] *= rpinvC;
                        FscalV[i] *= rpinvV;
                    }
                }

                /* Assemble A and B states */
                for (i = 0; i < NSTATES; i++)
                {
                    vctot         += LFC[i]*Vcoul[i];
                    vvtot         += LFV[i]*Vvdw[i];

                    Fscal     += LFC[i]*FscalC[i]*rpm2;
                    Fscal     += LFV[i]*FscalV[i]*rpm2;

                    if (useSoftCore)
                    {
                        dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
                        dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
                    }
                    else
                    {
                        dvdl_coul += Vcoul[i]*DLF[i];
                        dvdl_vdw  += Vvdw[i]*DLF[i];
                    }
                }
            }
            else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
            {
                /* For excluded pairs, which are only in this pair list when
                 * using the Verlet scheme, we don't use soft-core.
                 * The group scheme also doesn't soft-core for these.
                 * As there is no singularity, there is no need for soft-core.
                 */
                VV = krf*rsq - crf;
                FF = -two*krf;

                if (ii == jnr)
                {
                    VV *= half;
                }

                for (i = 0; i < NSTATES; i++)
                {
                    vctot      += LFC[i]*qq[i]*VV;
                    Fscal      += LFC[i]*qq[i]*FF;
                    dvdl_coul  += DLF[i]*qq[i]*VV;
                }
            }

            if (bEwald && r < rcoulomb)
            {
                /* See comment in the preamble. When using Ewald interactions
                 * (unless we use a switch modifier) we subtract the reciprocal-space
                 * Ewald component here which made it possible to apply the free
                 * energy interaction to 1/r (vanilla coulomb short-range part)
                 * above. This gets us closer to the ideal case of applying
                 * the softcore to the entire electrostatic interaction,
                 * including the reciprocal-space component.
                 */
                real v_lr, f_lr;

                ewrt      = r*ewtabscale;
                ewitab    = static_cast<int>(ewrt);
                eweps     = ewrt-ewitab;
                ewitab    = 4*ewitab;
                f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
                v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
                f_lr     *= rinv;

                /* Note that any possible Ewald shift has already been applied in
                 * the normal interaction part above.
                 */

                if (ii == jnr)
                {
                    /* If we get here, the i particle (ii) has itself (jnr)
                     * in its neighborlist. This can only happen with the Verlet
                     * scheme, and corresponds to a self-interaction that will
                     * occur twice. Scale it down by 50% to only include it once.
                     */
                    v_lr *= half;
                }

                for (i = 0; i < NSTATES; i++)
                {
                    vctot      -= LFC[i]*qq[i]*v_lr;
                    Fscal      -= LFC[i]*qq[i]*f_lr;
                    dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
                }
            }

            if (bEwaldLJ && r < rvdw)
            {
                /* See comment in the preamble. When using LJ-Ewald interactions
                 * (unless we use a switch modifier) we subtract the reciprocal-space
                 * Ewald component here which made it possible to apply the free
                 * energy interaction to r^-6 (vanilla LJ6 short-range part)
                 * above. This gets us closer to the ideal case of applying
                 * the softcore to the entire VdW interaction,
                 * including the reciprocal-space component.
                 */
                /* We could also use the analytical form here instead of a
                 * table, but that can cause issues for r close to 0 for
                 * non-interacting pairs.
                 */
                real rs, frac, f_lr;
                int  ri;

                rs     = rsq*rinv*ewtabscale;
                ri     = static_cast<int>(rs);
                frac   = rs - ri;
                f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
                /* TODO: Currently the Ewald LJ table does not contain
                 * the factor 1/6, we should add this.
                 */
                FF     = f_lr*rinv/six;
                VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;

                if (ii == jnr)
                {
                    /* If we get here, the i particle (ii) has itself (jnr)
                     * in its neighborlist. This can only happen with the Verlet
                     * scheme, and corresponds to a self-interaction that will
                     * occur twice. Scale it down by 50% to only include it once.
                     */
                    VV *= half;
                }

                for (i = 0; i < NSTATES; i++)
                {
                    c6grid      = nbfp_grid[tj[i]];
                    vvtot      += LFV[i]*c6grid*VV;
                    Fscal      += LFV[i]*c6grid*FF;
                    dvdl_vdw   += (DLF[i]*c6grid)*VV;
                }
            }

            if (bDoForces)
            {
                tx         = Fscal*dx;
                ty         = Fscal*dy;
                tz         = Fscal*dz;
                fix        = fix + tx;
                fiy        = fiy + ty;
                fiz        = fiz + tz;
                /* OpenMP atomics are expensive, but this kernel is also
                 * expensive, so we can take this hit, instead of using
                 * thread-local output buffers and extra reduction.
                 *
                 * All the OpenMP regions in this file are trivial and should
                 * not throw, so no need for try/catch.
                 */
#pragma omp atomic
                f[j3]     -= tx;
#pragma omp atomic
                f[j3+1]   -= ty;
#pragma omp atomic
                f[j3+2]   -= tz;
            }
        }

        /* The atomics below are expensive with many OpenMP threads.
         * Here unperturbed i-particles will usually only have a few
         * (perturbed) j-particles in the list. Thus with a buffered list
         * we can skip a significant number of i-reductions with a check.
         */
        if (npair_within_cutoff > 0)
        {
            if (bDoForces)
            {
#pragma omp atomic
                f[ii3]        += fix;
#pragma omp atomic
                f[ii3+1]      += fiy;
#pragma omp atomic
                f[ii3+2]      += fiz;
            }
            if (bDoShiftForces)
            {
#pragma omp atomic
                fshift[is3]   += fix;
#pragma omp atomic
                fshift[is3+1] += fiy;
#pragma omp atomic
                fshift[is3+2] += fiz;
            }
            if (bDoPotential)
            {
                ggid               = gid[n];
#pragma omp atomic
                Vc[ggid]          += vctot;
#pragma omp atomic
                Vv[ggid]          += vvtot;
            }
        }
    }

#pragma omp atomic
    dvdl[efptCOUL]     += dvdl_coul;
#pragma omp atomic
    dvdl[efptVDW]      += dvdl_vdw;

    /* Estimate flops, average for free energy stuff:
     * 12  flops per outer iteration
     * 150 flops per inner iteration
     */
#pragma omp atomic
    inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
}

void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
                               rvec                      *xx,
                               gmx::ForceWithShiftForces *ff,
                               const t_forcerec          *fr,
                               const t_mdatoms           *mdatoms,
                               nb_kernel_data_t          *kernel_data,
                               t_nrnb                    *nrnb)
{
    if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
    {
        nb_free_energy_kernel<SoftCoreTreatment::None, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
    }
    else if (fr->sc_r_power == 6.0_real)
    {
        if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
            fr->sc_alphacoul == fr->sc_alphavdw)
        {
            nb_free_energy_kernel<SoftCoreTreatment::RPower6, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
        }
        else
        {
            nb_free_energy_kernel<SoftCoreTreatment::RPower6, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
        }
    }
    else if (fr->sc_r_power == 48.0_real)
    {
        nb_free_energy_kernel<SoftCoreTreatment::RPower48, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
    }
    else
    {
        GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
    }
}