More templating of gmx_nb_free_energy_kernel()
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forceoutput.h"
51 #include "gromacs/mdtypes/forcerec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/utility/fatalerror.h"
54
55
56 //! Enum for templating the soft-core treatment in the kernel
57 enum class SoftCoreTreatment
58 {
59     None,    //!< No soft-core
60     RPower6, //!< Soft-core with r-power = 6
61     RPower48 //!< Soft-core with r-power = 48
62 };
63
64 //! Most treatments are fine with float in mixed-precision mode.
65 template <SoftCoreTreatment softCoreTreatment>
66 struct SoftCoreReal
67 {
68     //! Real type for soft-core calculations
69     using Real = real;
70 };
71
72 //! This treatment requires double precision for some computations.
73 template <>
74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
75 {
76     //! Real type for soft-core calculations
77     using Real = double;
78 };
79
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template <SoftCoreTreatment softCoreTreatment>
82 static inline void pthRoot(const real  r,
83                            real       *pthRoot,
84                            real       *invPthRoot)
85 {
86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
87     *pthRoot    = 1/(*invPthRoot);
88 }
89
90 // We need a double version to make the specialization below work
91 #if !GMX_DOUBLE
92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
93 template <SoftCoreTreatment softCoreTreatment>
94 static inline void pthRoot(const double  r,
95                            real         *pthRoot,
96                            double       *invPthRoot)
97 {
98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
99     *pthRoot    = 1/(*invPthRoot);
100 }
101 #endif
102
103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
104 template <>
105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
106                                                  real         *pthRoot,
107                                                  double       *invPthRoot)
108 {
109     *pthRoot    = std::pow(r, 1.0/48.0);
110     *invPthRoot = 1/(*pthRoot);
111 }
112
113 template<SoftCoreTreatment softCoreTreatment>
114 static inline real calculateSigmaPow(const real sigma6)
115 {
116     if (softCoreTreatment == SoftCoreTreatment::RPower6)
117     {
118         return sigma6;
119     }
120     else
121     {
122         real sigmaPow = sigma6 * sigma6;     /* sigma^12 */
123         sigmaPow      = sigmaPow * sigmaPow; /* sigma^24 */
124         sigmaPow      = sigmaPow * sigmaPow; /* sigma^48 */
125         return (sigmaPow);
126     }
127 }
128
129 template<SoftCoreTreatment softCoreTreatment, class SCReal>
130 static inline real calculateRinv6(const SCReal rinvV)
131 {
132     if (softCoreTreatment == SoftCoreTreatment::RPower6)
133     {
134         return rinvV;
135     }
136     else
137     {
138         real rinv6 = rinvV * rinvV;
139         return (rinv6 * rinv6 * rinv6);
140     }
141 }
142
143 static inline real calculateVdw6(const real c6, const real rinv6)
144 {
145     return (c6 * rinv6);
146 }
147
148 static inline real calculateVdw12(const real c12, const real rinv6)
149 {
150     return (c12 * rinv6 * rinv6);
151 }
152
153 /* reaction-field electrostatics */
154 template<class SCReal>
155 static inline SCReal
156 reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf,
157                          const real two)
158 {
159     return (qq*(rinv - two*krf*r*r));
160 }
161 template<class SCReal>
162 static inline real
163 reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf,
164                        const real potentialShift)
165 {
166     return (qq*(rinv + krf*r*r-potentialShift));
167 }
168
169 /* Ewald electrostatics */
170 static inline real
171 ewaldScalarForce(const real coulomb, const real rinv)
172 {
173     return (coulomb*rinv);
174 }
175 static inline real
176 ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
177 {
178     return (coulomb*(rinv-potentialShift));
179 }
180
181 /* cutoff LJ */
182 static inline real
183 lennardJonesScalarForce(const real v6, const real v12)
184 {
185     return (v12 - v6);
186 }
187 static inline real
188 lennardJonesPotential(const real v6, const real v12, const real c6, const real c12, const real repulsionShift,
189                       const real dispersionShift, const real onesixth, const real onetwelfth)
190 {
191     return ((v12 + c12*repulsionShift)*onetwelfth - (v6 + c6*dispersionShift)*onesixth);
192 }
193
194 /* Ewald LJ */
195 static inline real
196 ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
197 {
198     return (c6grid * potentialShift * onesixth);
199 }
200
201 /* LJ Potential switch */
202 template<class SCReal>
203 static inline SCReal
204 potSwitchScalarForceMod(const SCReal fScalarInp, const real potential, const real sw, const SCReal r, const real rVdw, const real dsw, const real zero)
205 {
206     if (r < rVdw)
207     {
208         SCReal fScalar = fScalarInp * sw - r * potential * dsw;
209         return (fScalar);
210     }
211     return (zero);
212 }
213 template<class SCReal>
214 static inline real
215 potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
216 {
217     if (r < rVdw)
218     {
219         real potential = potentialInp * sw;
220         return (potential);
221     }
222     return (zero);
223 }
224
225
226 //! Templated free-energy non-bonded kernel
227 template<SoftCoreTreatment softCoreTreatment,
228          bool              scLambdasOrAlphasDiffer,
229          bool              vdwInteractionTypeIsEwald,
230          bool              elecInteractionTypeIsEwald,
231          bool              vdwModifierIsPotSwitch>
232 static void
233 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
234                       rvec * gmx_restrict              xx,
235                       gmx::ForceWithShiftForces *      forceWithShiftForces,
236                       const t_forcerec * gmx_restrict  fr,
237                       const t_mdatoms * gmx_restrict   mdatoms,
238                       nb_kernel_data_t * gmx_restrict  kernel_data,
239                       t_nrnb * gmx_restrict            nrnb)
240 {
241     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
242
243     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
244
245 #define  STATE_A  0
246 #define  STATE_B  1
247 #define  NSTATES  2
248     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
249     real          shX, shY, shZ;
250     real          tx, ty, tz, Fscal;
251     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
252     real          Vcoul[NSTATES], Vvdw[NSTATES];
253     real          iqA, iqB;
254     real          qq[NSTATES], vctot;
255     int           ntiA, ntiB, tj[NSTATES];
256     real          vvtot;
257     real          ix, iy, iz, fix, fiy, fiz;
258     real          dx, dy, dz, r, rsq, rinv;
259     real          c6[NSTATES], c12[NSTATES], c6grid;
260     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
261     SCReal        dvdl_coul, dvdl_vdw;
262     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
263     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
264     real          rinvC, rinvV;
265     SCReal        rp, rpm2, rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
266     real          sigma_pow[NSTATES];
267     real          VV, FF;
268     int           icoul = GMX_NBKERNEL_ELEC_NONE;
269     int           nri;
270     const int *   iinr;
271     const int *   jindex;
272     const int *   jjnr;
273     const int *   shift;
274     const int *   gid;
275     const int *   typeA;
276     const int *   typeB;
277     int           ntype;
278     const real *  shiftvec;
279     const real *  chargeA;
280     const real *  chargeB;
281     real          sigma6_min, sigma6_def, lam_power;
282     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
283     const real *  nbfp, *nbfp_grid;
284     real *        dvdl;
285     real *        Vv;
286     real *        Vc;
287     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
288     real          rcutoff_max2;
289     const real *  tab_ewald_F_lj = nullptr;
290     const real *  tab_ewald_V_lj = nullptr;
291     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
292     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
293     const real *  ewtab = nullptr;
294     int           ewitab;
295     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
296
297     const real    onetwelfth  = 1.0/12.0;
298     const real    onesixth    = 1.0/6.0;
299     const real    zero        = 0.0;
300     const real    half        = 0.5;
301     const real    one         = 1.0;
302     const real    two         = 2.0;
303     const real    six         = 6.0;
304
305     /* Extract pointer to non-bonded interaction constants */
306     const interaction_const_t *ic = fr->ic;
307
308     // TODO: We should get rid of using pointers to real
309     const real          *x      = xx[0];
310     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
311
312     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
313
314     // Extract pair list data
315     nri                 = nlist->nri;
316     iinr                = nlist->iinr;
317     jindex              = nlist->jindex;
318     jjnr                = nlist->jjnr;
319     shift               = nlist->shift;
320     gid                 = nlist->gid;
321
322     shiftvec            = fr->shift_vec[0];
323     chargeA             = mdatoms->chargeA;
324     chargeB             = mdatoms->chargeB;
325     Vc                  = kernel_data->energygrp_elec;
326     typeA               = mdatoms->typeA;
327     typeB               = mdatoms->typeB;
328     ntype               = fr->ntype;
329     nbfp                = fr->nbfp;
330     nbfp_grid           = fr->ljpme_c6grid;
331     Vv                  = kernel_data->energygrp_vdw;
332     lambda_coul         = kernel_data->lambda[efptCOUL];
333     lambda_vdw          = kernel_data->lambda[efptVDW];
334     dvdl                = kernel_data->dvdl;
335     alpha_coul          = fr->sc_alphacoul;
336     alpha_vdw           = fr->sc_alphavdw;
337     lam_power           = fr->sc_power;
338     sigma6_def          = fr->sc_sigma6_def;
339     sigma6_min          = fr->sc_sigma6_min;
340     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
341     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
342     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
343
344     // Extract data from interaction_const_t
345     const real facel           = ic->epsfac;
346     const real rcoulomb        = ic->rcoulomb;
347     const real krf             = ic->k_rf;
348     const real crf             = ic->c_rf;
349     const real sh_lj_ewald     = ic->sh_lj_ewald;
350     const real rvdw            = ic->rvdw;
351     const real dispersionShift = ic->dispersion_shift.cpot;
352     const real repulsionShift  = ic->repulsion_shift.cpot;
353
354     // Note that the nbnxm kernels do not support Coulomb potential switching at all
355     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
356                "Potential switching is not supported for Coulomb with FEP");
357
358     if (vdwModifierIsPotSwitch)
359     {
360         real d          = ic->rvdw - ic->rvdw_switch;
361         vdw_swV3        = -10.0/(d*d*d);
362         vdw_swV4        =  15.0/(d*d*d*d);
363         vdw_swV5        =  -6.0/(d*d*d*d*d);
364         vdw_swF2        = -30.0/(d*d*d);
365         vdw_swF3        =  60.0/(d*d*d*d);
366         vdw_swF4        = -30.0/(d*d*d*d*d);
367     }
368     else
369     {
370         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
371         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
372     }
373
374     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
375     {
376         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
377     }
378
379     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
380     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
381
382     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
383     {
384         const auto &tables = *ic->coulombEwaldTables;
385         sh_ewald       = ic->sh_ewald;
386         ewtab          = tables.tableFDV0.data();
387         ewtabscale     = tables.scale;
388         ewtabhalfspace = half/ewtabscale;
389         tab_ewald_F_lj = tables.tableF.data();
390         tab_ewald_V_lj = tables.tableV.data();
391     }
392
393     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
394      * reciprocal space. When we use non-switched Ewald interactions, we
395      * assume the soft-coring does not significantly affect the grid contribution
396      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
397      *
398      * However, we cannot use this approach for switch-modified since we would then
399      * effectively end up evaluating a significantly different interaction here compared to the
400      * normal (non-free-energy) kernels, either by applying a cutoff at a different
401      * position than what the user requested, or by switching different
402      * things (1/r rather than short-range Ewald). For these settings, we just
403      * use the traditional short-range Ewald interaction in that case.
404      */
405     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald  && vdwModifierIsPotSwitch),
406                        "Can not apply soft-core to switched Ewald potentials");
407
408     dvdl_coul  = 0;
409     dvdl_vdw   = 0;
410
411     /* Lambda factor for state A, 1-lambda*/
412     LFC[STATE_A] = one - lambda_coul;
413     LFV[STATE_A] = one - lambda_vdw;
414
415     /* Lambda factor for state B, lambda*/
416     LFC[STATE_B] = lambda_coul;
417     LFV[STATE_B] = lambda_vdw;
418
419     /*derivative of the lambda factor for state A and B */
420     DLF[STATE_A] = -1;
421     DLF[STATE_B] = 1;
422
423     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
424     for (i = 0; i < NSTATES; i++)
425     {
426         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
427         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
428         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
429         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
430     }
431
432     for (n = 0; (n < nri); n++)
433     {
434         int npair_within_cutoff;
435
436         npair_within_cutoff = 0;
437
438         is3              = 3*shift[n];
439         shX              = shiftvec[is3];
440         shY              = shiftvec[is3+1];
441         shZ              = shiftvec[is3+2];
442         nj0              = jindex[n];
443         nj1              = jindex[n+1];
444         ii               = iinr[n];
445         ii3              = 3*ii;
446         ix               = shX + x[ii3+0];
447         iy               = shY + x[ii3+1];
448         iz               = shZ + x[ii3+2];
449         iqA              = facel*chargeA[ii];
450         iqB              = facel*chargeB[ii];
451         ntiA             = 2*ntype*typeA[ii];
452         ntiB             = 2*ntype*typeB[ii];
453         vctot            = 0;
454         vvtot            = 0;
455         fix              = 0;
456         fiy              = 0;
457         fiz              = 0;
458
459         for (k = nj0; (k < nj1); k++)
460         {
461             jnr              = jjnr[k];
462             j3               = 3*jnr;
463             dx               = ix - x[j3];
464             dy               = iy - x[j3+1];
465             dz               = iz - x[j3+2];
466             rsq              = dx*dx + dy*dy + dz*dz;
467
468             if (rsq >= rcutoff_max2)
469             {
470                 /* We save significant time by skipping all code below.
471                  * Note that with soft-core interactions, the actual cut-off
472                  * check might be different. But since the soft-core distance
473                  * is always larger than r, checking on r here is safe.
474                  */
475                 continue;
476             }
477             npair_within_cutoff++;
478
479             if (rsq > 0)
480             {
481                 /* Note that unlike in the nbnxn kernels, we do not need
482                  * to clamp the value of rsq before taking the invsqrt
483                  * to avoid NaN in the LJ calculation, since here we do
484                  * not calculate LJ interactions when C6 and C12 are zero.
485                  */
486
487                 rinv         = gmx::invsqrt(rsq);
488                 r            = rsq*rinv;
489             }
490             else
491             {
492                 /* The force at r=0 is zero, because of symmetry.
493                  * But note that the potential is in general non-zero,
494                  * since the soft-cored r will be non-zero.
495                  */
496                 rinv         = 0;
497                 r            = 0;
498             }
499
500             if (softCoreTreatment == SoftCoreTreatment::None)
501             {
502                 /* The soft-core power p will not affect the results
503                  * with not using soft-core, so we use power of 0 which gives
504                  * the simplest math and cheapest code.
505                  */
506                 rpm2         = rinv*rinv;
507                 rp           = 1;
508             }
509             if (softCoreTreatment == SoftCoreTreatment::RPower6)
510             {
511                 rpm2         = rsq*rsq;  /* r4 */
512                 rp           = rpm2*rsq; /* r6 */
513             }
514             if (softCoreTreatment == SoftCoreTreatment::RPower48)
515             {
516                 rp           = rsq*rsq*rsq; /* r6 */
517                 rp           = rp*rp;       /* r12 */
518                 rp           = rp*rp;       /* r24 */
519                 rp           = rp*rp;       /* r48 */
520                 rpm2         = rp/rsq;      /* r46 */
521             }
522
523             Fscal = 0;
524
525             qq[STATE_A]      = iqA*chargeA[jnr];
526             qq[STATE_B]      = iqB*chargeB[jnr];
527
528             tj[STATE_A]      = ntiA+2*typeA[jnr];
529             tj[STATE_B]      = ntiB+2*typeB[jnr];
530
531             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
532             {
533                 c6[STATE_A]      = nbfp[tj[STATE_A]];
534                 c6[STATE_B]      = nbfp[tj[STATE_B]];
535
536                 for (i = 0; i < NSTATES; i++)
537                 {
538                     c12[i]             = nbfp[tj[i]+1];
539                     if (useSoftCore)
540                     {
541                         if ((c6[i] > 0) && (c12[i] > 0))
542                         {
543                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
544                             sigma6[i]       = half*c12[i]/c6[i];
545                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
546                             {
547                                 sigma6[i] = sigma6_min;
548                             }
549                         }
550                         else
551                         {
552                             sigma6[i]       = sigma6_def;
553                         }
554                         sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
555                     }
556                 }
557
558                 if (useSoftCore)
559                 {
560                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
561                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
562                     {
563                         alpha_vdw_eff    = 0;
564                         alpha_coul_eff   = 0;
565                     }
566                     else
567                     {
568                         alpha_vdw_eff    = alpha_vdw;
569                         alpha_coul_eff   = alpha_coul;
570                     }
571                 }
572
573                 for (i = 0; i < NSTATES; i++)
574                 {
575                     FscalC[i]    = 0;
576                     FscalV[i]    = 0;
577                     Vcoul[i]     = 0;
578                     Vvdw[i]      = 0;
579
580                     /* Only spend time on A or B state if it is non-zero */
581                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
582                     {
583                         /* this section has to be inside the loop because of the dependence on sigma_pow */
584                         if (useSoftCore)
585                         {
586                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
587                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
588
589                             if (scLambdasOrAlphasDiffer)
590                             {
591                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
592                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
593                             }
594                             else
595                             {
596                                 /* We can avoid one expensive pow and one / operation */
597                                 rpinvV = rpinvC;
598                                 rinvV  = rinvC;
599                                 rV     = rC;
600                             }
601                         }
602                         else
603                         {
604                             rpinvC     = 1;
605                             rinvC      = rinv;
606                             rC         = r;
607
608                             rpinvV     = 1;
609                             rinvV      = rinv;
610                             rV         = r;
611                         }
612
613                         /* Only process the coulomb interactions if we have charges,
614                          * and if we either include all entries in the list (no cutoff
615                          * used in the kernel), or if we are within the cutoff.
616                          */
617                         bComputeElecInteraction =
618                             ( elecInteractionTypeIsEwald && r < rcoulomb) ||
619                             (!elecInteractionTypeIsEwald && rC < rcoulomb);
620
621                         if ( (qq[i] != 0) && bComputeElecInteraction)
622                         {
623                             if (elecInteractionTypeIsEwald)
624                             {
625                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
626                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
627                             }
628                             else
629                             {
630                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
631                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
632                             }
633                         }
634
635                         /* Only process the VDW interactions if we have
636                          * some non-zero parameters, and if we either
637                          * include all entries in the list (no cutoff used
638                          * in the kernel), or if we are within the cutoff.
639                          */
640                         bComputeVdwInteraction =
641                             ( vdwInteractionTypeIsEwald && r < rvdw) ||
642                             (!vdwInteractionTypeIsEwald && rV < rvdw);
643                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
644                         {
645                             real rinv6;
646                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
647                             {
648                                 rinv6  = calculateRinv6<softCoreTreatment>(rpinvV);
649                             }
650                             else
651                             {
652                                 rinv6  = calculateRinv6<softCoreTreatment>(rinvV);
653                             }
654                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
655                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
656
657                             Vvdw[i]     = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift, dispersionShift, onesixth, onetwelfth);
658                             FscalV[i]   = lennardJonesScalarForce(Vvdw6, Vvdw12);
659
660                             if (vdwInteractionTypeIsEwald)
661                             {
662                                 /* Subtract the grid potential at the cut-off */
663                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]], sh_lj_ewald, onesixth);
664                             }
665
666                             if (vdwModifierIsPotSwitch)
667                             {
668                                 real d    = rV - ic->rvdw_switch;
669                                 d         = (d > zero) ? d : zero;
670                                 real d2   = d*d;
671                                 real sw   = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
672                                 real dsw  = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
673
674                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV, rvdw, dsw, zero);
675                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
676                             }
677                         }
678
679                         /* FscalC (and FscalV) now contain: dV/drC * rC
680                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
681                          * Further down we first multiply by r^p-2 and then by
682                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
683                          */
684                         FscalC[i] *= rpinvC;
685                         FscalV[i] *= rpinvV;
686                     }
687                 }
688
689                 /* Assemble A and B states */
690                 for (i = 0; i < NSTATES; i++)
691                 {
692                     vctot         += LFC[i]*Vcoul[i];
693                     vvtot         += LFV[i]*Vvdw[i];
694
695                     Fscal     += LFC[i]*FscalC[i]*rpm2;
696                     Fscal     += LFV[i]*FscalV[i]*rpm2;
697
698                     if (useSoftCore)
699                     {
700                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
701                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
702                     }
703                     else
704                     {
705                         dvdl_coul += Vcoul[i]*DLF[i];
706                         dvdl_vdw  += Vvdw[i]*DLF[i];
707                     }
708                 }
709             }
710             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
711             {
712                 /* For excluded pairs, which are only in this pair list when
713                  * using the Verlet scheme, we don't use soft-core.
714                  * The group scheme also doesn't soft-core for these.
715                  * As there is no singularity, there is no need for soft-core.
716                  */
717                 VV = krf*rsq - crf;
718                 FF = -two*krf;
719
720                 if (ii == jnr)
721                 {
722                     VV *= half;
723                 }
724
725                 for (i = 0; i < NSTATES; i++)
726                 {
727                     vctot      += LFC[i]*qq[i]*VV;
728                     Fscal      += LFC[i]*qq[i]*FF;
729                     dvdl_coul  += DLF[i]*qq[i]*VV;
730                 }
731             }
732
733             if (elecInteractionTypeIsEwald && r < rcoulomb)
734             {
735                 /* See comment in the preamble. When using Ewald interactions
736                  * (unless we use a switch modifier) we subtract the reciprocal-space
737                  * Ewald component here which made it possible to apply the free
738                  * energy interaction to 1/r (vanilla coulomb short-range part)
739                  * above. This gets us closer to the ideal case of applying
740                  * the softcore to the entire electrostatic interaction,
741                  * including the reciprocal-space component.
742                  */
743                 real v_lr, f_lr;
744
745                 ewrt      = r*ewtabscale;
746                 ewitab    = static_cast<int>(ewrt);
747                 eweps     = ewrt-ewitab;
748                 ewitab    = 4*ewitab;
749                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
750                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
751                 f_lr     *= rinv;
752
753                 /* Note that any possible Ewald shift has already been applied in
754                  * the normal interaction part above.
755                  */
756
757                 if (ii == jnr)
758                 {
759                     /* If we get here, the i particle (ii) has itself (jnr)
760                      * in its neighborlist. This can only happen with the Verlet
761                      * scheme, and corresponds to a self-interaction that will
762                      * occur twice. Scale it down by 50% to only include it once.
763                      */
764                     v_lr *= half;
765                 }
766
767                 for (i = 0; i < NSTATES; i++)
768                 {
769                     vctot      -= LFC[i]*qq[i]*v_lr;
770                     Fscal      -= LFC[i]*qq[i]*f_lr;
771                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
772                 }
773             }
774
775             if (vdwInteractionTypeIsEwald && r < rvdw)
776             {
777                 /* See comment in the preamble. When using LJ-Ewald interactions
778                  * (unless we use a switch modifier) we subtract the reciprocal-space
779                  * Ewald component here which made it possible to apply the free
780                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
781                  * above. This gets us closer to the ideal case of applying
782                  * the softcore to the entire VdW interaction,
783                  * including the reciprocal-space component.
784                  */
785                 /* We could also use the analytical form here
786                  * iso a table, but that can cause issues for
787                  * r close to 0 for non-interacting pairs.
788                  */
789                 real rs, frac, f_lr;
790                 int  ri;
791
792                 rs     = rsq*rinv*ewtabscale;
793                 ri     = static_cast<int>(rs);
794                 frac   = rs - ri;
795                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
796                 /* TODO: Currently the Ewald LJ table does not contain
797                  * the factor 1/6, we should add this.
798                  */
799                 FF     = f_lr*rinv/six;
800                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
801
802                 if (ii == jnr)
803                 {
804                     /* If we get here, the i particle (ii) has itself (jnr)
805                      * in its neighborlist. This can only happen with the Verlet
806                      * scheme, and corresponds to a self-interaction that will
807                      * occur twice. Scale it down by 50% to only include it once.
808                      */
809                     VV *= half;
810                 }
811
812                 for (i = 0; i < NSTATES; i++)
813                 {
814                     c6grid      = nbfp_grid[tj[i]];
815                     vvtot      += LFV[i]*c6grid*VV;
816                     Fscal      += LFV[i]*c6grid*FF;
817                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
818                 }
819             }
820
821             if (bDoForces)
822             {
823                 tx         = Fscal*dx;
824                 ty         = Fscal*dy;
825                 tz         = Fscal*dz;
826                 fix        = fix + tx;
827                 fiy        = fiy + ty;
828                 fiz        = fiz + tz;
829                 /* OpenMP atomics are expensive, but this kernels is also
830                  * expensive, so we can take this hit, instead of using
831                  * thread-local output buffers and extra reduction.
832                  *
833                  * All the OpenMP regions in this file are trivial and should
834                  * not throw, so no need for try/catch.
835                  */
836 #pragma omp atomic
837                 f[j3]     -= tx;
838 #pragma omp atomic
839                 f[j3+1]   -= ty;
840 #pragma omp atomic
841                 f[j3+2]   -= tz;
842             }
843         }
844
845         /* The atomics below are expensive with many OpenMP threads.
846          * Here unperturbed i-particles will usually only have a few
847          * (perturbed) j-particles in the list. Thus with a buffered list
848          * we can skip a significant number of i-reductions with a check.
849          */
850         if (npair_within_cutoff > 0)
851         {
852             if (bDoForces)
853             {
854 #pragma omp atomic
855                 f[ii3]        += fix;
856 #pragma omp atomic
857                 f[ii3+1]      += fiy;
858 #pragma omp atomic
859                 f[ii3+2]      += fiz;
860             }
861             if (bDoShiftForces)
862             {
863 #pragma omp atomic
864                 fshift[is3]   += fix;
865 #pragma omp atomic
866                 fshift[is3+1] += fiy;
867 #pragma omp atomic
868                 fshift[is3+2] += fiz;
869             }
870             if (bDoPotential)
871             {
872                 ggid               = gid[n];
873 #pragma omp atomic
874                 Vc[ggid]          += vctot;
875 #pragma omp atomic
876                 Vv[ggid]          += vvtot;
877             }
878         }
879     }
880
881 #pragma omp atomic
882     dvdl[efptCOUL]     += dvdl_coul;
883  #pragma omp atomic
884     dvdl[efptVDW]      += dvdl_vdw;
885
886     /* Estimate flops, average for free energy stuff:
887      * 12  flops per outer iteration
888      * 150 flops per inner iteration
889      */
890 #pragma omp atomic
891     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
892 }
893
894 typedef void (* KernelFunction)(const t_nblist * gmx_restrict    nlist,
895                                 rvec * gmx_restrict              xx,
896                                 gmx::ForceWithShiftForces *      forceWithShiftForces,
897                                 const t_forcerec * gmx_restrict  fr,
898                                 const t_mdatoms * gmx_restrict   mdatoms,
899                                 nb_kernel_data_t * gmx_restrict  kernel_data,
900                                 t_nrnb * gmx_restrict            nrnb);
901
902 template<SoftCoreTreatment softCoreTreatment,
903          bool scLambdasOrAlphasDiffer,
904          bool vdwInteractionTypeIsEwald,
905          bool elecInteractionTypeIsEwald>
906 static KernelFunction
907 dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
908 {
909     if (vdwModifierIsPotSwitch)
910     {
911         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
912                                      elecInteractionTypeIsEwald, true>);
913     }
914     else
915     {
916         return(nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
917                                      elecInteractionTypeIsEwald, false>);
918     }
919 }
920
921 template<SoftCoreTreatment softCoreTreatment,
922          bool scLambdasOrAlphasDiffer,
923          bool vdwInteractionTypeIsEwald>
924 static KernelFunction
925 dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
926                                     const bool vdwModifierIsPotSwitch)
927 {
928     if (elecInteractionTypeIsEwald)
929     {
930         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
931                                            true>(vdwModifierIsPotSwitch));
932     }
933     else
934     {
935         return(dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
936                                            false>(vdwModifierIsPotSwitch));
937     }
938 }
939
940 template<SoftCoreTreatment softCoreTreatment,
941          bool scLambdasOrAlphasDiffer>
942 static KernelFunction
943 dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
944                                    const bool elecInteractionTypeIsEwald,
945                                    const bool vdwModifierIsPotSwitch)
946 {
947     if (vdwInteractionTypeIsEwald)
948     {
949         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
950                                                    true>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
951     }
952     else
953     {
954         return(dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer,
955                                                    false>(elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
956     }
957 }
958
959 template<SoftCoreTreatment softCoreTreatment>
960 static KernelFunction
961 dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
962                                             const bool vdwInteractionTypeIsEwald,
963                                             const bool elecInteractionTypeIsEwald,
964                                             const bool vdwModifierIsPotSwitch)
965 {
966     if (scLambdasOrAlphasDiffer)
967     {
968         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
969                                                                            vdwModifierIsPotSwitch));
970     }
971     else
972     {
973         return(dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
974                                                                             vdwModifierIsPotSwitch));
975     }
976 }
977
978 static KernelFunction
979 dispatchKernel(const bool                 scLambdasOrAlphasDiffer,
980                const bool                 vdwInteractionTypeIsEwald,
981                const bool                 elecInteractionTypeIsEwald,
982                const bool                 vdwModifierIsPotSwitch,
983                const t_forcerec          *fr)
984 {
985     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
986     {
987         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(scLambdasOrAlphasDiffer,
988                                                                                     vdwInteractionTypeIsEwald,
989                                                                                     elecInteractionTypeIsEwald,
990                                                                                     vdwModifierIsPotSwitch));
991     }
992     else if (fr->sc_r_power == 6.0_real)
993     {
994         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(scLambdasOrAlphasDiffer,
995                                                                                        vdwInteractionTypeIsEwald,
996                                                                                        elecInteractionTypeIsEwald,
997                                                                                        vdwModifierIsPotSwitch));
998     }
999     else
1000     {
1001         return(dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(scLambdasOrAlphasDiffer,
1002                                                                                         vdwInteractionTypeIsEwald,
1003                                                                                         elecInteractionTypeIsEwald,
1004                                                                                         vdwModifierIsPotSwitch));
1005     }
1006 }
1007
1008
1009 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
1010                                rvec                      *xx,
1011                                gmx::ForceWithShiftForces *ff,
1012                                const t_forcerec          *fr,
1013                                const t_mdatoms           *mdatoms,
1014                                nb_kernel_data_t          *kernel_data,
1015                                t_nrnb                    *nrnb)
1016 {
1017     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype)  || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
1018                "Unsupported eeltype with free energy");
1019
1020     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
1021     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
1022     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
1023     bool       scLambdasOrAlphasDiffer    = true;
1024
1025     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
1026     {
1027         scLambdasOrAlphasDiffer = false;
1028     }
1029     else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
1030     {
1031         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
1032             fr->sc_alphacoul == fr->sc_alphavdw)
1033         {
1034             scLambdasOrAlphasDiffer = false;
1035         }
1036     }
1037     else
1038     {
1039         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
1040     }
1041     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
1042                                                vdwModifierIsPotSwitch, fr);
1043     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
1044 }