ecd6aa7917e0df32ad1bfaa8b3e78630897fc5fa
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #include "gmxpre.h"
39
40 #include "nb_free_energy.h"
41
42 #include <cmath>
43
44 #include <algorithm>
45
46 #include "gromacs/gmxlib/nrnb.h"
47 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
48 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
49 #include "gromacs/math/functions.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/mdtypes/forceoutput.h"
52 #include "gromacs/mdtypes/forcerec.h"
53 #include "gromacs/mdtypes/md_enums.h"
54 #include "gromacs/utility/fatalerror.h"
55
56
57 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
58 static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
59 {
60     *invPthRoot = gmx::invsqrt(std::cbrt(r));
61     *pthRoot    = 1 / (*invPthRoot);
62 }
63
64 static inline real calculateRinv6(const real rinvV)
65 {
66     real rinv6 = rinvV * rinvV;
67     return (rinv6 * rinv6 * rinv6);
68 }
69
70 static inline real calculateVdw6(const real c6, const real rinv6)
71 {
72     return (c6 * rinv6);
73 }
74
75 static inline real calculateVdw12(const real c12, const real rinv6)
76 {
77     return (c12 * rinv6 * rinv6);
78 }
79
80 /* reaction-field electrostatics */
81 static inline real
82 reactionFieldScalarForce(const real qq, const real rinv, const real r, const real krf, const real two)
83 {
84     return (qq * (rinv - two * krf * r * r));
85 }
86 static inline real reactionFieldPotential(const real qq, const real rinv, const real r, const real krf, const real potentialShift)
87 {
88     return (qq * (rinv + krf * r * r - potentialShift));
89 }
90
91 /* Ewald electrostatics */
92 static inline real ewaldScalarForce(const real coulomb, const real rinv)
93 {
94     return (coulomb * rinv);
95 }
96 static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
97 {
98     return (coulomb * (rinv - potentialShift));
99 }
100
101 /* cutoff LJ */
102 static inline real lennardJonesScalarForce(const real v6, const real v12)
103 {
104     return (v12 - v6);
105 }
106 static inline real lennardJonesPotential(const real v6,
107                                          const real v12,
108                                          const real c6,
109                                          const real c12,
110                                          const real repulsionShift,
111                                          const real dispersionShift,
112                                          const real onesixth,
113                                          const real onetwelfth)
114 {
115     return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
116 }
117
118 /* Ewald LJ */
119 static inline real ewaldLennardJonesGridSubtract(const real c6grid, const real potentialShift, const real onesixth)
120 {
121     return (c6grid * potentialShift * onesixth);
122 }
123
124 /* LJ Potential switch */
125 static inline real potSwitchScalarForceMod(const real fScalarInp,
126                                            const real potential,
127                                            const real sw,
128                                            const real r,
129                                            const real rVdw,
130                                            const real dsw,
131                                            const real zero)
132 {
133     if (r < rVdw)
134     {
135         real fScalar = fScalarInp * sw - r * potential * dsw;
136         return (fScalar);
137     }
138     return (zero);
139 }
140 static inline real
141 potSwitchPotentialMod(const real potentialInp, const real sw, const real r, const real rVdw, const real zero)
142 {
143     if (r < rVdw)
144     {
145         real potential = potentialInp * sw;
146         return (potential);
147     }
148     return (zero);
149 }
150
151
152 //! Templated free-energy non-bonded kernel
153 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
154 static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
155                                   rvec* gmx_restrict         xx,
156                                   gmx::ForceWithShiftForces* forceWithShiftForces,
157                                   const t_forcerec* gmx_restrict fr,
158                                   const t_mdatoms* gmx_restrict mdatoms,
159                                   nb_kernel_data_t* gmx_restrict kernel_data,
160                                   t_nrnb* gmx_restrict nrnb)
161 {
162 #define STATE_A 0
163 #define STATE_B 1
164 #define NSTATES 2
165
166     constexpr real onetwelfth = 1.0 / 12.0;
167     constexpr real onesixth   = 1.0 / 6.0;
168     constexpr real zero       = 0.0;
169     constexpr real half       = 0.5;
170     constexpr real one        = 1.0;
171     constexpr real two        = 2.0;
172     constexpr real six        = 6.0;
173
174     /* Extract pointer to non-bonded interaction constants */
175     const interaction_const_t* ic = fr->ic;
176
177     // Extract pair list data
178     const int  nri    = nlist->nri;
179     const int* iinr   = nlist->iinr;
180     const int* jindex = nlist->jindex;
181     const int* jjnr   = nlist->jjnr;
182     const int* shift  = nlist->shift;
183     const int* gid    = nlist->gid;
184
185     const real* shiftvec      = fr->shift_vec[0];
186     const real* chargeA       = mdatoms->chargeA;
187     const real* chargeB       = mdatoms->chargeB;
188     real*       Vc            = kernel_data->energygrp_elec;
189     const int*  typeA         = mdatoms->typeA;
190     const int*  typeB         = mdatoms->typeB;
191     const int   ntype         = fr->ntype;
192     const real* nbfp          = fr->nbfp.data();
193     const real* nbfp_grid     = fr->ljpme_c6grid;
194     real*       Vv            = kernel_data->energygrp_vdw;
195     const real  lambda_coul   = kernel_data->lambda[efptCOUL];
196     const real  lambda_vdw    = kernel_data->lambda[efptVDW];
197     real*       dvdl          = kernel_data->dvdl;
198     const real  alpha_coul    = fr->sc_alphacoul;
199     const real  alpha_vdw     = fr->sc_alphavdw;
200     const real  lam_power     = fr->sc_power;
201     const real  sigma6_def    = fr->sc_sigma6_def;
202     const real  sigma6_min    = fr->sc_sigma6_min;
203     const bool  doForces      = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
204     const bool  doShiftForces = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
205     const bool  doPotential   = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
206
207     // Extract data from interaction_const_t
208     const real facel           = ic->epsfac;
209     const real rcoulomb        = ic->rcoulomb;
210     const real krf             = ic->k_rf;
211     const real crf             = ic->c_rf;
212     const real sh_lj_ewald     = ic->sh_lj_ewald;
213     const real rvdw            = ic->rvdw;
214     const real dispersionShift = ic->dispersion_shift.cpot;
215     const real repulsionShift  = ic->repulsion_shift.cpot;
216
217     // Note that the nbnxm kernels do not support Coulomb potential switching at all
218     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
219                "Potential switching is not supported for Coulomb with FEP");
220
221     real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
222     if (vdwModifierIsPotSwitch)
223     {
224         const real d = ic->rvdw - ic->rvdw_switch;
225         vdw_swV3     = -10.0 / (d * d * d);
226         vdw_swV4     = 15.0 / (d * d * d * d);
227         vdw_swV5     = -6.0 / (d * d * d * d * d);
228         vdw_swF2     = -30.0 / (d * d * d);
229         vdw_swF3     = 60.0 / (d * d * d * d);
230         vdw_swF4     = -30.0 / (d * d * d * d * d);
231     }
232     else
233     {
234         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
235         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
236     }
237
238     int icoul;
239     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
240     {
241         icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
242     }
243     else
244     {
245         icoul = GMX_NBKERNEL_ELEC_NONE;
246     }
247
248     real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
249     rcutoff_max2      = rcutoff_max2 * rcutoff_max2;
250
251     const real* tab_ewald_F_lj = nullptr;
252     const real* tab_ewald_V_lj = nullptr;
253     const real* ewtab          = nullptr;
254     real        ewtabscale     = 0;
255     real        ewtabhalfspace = 0;
256     real        sh_ewald       = 0;
257     if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
258     {
259         const auto& tables = *ic->coulombEwaldTables;
260         sh_ewald           = ic->sh_ewald;
261         ewtab              = tables.tableFDV0.data();
262         ewtabscale         = tables.scale;
263         ewtabhalfspace     = half / ewtabscale;
264         tab_ewald_F_lj     = tables.tableF.data();
265         tab_ewald_V_lj     = tables.tableV.data();
266     }
267
268     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
269      * reciprocal space. When we use non-switched Ewald interactions, we
270      * assume the soft-coring does not significantly affect the grid contribution
271      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
272      *
273      * However, we cannot use this approach for switch-modified since we would then
274      * effectively end up evaluating a significantly different interaction here compared to the
275      * normal (non-free-energy) kernels, either by applying a cutoff at a different
276      * position than what the user requested, or by switching different
277      * things (1/r rather than short-range Ewald). For these settings, we just
278      * use the traditional short-range Ewald interaction in that case.
279      */
280     GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
281                        "Can not apply soft-core to switched Ewald potentials");
282
283     real dvdl_coul = 0;
284     real dvdl_vdw  = 0;
285
286     /* Lambda factor for state A, 1-lambda*/
287     real LFC[NSTATES], LFV[NSTATES];
288     LFC[STATE_A] = one - lambda_coul;
289     LFV[STATE_A] = one - lambda_vdw;
290
291     /* Lambda factor for state B, lambda*/
292     LFC[STATE_B] = lambda_coul;
293     LFV[STATE_B] = lambda_vdw;
294
295     /*derivative of the lambda factor for state A and B */
296     real DLF[NSTATES];
297     DLF[STATE_A] = -1;
298     DLF[STATE_B] = 1;
299
300     real           lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
301     constexpr real sc_r_power = 6.0_real;
302     for (int i = 0; i < NSTATES; i++)
303     {
304         lfac_coul[i]  = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
305         dlfac_coul[i] = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFC[i]) : 1);
306         lfac_vdw[i]   = (lam_power == 2 ? (1 - LFV[i]) * (1 - LFV[i]) : (1 - LFV[i]));
307         dlfac_vdw[i]  = DLF[i] * lam_power / sc_r_power * (lam_power == 2 ? (1 - LFV[i]) : 1);
308     }
309
310     // TODO: We should get rid of using pointers to real
311     const real* x             = xx[0];
312     real* gmx_restrict f      = &(forceWithShiftForces->force()[0][0]);
313     real* gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
314
315     for (int n = 0; n < nri; n++)
316     {
317         int npair_within_cutoff = 0;
318
319         const int  is3   = 3 * shift[n];
320         const real shX   = shiftvec[is3];
321         const real shY   = shiftvec[is3 + 1];
322         const real shZ   = shiftvec[is3 + 2];
323         const int  nj0   = jindex[n];
324         const int  nj1   = jindex[n + 1];
325         const int  ii    = iinr[n];
326         const int  ii3   = 3 * ii;
327         const real ix    = shX + x[ii3 + 0];
328         const real iy    = shY + x[ii3 + 1];
329         const real iz    = shZ + x[ii3 + 2];
330         const real iqA   = facel * chargeA[ii];
331         const real iqB   = facel * chargeB[ii];
332         const int  ntiA  = 2 * ntype * typeA[ii];
333         const int  ntiB  = 2 * ntype * typeB[ii];
334         real       vctot = 0;
335         real       vvtot = 0;
336         real       fix   = 0;
337         real       fiy   = 0;
338         real       fiz   = 0;
339
340         for (int k = nj0; k < nj1; k++)
341         {
342             int        tj[NSTATES];
343             const int  jnr = jjnr[k];
344             const int  j3  = 3 * jnr;
345             real       c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
346             real       r, rinv, rp, rpm2;
347             real       alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
348             const real dx  = ix - x[j3];
349             const real dy  = iy - x[j3 + 1];
350             const real dz  = iz - x[j3 + 2];
351             const real rsq = dx * dx + dy * dy + dz * dz;
352             real       FscalC[NSTATES], FscalV[NSTATES];
353
354             if (rsq >= rcutoff_max2)
355             {
356                 /* We save significant time by skipping all code below.
357                  * Note that with soft-core interactions, the actual cut-off
358                  * check might be different. But since the soft-core distance
359                  * is always larger than r, checking on r here is safe.
360                  */
361                 continue;
362             }
363             npair_within_cutoff++;
364
365             if (rsq > 0)
366             {
367                 /* Note that unlike in the nbnxn kernels, we do not need
368                  * to clamp the value of rsq before taking the invsqrt
369                  * to avoid NaN in the LJ calculation, since here we do
370                  * not calculate LJ interactions when C6 and C12 are zero.
371                  */
372
373                 rinv = gmx::invsqrt(rsq);
374                 r    = rsq * rinv;
375             }
376             else
377             {
378                 /* The force at r=0 is zero, because of symmetry.
379                  * But note that the potential is in general non-zero,
380                  * since the soft-cored r will be non-zero.
381                  */
382                 rinv = 0;
383                 r    = 0;
384             }
385
386             if (useSoftCore)
387             {
388                 rpm2 = rsq * rsq;  /* r4 */
389                 rp   = rpm2 * rsq; /* r6 */
390             }
391             else
392             {
393                 /* The soft-core power p will not affect the results
394                  * with not using soft-core, so we use power of 0 which gives
395                  * the simplest math and cheapest code.
396                  */
397                 rpm2 = rinv * rinv;
398                 rp   = 1;
399             }
400
401             real Fscal = 0;
402
403             qq[STATE_A] = iqA * chargeA[jnr];
404             qq[STATE_B] = iqB * chargeB[jnr];
405
406             tj[STATE_A] = ntiA + 2 * typeA[jnr];
407             tj[STATE_B] = ntiB + 2 * typeB[jnr];
408
409             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
410             {
411                 c6[STATE_A] = nbfp[tj[STATE_A]];
412                 c6[STATE_B] = nbfp[tj[STATE_B]];
413
414                 for (int i = 0; i < NSTATES; i++)
415                 {
416                     c12[i] = nbfp[tj[i] + 1];
417                     if (useSoftCore)
418                     {
419                         if ((c6[i] > 0) && (c12[i] > 0))
420                         {
421                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
422                             sigma6[i] = half * c12[i] / c6[i];
423                             if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
424                             {
425                                 sigma6[i] = sigma6_min;
426                             }
427                         }
428                         else
429                         {
430                             sigma6[i] = sigma6_def;
431                         }
432                     }
433                 }
434
435                 if (useSoftCore)
436                 {
437                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
438                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
439                     {
440                         alpha_vdw_eff  = 0;
441                         alpha_coul_eff = 0;
442                     }
443                     else
444                     {
445                         alpha_vdw_eff  = alpha_vdw;
446                         alpha_coul_eff = alpha_coul;
447                     }
448                 }
449
450                 for (int i = 0; i < NSTATES; i++)
451                 {
452                     FscalC[i] = 0;
453                     FscalV[i] = 0;
454                     Vcoul[i]  = 0;
455                     Vvdw[i]   = 0;
456
457                     real rinvC, rinvV, rC, rV, rpinvC, rpinvV;
458
459                     /* Only spend time on A or B state if it is non-zero */
460                     if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
461                     {
462                         /* this section has to be inside the loop because of the dependence on sigma6 */
463                         if (useSoftCore)
464                         {
465                             rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
466                             pthRoot(rpinvC, &rinvC, &rC);
467                             if (scLambdasOrAlphasDiffer)
468                             {
469                                 rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
470                                 pthRoot(rpinvV, &rinvV, &rV);
471                             }
472                             else
473                             {
474                                 /* We can avoid one expensive pow and one / operation */
475                                 rpinvV = rpinvC;
476                                 rinvV  = rinvC;
477                                 rV     = rC;
478                             }
479                         }
480                         else
481                         {
482                             rpinvC = 1;
483                             rinvC  = rinv;
484                             rC     = r;
485
486                             rpinvV = 1;
487                             rinvV  = rinv;
488                             rV     = r;
489                         }
490
491                         /* Only process the coulomb interactions if we have charges,
492                          * and if we either include all entries in the list (no cutoff
493                          * used in the kernel), or if we are within the cutoff.
494                          */
495                         bool computeElecInteraction = (elecInteractionTypeIsEwald && r < rcoulomb)
496                                                       || (!elecInteractionTypeIsEwald && rC < rcoulomb);
497
498                         if ((qq[i] != 0) && computeElecInteraction)
499                         {
500                             if (elecInteractionTypeIsEwald)
501                             {
502                                 Vcoul[i]  = ewaldPotential(qq[i], rinvC, sh_ewald);
503                                 FscalC[i] = ewaldScalarForce(qq[i], rinvC);
504                             }
505                             else
506                             {
507                                 Vcoul[i]  = reactionFieldPotential(qq[i], rinvC, rC, krf, crf);
508                                 FscalC[i] = reactionFieldScalarForce(qq[i], rinvC, rC, krf, two);
509                             }
510                         }
511
512                         /* Only process the VDW interactions if we have
513                          * some non-zero parameters, and if we either
514                          * include all entries in the list (no cutoff used
515                          * in the kernel), or if we are within the cutoff.
516                          */
517                         bool computeVdwInteraction = (vdwInteractionTypeIsEwald && r < rvdw)
518                                                      || (!vdwInteractionTypeIsEwald && rV < rvdw);
519                         if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
520                         {
521                             real rinv6;
522                             if (useSoftCore)
523                             {
524                                 rinv6 = rpinvV;
525                             }
526                             else
527                             {
528                                 rinv6 = calculateRinv6(rinvV);
529                             }
530                             real Vvdw6  = calculateVdw6(c6[i], rinv6);
531                             real Vvdw12 = calculateVdw12(c12[i], rinv6);
532
533                             Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
534                                                             dispersionShift, onesixth, onetwelfth);
535                             FscalV[i] = lennardJonesScalarForce(Vvdw6, Vvdw12);
536
537                             if (vdwInteractionTypeIsEwald)
538                             {
539                                 /* Subtract the grid potential at the cut-off */
540                                 Vvdw[i] += ewaldLennardJonesGridSubtract(nbfp_grid[tj[i]],
541                                                                          sh_lj_ewald, onesixth);
542                             }
543
544                             if (vdwModifierIsPotSwitch)
545                             {
546                                 real d        = rV - ic->rvdw_switch;
547                                 d             = (d > zero) ? d : zero;
548                                 const real d2 = d * d;
549                                 const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
550                                 const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
551
552                                 FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
553                                                                     rvdw, dsw, zero);
554                                 Vvdw[i]   = potSwitchPotentialMod(Vvdw[i], sw, rV, rvdw, zero);
555                             }
556                         }
557
558                         /* FscalC (and FscalV) now contain: dV/drC * rC
559                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
560                          * Further down we first multiply by r^p-2 and then by
561                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
562                          */
563                         FscalC[i] *= rpinvC;
564                         FscalV[i] *= rpinvV;
565                     }
566                 }
567
568                 /* Assemble A and B states */
569                 for (int i = 0; i < NSTATES; i++)
570                 {
571                     vctot += LFC[i] * Vcoul[i];
572                     vvtot += LFV[i] * Vvdw[i];
573
574                     Fscal += LFC[i] * FscalC[i] * rpm2;
575                     Fscal += LFV[i] * FscalV[i] * rpm2;
576
577                     if (useSoftCore)
578                     {
579                         dvdl_coul += Vcoul[i] * DLF[i]
580                                      + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
581                         dvdl_vdw += Vvdw[i] * DLF[i]
582                                     + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
583                     }
584                     else
585                     {
586                         dvdl_coul += Vcoul[i] * DLF[i];
587                         dvdl_vdw += Vvdw[i] * DLF[i];
588                     }
589                 }
590             }
591             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
592             {
593                 /* For excluded pairs, which are only in this pair list when
594                  * using the Verlet scheme, we don't use soft-core.
595                  * As there is no singularity, there is no need for soft-core.
596                  */
597                 const real FF = -two * krf;
598                 real       VV = krf * rsq - crf;
599
600                 if (ii == jnr)
601                 {
602                     VV *= half;
603                 }
604
605                 for (int i = 0; i < NSTATES; i++)
606                 {
607                     vctot += LFC[i] * qq[i] * VV;
608                     Fscal += LFC[i] * qq[i] * FF;
609                     dvdl_coul += DLF[i] * qq[i] * VV;
610                 }
611             }
612
613             if (elecInteractionTypeIsEwald && r < rcoulomb)
614             {
615                 /* See comment in the preamble. When using Ewald interactions
616                  * (unless we use a switch modifier) we subtract the reciprocal-space
617                  * Ewald component here which made it possible to apply the free
618                  * energy interaction to 1/r (vanilla coulomb short-range part)
619                  * above. This gets us closer to the ideal case of applying
620                  * the softcore to the entire electrostatic interaction,
621                  * including the reciprocal-space component.
622                  */
623                 real v_lr, f_lr;
624
625                 const real ewrt   = r * ewtabscale;
626                 int        ewitab = static_cast<int>(ewrt);
627                 const real eweps  = ewrt - ewitab;
628                 ewitab            = 4 * ewitab;
629                 f_lr              = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
630                 v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
631                 f_lr *= rinv;
632
633                 /* Note that any possible Ewald shift has already been applied in
634                  * the normal interaction part above.
635                  */
636
637                 if (ii == jnr)
638                 {
639                     /* If we get here, the i particle (ii) has itself (jnr)
640                      * in its neighborlist. This can only happen with the Verlet
641                      * scheme, and corresponds to a self-interaction that will
642                      * occur twice. Scale it down by 50% to only include it once.
643                      */
644                     v_lr *= half;
645                 }
646
647                 for (int i = 0; i < NSTATES; i++)
648                 {
649                     vctot -= LFC[i] * qq[i] * v_lr;
650                     Fscal -= LFC[i] * qq[i] * f_lr;
651                     dvdl_coul -= (DLF[i] * qq[i]) * v_lr;
652                 }
653             }
654
655             if (vdwInteractionTypeIsEwald && r < rvdw)
656             {
657                 /* See comment in the preamble. When using LJ-Ewald interactions
658                  * (unless we use a switch modifier) we subtract the reciprocal-space
659                  * Ewald component here which made it possible to apply the free
660                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
661                  * above. This gets us closer to the ideal case of applying
662                  * the softcore to the entire VdW interaction,
663                  * including the reciprocal-space component.
664                  */
665                 /* We could also use the analytical form here
666                  * iso a table, but that can cause issues for
667                  * r close to 0 for non-interacting pairs.
668                  */
669
670                 const real rs   = rsq * rinv * ewtabscale;
671                 const int  ri   = static_cast<int>(rs);
672                 const real frac = rs - ri;
673                 const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
674                 /* TODO: Currently the Ewald LJ table does not contain
675                  * the factor 1/6, we should add this.
676                  */
677                 const real FF = f_lr * rinv / six;
678                 real VV = (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
679
680                 if (ii == jnr)
681                 {
682                     /* If we get here, the i particle (ii) has itself (jnr)
683                      * in its neighborlist. This can only happen with the Verlet
684                      * scheme, and corresponds to a self-interaction that will
685                      * occur twice. Scale it down by 50% to only include it once.
686                      */
687                     VV *= half;
688                 }
689
690                 for (int i = 0; i < NSTATES; i++)
691                 {
692                     const real c6grid = nbfp_grid[tj[i]];
693                     vvtot += LFV[i] * c6grid * VV;
694                     Fscal += LFV[i] * c6grid * FF;
695                     dvdl_vdw += (DLF[i] * c6grid) * VV;
696                 }
697             }
698
699             if (doForces)
700             {
701                 const real tx = Fscal * dx;
702                 const real ty = Fscal * dy;
703                 const real tz = Fscal * dz;
704                 fix           = fix + tx;
705                 fiy           = fiy + ty;
706                 fiz           = fiz + tz;
707                 /* OpenMP atomics are expensive, but this kernels is also
708                  * expensive, so we can take this hit, instead of using
709                  * thread-local output buffers and extra reduction.
710                  *
711                  * All the OpenMP regions in this file are trivial and should
712                  * not throw, so no need for try/catch.
713                  */
714 #pragma omp atomic
715                 f[j3] -= tx;
716 #pragma omp atomic
717                 f[j3 + 1] -= ty;
718 #pragma omp atomic
719                 f[j3 + 2] -= tz;
720             }
721         }
722
723         /* The atomics below are expensive with many OpenMP threads.
724          * Here unperturbed i-particles will usually only have a few
725          * (perturbed) j-particles in the list. Thus with a buffered list
726          * we can skip a significant number of i-reductions with a check.
727          */
728         if (npair_within_cutoff > 0)
729         {
730             if (doForces)
731             {
732 #pragma omp atomic
733                 f[ii3] += fix;
734 #pragma omp atomic
735                 f[ii3 + 1] += fiy;
736 #pragma omp atomic
737                 f[ii3 + 2] += fiz;
738             }
739             if (doShiftForces)
740             {
741 #pragma omp atomic
742                 fshift[is3] += fix;
743 #pragma omp atomic
744                 fshift[is3 + 1] += fiy;
745 #pragma omp atomic
746                 fshift[is3 + 2] += fiz;
747             }
748             if (doPotential)
749             {
750                 int ggid = gid[n];
751 #pragma omp atomic
752                 Vc[ggid] += vctot;
753 #pragma omp atomic
754                 Vv[ggid] += vvtot;
755             }
756         }
757     }
758
759 #pragma omp atomic
760     dvdl[efptCOUL] += dvdl_coul;
761 #pragma omp atomic
762     dvdl[efptVDW] += dvdl_vdw;
763
764     /* Estimate flops, average for free energy stuff:
765      * 12  flops per outer iteration
766      * 150 flops per inner iteration
767      */
768 #pragma omp atomic
769     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri * 12 + nlist->jindex[nri] * 150);
770 }
771
772 typedef void (*KernelFunction)(const t_nblist* gmx_restrict nlist,
773                                rvec* gmx_restrict         xx,
774                                gmx::ForceWithShiftForces* forceWithShiftForces,
775                                const t_forcerec* gmx_restrict fr,
776                                const t_mdatoms* gmx_restrict mdatoms,
777                                nb_kernel_data_t* gmx_restrict kernel_data,
778                                t_nrnb* gmx_restrict nrnb);
779
780 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
781 static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
782 {
783     if (vdwModifierIsPotSwitch)
784     {
785         return (nb_free_energy_kernel<useSoftCore, scLambdasOrAlphasDiffer,
786                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
787     }
788     else
789     {
790         return (nb_free_energy_kernel<useSoftCore, scLambdasOrAlphasDiffer,
791                                       vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
792     }
793 }
794
795 template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
796 static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
797                                                           const bool vdwModifierIsPotSwitch)
798 {
799     if (elecInteractionTypeIsEwald)
800     {
801         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
802                 vdwModifierIsPotSwitch));
803     }
804     else
805     {
806         return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
807                 vdwModifierIsPotSwitch));
808     }
809 }
810
811 template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
812 static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
813                                                          const bool elecInteractionTypeIsEwald,
814                                                          const bool vdwModifierIsPotSwitch)
815 {
816     if (vdwInteractionTypeIsEwald)
817     {
818         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
819                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
820     }
821     else
822     {
823         return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
824                 elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
825     }
826 }
827
828 template<bool useSoftCore>
829 static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
830                                                                   const bool vdwInteractionTypeIsEwald,
831                                                                   const bool elecInteractionTypeIsEwald,
832                                                                   const bool vdwModifierIsPotSwitch)
833 {
834     if (scLambdasOrAlphasDiffer)
835     {
836         return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
837                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
838     }
839     else
840     {
841         return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
842                 vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
843     }
844 }
845
846 static KernelFunction dispatchKernel(const bool        scLambdasOrAlphasDiffer,
847                                      const bool        vdwInteractionTypeIsEwald,
848                                      const bool        elecInteractionTypeIsEwald,
849                                      const bool        vdwModifierIsPotSwitch,
850                                      const t_forcerec* fr)
851 {
852     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
853     {
854         return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
855                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
856                 vdwModifierIsPotSwitch));
857     }
858     else
859     {
860         return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
861                 scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
862                 vdwModifierIsPotSwitch));
863     }
864 }
865
866
867 void gmx_nb_free_energy_kernel(const t_nblist*            nlist,
868                                rvec*                      xx,
869                                gmx::ForceWithShiftForces* ff,
870                                const t_forcerec*          fr,
871                                const t_mdatoms*           mdatoms,
872                                nb_kernel_data_t*          kernel_data,
873                                t_nrnb*                    nrnb)
874 {
875     GMX_ASSERT(EEL_PME_EWALD(fr->ic->eeltype) || fr->ic->eeltype == eelCUT || EEL_RF(fr->ic->eeltype),
876                "Unsupported eeltype with free energy");
877
878     const bool vdwInteractionTypeIsEwald  = (EVDW_PME(fr->ic->vdwtype));
879     const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
880     const bool vdwModifierIsPotSwitch     = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
881     bool       scLambdasOrAlphasDiffer    = true;
882
883     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
884     {
885         scLambdasOrAlphasDiffer = false;
886     }
887     else if (fr->sc_r_power == 6.0_real)
888     {
889         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
890         {
891             scLambdasOrAlphasDiffer = false;
892         }
893     }
894     else
895     {
896         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
897     }
898     KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
899                                                elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
900     kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
901 }