8303ae6007348e5ae6e22735054521d68f366716
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forceoutput.h"
51 #include "gromacs/mdtypes/forcerec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/utility/fatalerror.h"
54
55
56 //! Enum for templating the soft-core treatment in the kernel
57 enum class SoftCoreTreatment
58 {
59     None,    //!< No soft-core
60     RPower6, //!< Soft-core with r-power = 6
61     RPower48 //!< Soft-core with r-power = 48
62 };
63
64 //! Most treatments are fine with float in mixed-precision mode.
65 template <SoftCoreTreatment softCoreTreatment>
66 struct SoftCoreReal
67 {
68     //! Real type for soft-core calculations
69     using Real = real;
70 };
71
72 //! This treatment requires double precision for some computations.
73 template <>
74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
75 {
76     //! Real type for soft-core calculations
77     using Real = double;
78 };
79
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template <SoftCoreTreatment softCoreTreatment>
82 static inline void pthRoot(const real  r,
83                            real       *pthRoot,
84                            real       *invPthRoot)
85 {
86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
87     *pthRoot    = 1/(*invPthRoot);
88 }
89
90 // We need a double version to make the specialization below work
91 #if !GMX_DOUBLE
92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
93 template <SoftCoreTreatment softCoreTreatment>
94 static inline void pthRoot(const double  r,
95                            real         *pthRoot,
96                            double       *invPthRoot)
97 {
98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
99     *pthRoot    = 1/(*invPthRoot);
100 }
101 #endif
102
103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
104 template <>
105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
106                                                  real         *pthRoot,
107                                                  double       *invPthRoot)
108 {
109     *pthRoot    = std::pow(r, 1.0/48.0);
110     *invPthRoot = 1/(*pthRoot);
111 }
112
113 //! Templated free-energy non-bonded kernel
114 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
115 static void
116 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
117                       rvec * gmx_restrict              xx,
118                       gmx::ForceWithShiftForces *      forceWithShiftForces,
119                       const t_forcerec * gmx_restrict  fr,
120                       const t_mdatoms * gmx_restrict   mdatoms,
121                       nb_kernel_data_t * gmx_restrict  kernel_data,
122                       t_nrnb * gmx_restrict            nrnb)
123 {
124     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
125
126     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
127
128 #define  STATE_A  0
129 #define  STATE_B  1
130 #define  NSTATES  2
131     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
132     real          shX, shY, shZ;
133     real          tx, ty, tz, Fscal;
134     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
135     real          Vcoul[NSTATES], Vvdw[NSTATES];
136     real          rinv6, r;
137     real          iqA, iqB;
138     real          qq[NSTATES], vctot;
139     int           ntiA, ntiB, tj[NSTATES];
140     real          Vvdw6, Vvdw12, vvtot;
141     real          ix, iy, iz, fix, fiy, fiz;
142     real          dx, dy, dz, rsq, rinv;
143     real          c6[NSTATES], c12[NSTATES], c6grid;
144     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
145     SCReal        dvdl_coul, dvdl_vdw;
146     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
147     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
148     real          rinvC, rinvV;
149     SCReal        rp, rpm2, rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
150     real          sigma_pow[NSTATES];
151     real          VV, FF;
152     int           icoul, ivdw;
153     int           nri;
154     const int *   iinr;
155     const int *   jindex;
156     const int *   jjnr;
157     const int *   shift;
158     const int *   gid;
159     const int *   typeA;
160     const int *   typeB;
161     int           ntype;
162     const real *  shiftvec;
163     const real *  chargeA;
164     const real *  chargeB;
165     real          sigma6_min, sigma6_def, lam_power;
166     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
167     const real *  nbfp, *nbfp_grid;
168     real *        dvdl;
169     real *        Vv;
170     real *        Vc;
171     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
172     gmx_bool      bEwald, bEwaldLJ;
173     real          rcutoff_max2;
174     const real *  tab_ewald_F_lj = nullptr;
175     const real *  tab_ewald_V_lj = nullptr;
176     real          d, d2, sw, dsw;
177     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
178     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
179     const real *  ewtab = nullptr;
180     int           ewitab;
181     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
182
183     const real    onetwelfth  = 1.0/12.0;
184     const real    onesixth    = 1.0/6.0;
185     const real    zero        = 0.0;
186     const real    half        = 0.5;
187     const real    one         = 1.0;
188     const real    two         = 2.0;
189     const real    six         = 6.0;
190
191     /* Extract pointer to non-bonded interaction constants */
192     const interaction_const_t *ic = fr->ic;
193
194     // TODO: We should get rid of using pointers to real
195     const real          *x      = xx[0];
196     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
197
198     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
199
200     // Extract pair list data
201     nri                 = nlist->nri;
202     iinr                = nlist->iinr;
203     jindex              = nlist->jindex;
204     jjnr                = nlist->jjnr;
205     shift               = nlist->shift;
206     gid                 = nlist->gid;
207
208     shiftvec            = fr->shift_vec[0];
209     chargeA             = mdatoms->chargeA;
210     chargeB             = mdatoms->chargeB;
211     Vc                  = kernel_data->energygrp_elec;
212     typeA               = mdatoms->typeA;
213     typeB               = mdatoms->typeB;
214     ntype               = fr->ntype;
215     nbfp                = fr->nbfp;
216     nbfp_grid           = fr->ljpme_c6grid;
217     Vv                  = kernel_data->energygrp_vdw;
218     lambda_coul         = kernel_data->lambda[efptCOUL];
219     lambda_vdw          = kernel_data->lambda[efptVDW];
220     dvdl                = kernel_data->dvdl;
221     alpha_coul          = fr->sc_alphacoul;
222     alpha_vdw           = fr->sc_alphavdw;
223     lam_power           = fr->sc_power;
224     sigma6_def          = fr->sc_sigma6_def;
225     sigma6_min          = fr->sc_sigma6_min;
226     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
227     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
228     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
229
230     // Extract data from interaction_const_t
231     const real facel           = ic->epsfac;
232     const real rcoulomb        = ic->rcoulomb;
233     const real krf             = ic->k_rf;
234     const real crf             = ic->c_rf;
235     const real sh_lj_ewald     = ic->sh_lj_ewald;
236     const real rvdw            = ic->rvdw;
237     const real dispersionShift = ic->dispersion_shift.cpot;
238     const real repulsionShift  = ic->repulsion_shift.cpot;
239
240     // Note that the nbnxm kernels do not support Coulomb potential switching at all
241     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
242                "Potential switching is not supported for Coulomb with FEP");
243
244     if (ic->vdw_modifier == eintmodPOTSWITCH)
245     {
246         d               = ic->rvdw - ic->rvdw_switch;
247         vdw_swV3        = -10.0/(d*d*d);
248         vdw_swV4        =  15.0/(d*d*d*d);
249         vdw_swV5        =  -6.0/(d*d*d*d*d);
250         vdw_swF2        = -30.0/(d*d*d);
251         vdw_swF3        =  60.0/(d*d*d*d);
252         vdw_swF4        = -30.0/(d*d*d*d*d);
253     }
254     else
255     {
256         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
257         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
258     }
259
260     if (EVDW_PME(ic->vdwtype))
261     {
262         ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
263     }
264     else
265     {
266         ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
267     }
268
269     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
270     {
271         icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
272     }
273     else if (EEL_PME_EWALD(ic->eeltype))
274     {
275         icoul        = GMX_NBKERNEL_ELEC_EWALD;
276     }
277     else
278     {
279         gmx_incons("Unsupported eeltype with Verlet and free-energy");
280     }
281
282     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
283     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
284
285     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
286     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
287
288     if (bEwald || bEwaldLJ)
289     {
290         const auto &tables = *ic->coulombEwaldTables;
291         sh_ewald       = ic->sh_ewald;
292         ewtab          = tables.tableFDV0.data();
293         ewtabscale     = tables.scale;
294         ewtabhalfspace = half/ewtabscale;
295         tab_ewald_F_lj = tables.tableF.data();
296         tab_ewald_V_lj = tables.tableV.data();
297     }
298
299     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
300      * reciprocal space. When we use non-switched Ewald interactions, we
301      * assume the soft-coring does not significantly affect the grid contribution
302      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
303      *
304      * However, we cannot use this approach for switch-modified since we would then
305      * effectively end up evaluating a significantly different interaction here compared to the
306      * normal (non-free-energy) kernels, either by applying a cutoff at a different
307      * position than what the user requested, or by switching different
308      * things (1/r rather than short-range Ewald). For these settings, we just
309      * use the traditional short-range Ewald interaction in that case.
310      */
311     GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
312                        !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
313                        "Can not apply soft-core to switched Ewald potentials");
314
315     dvdl_coul  = 0;
316     dvdl_vdw   = 0;
317
318     /* Lambda factor for state A, 1-lambda*/
319     LFC[STATE_A] = one - lambda_coul;
320     LFV[STATE_A] = one - lambda_vdw;
321
322     /* Lambda factor for state B, lambda*/
323     LFC[STATE_B] = lambda_coul;
324     LFV[STATE_B] = lambda_vdw;
325
326     /*derivative of the lambda factor for state A and B */
327     DLF[STATE_A] = -1;
328     DLF[STATE_B] = 1;
329
330     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
331     for (i = 0; i < NSTATES; i++)
332     {
333         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
334         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
335         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
336         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
337     }
338
339     for (n = 0; (n < nri); n++)
340     {
341         int npair_within_cutoff;
342
343         npair_within_cutoff = 0;
344
345         is3              = 3*shift[n];
346         shX              = shiftvec[is3];
347         shY              = shiftvec[is3+1];
348         shZ              = shiftvec[is3+2];
349         nj0              = jindex[n];
350         nj1              = jindex[n+1];
351         ii               = iinr[n];
352         ii3              = 3*ii;
353         ix               = shX + x[ii3+0];
354         iy               = shY + x[ii3+1];
355         iz               = shZ + x[ii3+2];
356         iqA              = facel*chargeA[ii];
357         iqB              = facel*chargeB[ii];
358         ntiA             = 2*ntype*typeA[ii];
359         ntiB             = 2*ntype*typeB[ii];
360         vctot            = 0;
361         vvtot            = 0;
362         fix              = 0;
363         fiy              = 0;
364         fiz              = 0;
365
366         for (k = nj0; (k < nj1); k++)
367         {
368             jnr              = jjnr[k];
369             j3               = 3*jnr;
370             dx               = ix - x[j3];
371             dy               = iy - x[j3+1];
372             dz               = iz - x[j3+2];
373             rsq              = dx*dx + dy*dy + dz*dz;
374
375             if (rsq >= rcutoff_max2)
376             {
377                 /* We save significant time by skipping all code below.
378                  * Note that with soft-core interactions, the actual cut-off
379                  * check might be different. But since the soft-core distance
380                  * is always larger than r, checking on r here is safe.
381                  */
382                 continue;
383             }
384             npair_within_cutoff++;
385
386             if (rsq > 0)
387             {
388                 /* Note that unlike in the nbnxn kernels, we do not need
389                  * to clamp the value of rsq before taking the invsqrt
390                  * to avoid NaN in the LJ calculation, since here we do
391                  * not calculate LJ interactions when C6 and C12 are zero.
392                  */
393
394                 rinv         = gmx::invsqrt(rsq);
395                 r            = rsq*rinv;
396             }
397             else
398             {
399                 /* The force at r=0 is zero, because of symmetry.
400                  * But note that the potential is in general non-zero,
401                  * since the soft-cored r will be non-zero.
402                  */
403                 rinv         = 0;
404                 r            = 0;
405             }
406
407             if (softCoreTreatment == SoftCoreTreatment::None)
408             {
409                 /* The soft-core power p will not affect the results
410                  * with not using soft-core, so we use power of 0 which gives
411                  * the simplest math and cheapest code.
412                  */
413                 rpm2         = rinv*rinv;
414                 rp           = 1;
415             }
416             if (softCoreTreatment == SoftCoreTreatment::RPower6)
417             {
418                 rpm2         = rsq*rsq;  /* r4 */
419                 rp           = rpm2*rsq; /* r6 */
420             }
421             if (softCoreTreatment == SoftCoreTreatment::RPower48)
422             {
423                 rp           = rsq*rsq*rsq; /* r6 */
424                 rp           = rp*rp;       /* r12 */
425                 rp           = rp*rp;       /* r24 */
426                 rp           = rp*rp;       /* r48 */
427                 rpm2         = rp/rsq;      /* r46 */
428             }
429
430             Fscal = 0;
431
432             qq[STATE_A]      = iqA*chargeA[jnr];
433             qq[STATE_B]      = iqB*chargeB[jnr];
434
435             tj[STATE_A]      = ntiA+2*typeA[jnr];
436             tj[STATE_B]      = ntiB+2*typeB[jnr];
437
438             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
439             {
440                 c6[STATE_A]      = nbfp[tj[STATE_A]];
441                 c6[STATE_B]      = nbfp[tj[STATE_B]];
442
443                 for (i = 0; i < NSTATES; i++)
444                 {
445                     c12[i]             = nbfp[tj[i]+1];
446                     if (useSoftCore)
447                     {
448                         if ((c6[i] > 0) && (c12[i] > 0))
449                         {
450                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
451                             sigma6[i]       = half*c12[i]/c6[i];
452                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
453                             {
454                                 sigma6[i] = sigma6_min;
455                             }
456                         }
457                         else
458                         {
459                             sigma6[i]       = sigma6_def;
460                         }
461                         if (softCoreTreatment == SoftCoreTreatment::RPower6)
462                         {
463                             sigma_pow[i]    = sigma6[i];
464                         }
465                         else
466                         {
467                             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
468                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
469                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
470                         }
471                     }
472                 }
473
474                 if (useSoftCore)
475                 {
476                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
477                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
478                     {
479                         alpha_vdw_eff    = 0;
480                         alpha_coul_eff   = 0;
481                     }
482                     else
483                     {
484                         alpha_vdw_eff    = alpha_vdw;
485                         alpha_coul_eff   = alpha_coul;
486                     }
487                 }
488
489                 for (i = 0; i < NSTATES; i++)
490                 {
491                     FscalC[i]    = 0;
492                     FscalV[i]    = 0;
493                     Vcoul[i]     = 0;
494                     Vvdw[i]      = 0;
495
496                     /* Only spend time on A or B state if it is non-zero */
497                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
498                     {
499                         /* this section has to be inside the loop because of the dependence on sigma_pow */
500                         if (useSoftCore)
501                         {
502                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
503                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
504
505                             if (scLambdasOrAlphasDiffer)
506                             {
507                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
508                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
509                             }
510                             else
511                             {
512                                 /* We can avoid one expensive pow and one / operation */
513                                 rpinvV = rpinvC;
514                                 rinvV  = rinvC;
515                                 rV     = rC;
516                             }
517                         }
518                         else
519                         {
520                             rpinvC     = 1;
521                             rinvC      = rinv;
522                             rC         = r;
523
524                             rpinvV     = 1;
525                             rinvV      = rinv;
526                             rV         = r;
527                         }
528
529                         /* Only process the coulomb interactions if we have charges,
530                          * and if we either include all entries in the list (no cutoff
531                          * used in the kernel), or if we are within the cutoff.
532                          */
533                         bComputeElecInteraction =
534                             ( bEwald && r < rcoulomb) ||
535                             (!bEwald && rC < rcoulomb);
536
537                         if ( (qq[i] != 0) && bComputeElecInteraction)
538                         {
539                             if (bEwald)
540                             {
541                                 /* Ewald FEP is done only on the 1/r part */
542                                 Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
543                                 FscalC[i]  = qq[i]*rinvC;
544                             }
545                             else
546                             {
547                                 /* reaction-field */
548                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
549                                 FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
550                             }
551                         }
552
553                         /* Only process the VDW interactions if we have
554                          * some non-zero parameters, and if we either
555                          * include all entries in the list (no cutoff used
556                          * in the kernel), or if we are within the cutoff.
557                          */
558                         bComputeVdwInteraction =
559                             ( bEwaldLJ && r < rvdw) ||
560                             (!bEwaldLJ && rV < rvdw);
561                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
562                         {
563                             /* cutoff LJ, also handles part of Ewald LJ */
564                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
565                             {
566                                 rinv6        = rpinvV;
567                             }
568                             else
569                             {
570                                 rinv6        = rinvV*rinvV;
571                                 rinv6        = rinv6*rinv6*rinv6;
572                             }
573                             Vvdw6            = c6[i]*rinv6;
574                             Vvdw12           = c12[i]*rinv6*rinv6;
575
576                             Vvdw[i]          = ( (Vvdw12 + c12[i]*repulsionShift)*onetwelfth
577                                                  - (Vvdw6 + c6[i]*dispersionShift)*onesixth);
578                             FscalV[i]        = Vvdw12 - Vvdw6;
579
580                             if (bEwaldLJ)
581                             {
582                                 /* Subtract the grid potential at the cut-off */
583                                 c6grid      = nbfp_grid[tj[i]];
584                                 Vvdw[i]    += c6grid*sh_lj_ewald*onesixth;
585                             }
586
587                             if (ic->vdw_modifier == eintmodPOTSWITCH)
588                             {
589                                 d                = rV - ic->rvdw_switch;
590                                 d                = (d > zero) ? d : zero;
591                                 d2               = d*d;
592                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
593                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
594
595                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
596                                 Vvdw[i]         *= sw;
597
598                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
599                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
600                             }
601                         }
602
603                         /* FscalC (and FscalV) now contain: dV/drC * rC
604                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
605                          * Further down we first multiply by r^p-2 and then by
606                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
607                          */
608                         FscalC[i] *= rpinvC;
609                         FscalV[i] *= rpinvV;
610                     }
611                 }
612
613                 /* Assemble A and B states */
614                 for (i = 0; i < NSTATES; i++)
615                 {
616                     vctot         += LFC[i]*Vcoul[i];
617                     vvtot         += LFV[i]*Vvdw[i];
618
619                     Fscal     += LFC[i]*FscalC[i]*rpm2;
620                     Fscal     += LFV[i]*FscalV[i]*rpm2;
621
622                     if (useSoftCore)
623                     {
624                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
625                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
626                     }
627                     else
628                     {
629                         dvdl_coul += Vcoul[i]*DLF[i];
630                         dvdl_vdw  += Vvdw[i]*DLF[i];
631                     }
632                 }
633             }
634             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
635             {
636                 /* For excluded pairs, which are only in this pair list when
637                  * using the Verlet scheme, we don't use soft-core.
638                  * The group scheme also doesn't soft-core for these.
639                  * As there is no singularity, there is no need for soft-core.
640                  */
641                 VV = krf*rsq - crf;
642                 FF = -two*krf;
643
644                 if (ii == jnr)
645                 {
646                     VV *= half;
647                 }
648
649                 for (i = 0; i < NSTATES; i++)
650                 {
651                     vctot      += LFC[i]*qq[i]*VV;
652                     Fscal      += LFC[i]*qq[i]*FF;
653                     dvdl_coul  += DLF[i]*qq[i]*VV;
654                 }
655             }
656
657             if (bEwald && r < rcoulomb)
658             {
659                 /* See comment in the preamble. When using Ewald interactions
660                  * (unless we use a switch modifier) we subtract the reciprocal-space
661                  * Ewald component here which made it possible to apply the free
662                  * energy interaction to 1/r (vanilla coulomb short-range part)
663                  * above. This gets us closer to the ideal case of applying
664                  * the softcore to the entire electrostatic interaction,
665                  * including the reciprocal-space component.
666                  */
667                 real v_lr, f_lr;
668
669                 ewrt      = r*ewtabscale;
670                 ewitab    = static_cast<int>(ewrt);
671                 eweps     = ewrt-ewitab;
672                 ewitab    = 4*ewitab;
673                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
674                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
675                 f_lr     *= rinv;
676
677                 /* Note that any possible Ewald shift has already been applied in
678                  * the normal interaction part above.
679                  */
680
681                 if (ii == jnr)
682                 {
683                     /* If we get here, the i particle (ii) has itself (jnr)
684                      * in its neighborlist. This can only happen with the Verlet
685                      * scheme, and corresponds to a self-interaction that will
686                      * occur twice. Scale it down by 50% to only include it once.
687                      */
688                     v_lr *= half;
689                 }
690
691                 for (i = 0; i < NSTATES; i++)
692                 {
693                     vctot      -= LFC[i]*qq[i]*v_lr;
694                     Fscal      -= LFC[i]*qq[i]*f_lr;
695                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
696                 }
697             }
698
699             if (bEwaldLJ && r < rvdw)
700             {
701                 /* See comment in the preamble. When using LJ-Ewald interactions
702                  * (unless we use a switch modifier) we subtract the reciprocal-space
703                  * Ewald component here which made it possible to apply the free
704                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
705                  * above. This gets us closer to the ideal case of applying
706                  * the softcore to the entire VdW interaction,
707                  * including the reciprocal-space component.
708                  */
709                 /* We could also use the analytical form here
710                  * iso a table, but that can cause issues for
711                  * r close to 0 for non-interacting pairs.
712                  */
713                 real rs, frac, f_lr;
714                 int  ri;
715
716                 rs     = rsq*rinv*ewtabscale;
717                 ri     = static_cast<int>(rs);
718                 frac   = rs - ri;
719                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
720                 /* TODO: Currently the Ewald LJ table does not contain
721                  * the factor 1/6, we should add this.
722                  */
723                 FF     = f_lr*rinv/six;
724                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
725
726                 if (ii == jnr)
727                 {
728                     /* If we get here, the i particle (ii) has itself (jnr)
729                      * in its neighborlist. This can only happen with the Verlet
730                      * scheme, and corresponds to a self-interaction that will
731                      * occur twice. Scale it down by 50% to only include it once.
732                      */
733                     VV *= half;
734                 }
735
736                 for (i = 0; i < NSTATES; i++)
737                 {
738                     c6grid      = nbfp_grid[tj[i]];
739                     vvtot      += LFV[i]*c6grid*VV;
740                     Fscal      += LFV[i]*c6grid*FF;
741                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
742                 }
743             }
744
745             if (bDoForces)
746             {
747                 tx         = Fscal*dx;
748                 ty         = Fscal*dy;
749                 tz         = Fscal*dz;
750                 fix        = fix + tx;
751                 fiy        = fiy + ty;
752                 fiz        = fiz + tz;
753                 /* OpenMP atomics are expensive, but this kernels is also
754                  * expensive, so we can take this hit, instead of using
755                  * thread-local output buffers and extra reduction.
756                  *
757                  * All the OpenMP regions in this file are trivial and should
758                  * not throw, so no need for try/catch.
759                  */
760 #pragma omp atomic
761                 f[j3]     -= tx;
762 #pragma omp atomic
763                 f[j3+1]   -= ty;
764 #pragma omp atomic
765                 f[j3+2]   -= tz;
766             }
767         }
768
769         /* The atomics below are expensive with many OpenMP threads.
770          * Here unperturbed i-particles will usually only have a few
771          * (perturbed) j-particles in the list. Thus with a buffered list
772          * we can skip a significant number of i-reductions with a check.
773          */
774         if (npair_within_cutoff > 0)
775         {
776             if (bDoForces)
777             {
778 #pragma omp atomic
779                 f[ii3]        += fix;
780 #pragma omp atomic
781                 f[ii3+1]      += fiy;
782 #pragma omp atomic
783                 f[ii3+2]      += fiz;
784             }
785             if (bDoShiftForces)
786             {
787 #pragma omp atomic
788                 fshift[is3]   += fix;
789 #pragma omp atomic
790                 fshift[is3+1] += fiy;
791 #pragma omp atomic
792                 fshift[is3+2] += fiz;
793             }
794             if (bDoPotential)
795             {
796                 ggid               = gid[n];
797 #pragma omp atomic
798                 Vc[ggid]          += vctot;
799 #pragma omp atomic
800                 Vv[ggid]          += vvtot;
801             }
802         }
803     }
804
805 #pragma omp atomic
806     dvdl[efptCOUL]     += dvdl_coul;
807  #pragma omp atomic
808     dvdl[efptVDW]      += dvdl_vdw;
809
810     /* Estimate flops, average for free energy stuff:
811      * 12  flops per outer iteration
812      * 150 flops per inner iteration
813      */
814 #pragma omp atomic
815     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
816 }
817
818 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
819                                rvec                      *xx,
820                                gmx::ForceWithShiftForces *ff,
821                                const t_forcerec          *fr,
822                                const t_mdatoms           *mdatoms,
823                                nb_kernel_data_t          *kernel_data,
824                                t_nrnb                    *nrnb)
825 {
826     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
827     {
828         nb_free_energy_kernel<SoftCoreTreatment::None, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
829     }
830     else if (fr->sc_r_power == 6.0_real)
831     {
832         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
833             fr->sc_alphacoul == fr->sc_alphavdw)
834         {
835             nb_free_energy_kernel<SoftCoreTreatment::RPower6, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
836         }
837         else
838         {
839             nb_free_energy_kernel<SoftCoreTreatment::RPower6, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
840         }
841     }
842     else if (fr->sc_r_power == 48.0_real)
843     {
844         nb_free_energy_kernel<SoftCoreTreatment::RPower48, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
845     }
846     else
847     {
848         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
849     }
850 }