Use shiftForces in ForceOuputs
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forceoutput.h"
51 #include "gromacs/mdtypes/forcerec.h"
52 #include "gromacs/mdtypes/md_enums.h"
53 #include "gromacs/utility/fatalerror.h"
54
55
56 //! Enum for templating the soft-core treatment in the kernel
57 enum class SoftCoreTreatment
58 {
59     None,    //!< No soft-core
60     RPower6, //!< Soft-core with r-power = 6
61     RPower48 //!< Soft-core with r-power = 48
62 };
63
64 //! Most treatments are fine with float in mixed-precision mode.
65 template <SoftCoreTreatment softCoreTreatment>
66 struct SoftCoreReal
67 {
68     //! Real type for soft-core calculations
69     using Real = real;
70 };
71
72 //! This treatment requires double precision for some computations.
73 template <>
74 struct SoftCoreReal<SoftCoreTreatment::RPower48>
75 {
76     //! Real type for soft-core calculations
77     using Real = double;
78 };
79
80 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
81 template <SoftCoreTreatment softCoreTreatment>
82 static inline void pthRoot(const real  r,
83                            real       *pthRoot,
84                            real       *invPthRoot)
85 {
86     *invPthRoot = gmx::invsqrt(std::cbrt(r));
87     *pthRoot    = 1/(*invPthRoot);
88 }
89
90 // We need a double version to make the specialization below work
91 #if !GMX_DOUBLE
92 //! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
93 template <SoftCoreTreatment softCoreTreatment>
94 static inline void pthRoot(const double  r,
95                            double       *pthRoot,
96                            double       *invPthRoot)
97 {
98     *invPthRoot = gmx::invsqrt(std::cbrt(r));
99     *pthRoot    = 1/(*invPthRoot);
100 }
101 #endif
102
103 //! Computes r^(1/p) and 1/r^(1/p) for p=48
104 template <>
105 inline void pthRoot<SoftCoreTreatment::RPower48>(const double  r,
106                                                  double       *pthRoot,
107                                                  double       *invPthRoot)
108 {
109     *pthRoot    = std::pow(r, 1.0/48.0);
110     *invPthRoot = 1/(*pthRoot);
111 }
112
113 //! Templated free-energy non-bonded kernel
114 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
115 static void
116 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
117                       rvec * gmx_restrict              xx,
118                       gmx::ForceWithShiftForces *      forceWithShiftForces,
119                       const t_forcerec * gmx_restrict  fr,
120                       const t_mdatoms * gmx_restrict   mdatoms,
121                       nb_kernel_data_t * gmx_restrict  kernel_data,
122                       t_nrnb * gmx_restrict            nrnb)
123 {
124     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
125
126     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
127
128 #define  STATE_A  0
129 #define  STATE_B  1
130 #define  NSTATES  2
131     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
132     real          shX, shY, shZ;
133     real          tx, ty, tz, Fscal;
134     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
135     SCReal        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
136     real          rinv6, r;
137     real          iqA, iqB;
138     real          qq[NSTATES], vctot;
139     int           ntiA, ntiB, tj[NSTATES];
140     real          Vvdw6, Vvdw12, vvtot;
141     real          ix, iy, iz, fix, fiy, fiz;
142     real          dx, dy, dz, rsq, rinv;
143     real          c6[NSTATES], c12[NSTATES], c6grid;
144     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
145     SCReal        dvdl_coul, dvdl_vdw;
146     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
147     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
148     SCReal        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
149     real          sigma_pow[NSTATES];
150     real          VV, FF;
151     int           icoul, ivdw;
152     int           nri;
153     const int *   iinr;
154     const int *   jindex;
155     const int *   jjnr;
156     const int *   shift;
157     const int *   gid;
158     const int *   typeA;
159     const int *   typeB;
160     int           ntype;
161     const real *  shiftvec;
162     const real *  chargeA;
163     const real *  chargeB;
164     real          sigma6_min, sigma6_def, lam_power;
165     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
166     const real *  nbfp, *nbfp_grid;
167     real *        dvdl;
168     real *        Vv;
169     real *        Vc;
170     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
171     gmx_bool      bEwald, bEwaldLJ;
172     real          rcutoff_max2;
173     const real *  tab_ewald_F_lj = nullptr;
174     const real *  tab_ewald_V_lj = nullptr;
175     real          d, d2, sw, dsw;
176     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
177     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
178     const real *  ewtab = nullptr;
179     int           ewitab;
180     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
181
182     const real    onetwelfth  = 1.0/12.0;
183     const real    onesixth    = 1.0/6.0;
184     const real    zero        = 0.0;
185     const real    half        = 0.5;
186     const real    one         = 1.0;
187     const real    two         = 2.0;
188     const real    six         = 6.0;
189
190     /* Extract pointer to non-bonded interaction constants */
191     const interaction_const_t *ic = fr->ic;
192
193     // TODO: We should get rid of using pointers to real
194     const real          *x      = xx[0];
195     real * gmx_restrict  f      = &(forceWithShiftForces->force()[0][0]);
196
197     real * gmx_restrict  fshift = &(forceWithShiftForces->shiftForces()[0][0]);
198
199     // Extract pair list data
200     nri                 = nlist->nri;
201     iinr                = nlist->iinr;
202     jindex              = nlist->jindex;
203     jjnr                = nlist->jjnr;
204     shift               = nlist->shift;
205     gid                 = nlist->gid;
206
207     shiftvec            = fr->shift_vec[0];
208     chargeA             = mdatoms->chargeA;
209     chargeB             = mdatoms->chargeB;
210     Vc                  = kernel_data->energygrp_elec;
211     typeA               = mdatoms->typeA;
212     typeB               = mdatoms->typeB;
213     ntype               = fr->ntype;
214     nbfp                = fr->nbfp;
215     nbfp_grid           = fr->ljpme_c6grid;
216     Vv                  = kernel_data->energygrp_vdw;
217     lambda_coul         = kernel_data->lambda[efptCOUL];
218     lambda_vdw          = kernel_data->lambda[efptVDW];
219     dvdl                = kernel_data->dvdl;
220     alpha_coul          = fr->sc_alphacoul;
221     alpha_vdw           = fr->sc_alphavdw;
222     lam_power           = fr->sc_power;
223     sigma6_def          = fr->sc_sigma6_def;
224     sigma6_min          = fr->sc_sigma6_min;
225     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
226     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
227     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
228
229     // Extract data from interaction_const_t
230     const real facel           = ic->epsfac;
231     const real rcoulomb        = ic->rcoulomb;
232     const real krf             = ic->k_rf;
233     const real crf             = ic->c_rf;
234     const real sh_lj_ewald     = ic->sh_lj_ewald;
235     const real rvdw            = ic->rvdw;
236     const real dispersionShift = ic->dispersion_shift.cpot;
237     const real repulsionShift  = ic->repulsion_shift.cpot;
238
239     // Note that the nbnxm kernels do not support Coulomb potential switching at all
240     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
241                "Potential switching is not supported for Coulomb with FEP");
242
243     if (ic->vdw_modifier == eintmodPOTSWITCH)
244     {
245         d               = ic->rvdw - ic->rvdw_switch;
246         vdw_swV3        = -10.0/(d*d*d);
247         vdw_swV4        =  15.0/(d*d*d*d);
248         vdw_swV5        =  -6.0/(d*d*d*d*d);
249         vdw_swF2        = -30.0/(d*d*d);
250         vdw_swF3        =  60.0/(d*d*d*d);
251         vdw_swF4        = -30.0/(d*d*d*d*d);
252     }
253     else
254     {
255         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
256         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
257     }
258
259     if (EVDW_PME(ic->vdwtype))
260     {
261         ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
262     }
263     else
264     {
265         ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
266     }
267
268     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
269     {
270         icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
271     }
272     else if (EEL_PME_EWALD(ic->eeltype))
273     {
274         icoul        = GMX_NBKERNEL_ELEC_EWALD;
275     }
276     else
277     {
278         gmx_incons("Unsupported eeltype with Verlet and free-energy");
279     }
280
281     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
282     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
283
284     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
285     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
286
287     if (bEwald || bEwaldLJ)
288     {
289         const auto &tables = *ic->coulombEwaldTables;
290         sh_ewald       = ic->sh_ewald;
291         ewtab          = tables.tableFDV0.data();
292         ewtabscale     = tables.scale;
293         ewtabhalfspace = half/ewtabscale;
294         tab_ewald_F_lj = tables.tableF.data();
295         tab_ewald_V_lj = tables.tableV.data();
296     }
297
298     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
299      * reciprocal space. When we use non-switched Ewald interactions, we
300      * assume the soft-coring does not significantly affect the grid contribution
301      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
302      *
303      * However, we cannot use this approach for switch-modified since we would then
304      * effectively end up evaluating a significantly different interaction here compared to the
305      * normal (non-free-energy) kernels, either by applying a cutoff at a different
306      * position than what the user requested, or by switching different
307      * things (1/r rather than short-range Ewald). For these settings, we just
308      * use the traditional short-range Ewald interaction in that case.
309      */
310     GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
311                        !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
312                        "Can not apply soft-core to switched Ewald potentials");
313
314     dvdl_coul  = 0;
315     dvdl_vdw   = 0;
316
317     /* Lambda factor for state A, 1-lambda*/
318     LFC[STATE_A] = one - lambda_coul;
319     LFV[STATE_A] = one - lambda_vdw;
320
321     /* Lambda factor for state B, lambda*/
322     LFC[STATE_B] = lambda_coul;
323     LFV[STATE_B] = lambda_vdw;
324
325     /*derivative of the lambda factor for state A and B */
326     DLF[STATE_A] = -1;
327     DLF[STATE_B] = 1;
328
329     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
330     for (i = 0; i < NSTATES; i++)
331     {
332         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
333         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
334         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
335         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
336     }
337
338     for (n = 0; (n < nri); n++)
339     {
340         int npair_within_cutoff;
341
342         npair_within_cutoff = 0;
343
344         is3              = 3*shift[n];
345         shX              = shiftvec[is3];
346         shY              = shiftvec[is3+1];
347         shZ              = shiftvec[is3+2];
348         nj0              = jindex[n];
349         nj1              = jindex[n+1];
350         ii               = iinr[n];
351         ii3              = 3*ii;
352         ix               = shX + x[ii3+0];
353         iy               = shY + x[ii3+1];
354         iz               = shZ + x[ii3+2];
355         iqA              = facel*chargeA[ii];
356         iqB              = facel*chargeB[ii];
357         ntiA             = 2*ntype*typeA[ii];
358         ntiB             = 2*ntype*typeB[ii];
359         vctot            = 0;
360         vvtot            = 0;
361         fix              = 0;
362         fiy              = 0;
363         fiz              = 0;
364
365         for (k = nj0; (k < nj1); k++)
366         {
367             jnr              = jjnr[k];
368             j3               = 3*jnr;
369             dx               = ix - x[j3];
370             dy               = iy - x[j3+1];
371             dz               = iz - x[j3+2];
372             rsq              = dx*dx + dy*dy + dz*dz;
373
374             if (rsq >= rcutoff_max2)
375             {
376                 /* We save significant time by skipping all code below.
377                  * Note that with soft-core interactions, the actual cut-off
378                  * check might be different. But since the soft-core distance
379                  * is always larger than r, checking on r here is safe.
380                  */
381                 continue;
382             }
383             npair_within_cutoff++;
384
385             if (rsq > 0)
386             {
387                 /* Note that unlike in the nbnxn kernels, we do not need
388                  * to clamp the value of rsq before taking the invsqrt
389                  * to avoid NaN in the LJ calculation, since here we do
390                  * not calculate LJ interactions when C6 and C12 are zero.
391                  */
392
393                 rinv         = gmx::invsqrt(rsq);
394                 r            = rsq*rinv;
395             }
396             else
397             {
398                 /* The force at r=0 is zero, because of symmetry.
399                  * But note that the potential is in general non-zero,
400                  * since the soft-cored r will be non-zero.
401                  */
402                 rinv         = 0;
403                 r            = 0;
404             }
405
406             if (softCoreTreatment == SoftCoreTreatment::None)
407             {
408                 /* The soft-core power p will not affect the results
409                  * with not using soft-core, so we use power of 0 which gives
410                  * the simplest math and cheapest code.
411                  */
412                 rpm2         = rinv*rinv;
413                 rp           = 1;
414             }
415             if (softCoreTreatment == SoftCoreTreatment::RPower6)
416             {
417                 rpm2         = rsq*rsq;  /* r4 */
418                 rp           = rpm2*rsq; /* r6 */
419             }
420             if (softCoreTreatment == SoftCoreTreatment::RPower48)
421             {
422                 rp           = rsq*rsq*rsq; /* r6 */
423                 rp           = rp*rp;       /* r12 */
424                 rp           = rp*rp;       /* r24 */
425                 rp           = rp*rp;       /* r48 */
426                 rpm2         = rp/rsq;      /* r46 */
427             }
428
429             Fscal = 0;
430
431             qq[STATE_A]      = iqA*chargeA[jnr];
432             qq[STATE_B]      = iqB*chargeB[jnr];
433
434             tj[STATE_A]      = ntiA+2*typeA[jnr];
435             tj[STATE_B]      = ntiB+2*typeB[jnr];
436
437             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
438             {
439                 c6[STATE_A]      = nbfp[tj[STATE_A]];
440                 c6[STATE_B]      = nbfp[tj[STATE_B]];
441
442                 for (i = 0; i < NSTATES; i++)
443                 {
444                     c12[i]             = nbfp[tj[i]+1];
445                     if (useSoftCore)
446                     {
447                         if ((c6[i] > 0) && (c12[i] > 0))
448                         {
449                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
450                             sigma6[i]       = half*c12[i]/c6[i];
451                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
452                             {
453                                 sigma6[i] = sigma6_min;
454                             }
455                         }
456                         else
457                         {
458                             sigma6[i]       = sigma6_def;
459                         }
460                         if (softCoreTreatment == SoftCoreTreatment::RPower6)
461                         {
462                             sigma_pow[i]    = sigma6[i];
463                         }
464                         else
465                         {
466                             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
467                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
468                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
469                         }
470                     }
471                 }
472
473                 if (useSoftCore)
474                 {
475                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
476                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
477                     {
478                         alpha_vdw_eff    = 0;
479                         alpha_coul_eff   = 0;
480                     }
481                     else
482                     {
483                         alpha_vdw_eff    = alpha_vdw;
484                         alpha_coul_eff   = alpha_coul;
485                     }
486                 }
487
488                 for (i = 0; i < NSTATES; i++)
489                 {
490                     FscalC[i]    = 0;
491                     FscalV[i]    = 0;
492                     Vcoul[i]     = 0;
493                     Vvdw[i]      = 0;
494
495                     /* Only spend time on A or B state if it is non-zero */
496                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
497                     {
498                         /* this section has to be inside the loop because of the dependence on sigma_pow */
499                         if (useSoftCore)
500                         {
501                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
502                             pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
503
504                             if (scLambdasOrAlphasDiffer)
505                             {
506                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
507                                 pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
508                             }
509                             else
510                             {
511                                 /* We can avoid one expensive pow and one / operation */
512                                 rpinvV = rpinvC;
513                                 rinvV  = rinvC;
514                                 rV     = rC;
515                             }
516                         }
517                         else
518                         {
519                             rpinvC     = 1;
520                             rinvC      = rinv;
521                             rC         = r;
522
523                             rpinvV     = 1;
524                             rinvV      = rinv;
525                             rV         = r;
526                         }
527
528                         /* Only process the coulomb interactions if we have charges,
529                          * and if we either include all entries in the list (no cutoff
530                          * used in the kernel), or if we are within the cutoff.
531                          */
532                         bComputeElecInteraction =
533                             ( bEwald && r < rcoulomb) ||
534                             (!bEwald && rC < rcoulomb);
535
536                         if ( (qq[i] != 0) && bComputeElecInteraction)
537                         {
538                             if (bEwald)
539                             {
540                                 /* Ewald FEP is done only on the 1/r part */
541                                 Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
542                                 FscalC[i]  = qq[i]*rinvC;
543                             }
544                             else
545                             {
546                                 /* reaction-field */
547                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
548                                 FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
549                             }
550                         }
551
552                         /* Only process the VDW interactions if we have
553                          * some non-zero parameters, and if we either
554                          * include all entries in the list (no cutoff used
555                          * in the kernel), or if we are within the cutoff.
556                          */
557                         bComputeVdwInteraction =
558                             ( bEwaldLJ && r < rvdw) ||
559                             (!bEwaldLJ && rV < rvdw);
560                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
561                         {
562                             /* cutoff LJ, also handles part of Ewald LJ */
563                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
564                             {
565                                 rinv6        = rpinvV;
566                             }
567                             else
568                             {
569                                 rinv6        = rinvV*rinvV;
570                                 rinv6        = rinv6*rinv6*rinv6;
571                             }
572                             Vvdw6            = c6[i]*rinv6;
573                             Vvdw12           = c12[i]*rinv6*rinv6;
574
575                             Vvdw[i]          = ( (Vvdw12 + c12[i]*repulsionShift)*onetwelfth
576                                                  - (Vvdw6 + c6[i]*dispersionShift)*onesixth);
577                             FscalV[i]        = Vvdw12 - Vvdw6;
578
579                             if (bEwaldLJ)
580                             {
581                                 /* Subtract the grid potential at the cut-off */
582                                 c6grid      = nbfp_grid[tj[i]];
583                                 Vvdw[i]    += c6grid*sh_lj_ewald*onesixth;
584                             }
585
586                             if (ic->vdw_modifier == eintmodPOTSWITCH)
587                             {
588                                 d                = rV - ic->rvdw_switch;
589                                 d                = (d > zero) ? d : zero;
590                                 d2               = d*d;
591                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
592                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
593
594                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
595                                 Vvdw[i]         *= sw;
596
597                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
598                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
599                             }
600                         }
601
602                         /* FscalC (and FscalV) now contain: dV/drC * rC
603                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
604                          * Further down we first multiply by r^p-2 and then by
605                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
606                          */
607                         FscalC[i] *= rpinvC;
608                         FscalV[i] *= rpinvV;
609                     }
610                 }
611
612                 /* Assemble A and B states */
613                 for (i = 0; i < NSTATES; i++)
614                 {
615                     vctot         += LFC[i]*Vcoul[i];
616                     vvtot         += LFV[i]*Vvdw[i];
617
618                     Fscal     += LFC[i]*FscalC[i]*rpm2;
619                     Fscal     += LFV[i]*FscalV[i]*rpm2;
620
621                     if (useSoftCore)
622                     {
623                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
624                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
625                     }
626                     else
627                     {
628                         dvdl_coul += Vcoul[i]*DLF[i];
629                         dvdl_vdw  += Vvdw[i]*DLF[i];
630                     }
631                 }
632             }
633             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
634             {
635                 /* For excluded pairs, which are only in this pair list when
636                  * using the Verlet scheme, we don't use soft-core.
637                  * The group scheme also doesn't soft-core for these.
638                  * As there is no singularity, there is no need for soft-core.
639                  */
640                 VV = krf*rsq - crf;
641                 FF = -two*krf;
642
643                 if (ii == jnr)
644                 {
645                     VV *= half;
646                 }
647
648                 for (i = 0; i < NSTATES; i++)
649                 {
650                     vctot      += LFC[i]*qq[i]*VV;
651                     Fscal      += LFC[i]*qq[i]*FF;
652                     dvdl_coul  += DLF[i]*qq[i]*VV;
653                 }
654             }
655
656             if (bEwald && r < rcoulomb)
657             {
658                 /* See comment in the preamble. When using Ewald interactions
659                  * (unless we use a switch modifier) we subtract the reciprocal-space
660                  * Ewald component here which made it possible to apply the free
661                  * energy interaction to 1/r (vanilla coulomb short-range part)
662                  * above. This gets us closer to the ideal case of applying
663                  * the softcore to the entire electrostatic interaction,
664                  * including the reciprocal-space component.
665                  */
666                 real v_lr, f_lr;
667
668                 ewrt      = r*ewtabscale;
669                 ewitab    = static_cast<int>(ewrt);
670                 eweps     = ewrt-ewitab;
671                 ewitab    = 4*ewitab;
672                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
673                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
674                 f_lr     *= rinv;
675
676                 /* Note that any possible Ewald shift has already been applied in
677                  * the normal interaction part above.
678                  */
679
680                 if (ii == jnr)
681                 {
682                     /* If we get here, the i particle (ii) has itself (jnr)
683                      * in its neighborlist. This can only happen with the Verlet
684                      * scheme, and corresponds to a self-interaction that will
685                      * occur twice. Scale it down by 50% to only include it once.
686                      */
687                     v_lr *= half;
688                 }
689
690                 for (i = 0; i < NSTATES; i++)
691                 {
692                     vctot      -= LFC[i]*qq[i]*v_lr;
693                     Fscal      -= LFC[i]*qq[i]*f_lr;
694                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
695                 }
696             }
697
698             if (bEwaldLJ && r < rvdw)
699             {
700                 /* See comment in the preamble. When using LJ-Ewald interactions
701                  * (unless we use a switch modifier) we subtract the reciprocal-space
702                  * Ewald component here which made it possible to apply the free
703                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
704                  * above. This gets us closer to the ideal case of applying
705                  * the softcore to the entire VdW interaction,
706                  * including the reciprocal-space component.
707                  */
708                 /* We could also use the analytical form here
709                  * iso a table, but that can cause issues for
710                  * r close to 0 for non-interacting pairs.
711                  */
712                 real rs, frac, f_lr;
713                 int  ri;
714
715                 rs     = rsq*rinv*ewtabscale;
716                 ri     = static_cast<int>(rs);
717                 frac   = rs - ri;
718                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
719                 /* TODO: Currently the Ewald LJ table does not contain
720                  * the factor 1/6, we should add this.
721                  */
722                 FF     = f_lr*rinv/six;
723                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
724
725                 if (ii == jnr)
726                 {
727                     /* If we get here, the i particle (ii) has itself (jnr)
728                      * in its neighborlist. This can only happen with the Verlet
729                      * scheme, and corresponds to a self-interaction that will
730                      * occur twice. Scale it down by 50% to only include it once.
731                      */
732                     VV *= half;
733                 }
734
735                 for (i = 0; i < NSTATES; i++)
736                 {
737                     c6grid      = nbfp_grid[tj[i]];
738                     vvtot      += LFV[i]*c6grid*VV;
739                     Fscal      += LFV[i]*c6grid*FF;
740                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
741                 }
742             }
743
744             if (bDoForces)
745             {
746                 tx         = Fscal*dx;
747                 ty         = Fscal*dy;
748                 tz         = Fscal*dz;
749                 fix        = fix + tx;
750                 fiy        = fiy + ty;
751                 fiz        = fiz + tz;
752                 /* OpenMP atomics are expensive, but this kernels is also
753                  * expensive, so we can take this hit, instead of using
754                  * thread-local output buffers and extra reduction.
755                  *
756                  * All the OpenMP regions in this file are trivial and should
757                  * not throw, so no need for try/catch.
758                  */
759 #pragma omp atomic
760                 f[j3]     -= tx;
761 #pragma omp atomic
762                 f[j3+1]   -= ty;
763 #pragma omp atomic
764                 f[j3+2]   -= tz;
765             }
766         }
767
768         /* The atomics below are expensive with many OpenMP threads.
769          * Here unperturbed i-particles will usually only have a few
770          * (perturbed) j-particles in the list. Thus with a buffered list
771          * we can skip a significant number of i-reductions with a check.
772          */
773         if (npair_within_cutoff > 0)
774         {
775             if (bDoForces)
776             {
777 #pragma omp atomic
778                 f[ii3]        += fix;
779 #pragma omp atomic
780                 f[ii3+1]      += fiy;
781 #pragma omp atomic
782                 f[ii3+2]      += fiz;
783             }
784             if (bDoShiftForces)
785             {
786 #pragma omp atomic
787                 fshift[is3]   += fix;
788 #pragma omp atomic
789                 fshift[is3+1] += fiy;
790 #pragma omp atomic
791                 fshift[is3+2] += fiz;
792             }
793             if (bDoPotential)
794             {
795                 ggid               = gid[n];
796 #pragma omp atomic
797                 Vc[ggid]          += vctot;
798 #pragma omp atomic
799                 Vv[ggid]          += vvtot;
800             }
801         }
802     }
803
804 #pragma omp atomic
805     dvdl[efptCOUL]     += dvdl_coul;
806  #pragma omp atomic
807     dvdl[efptVDW]      += dvdl_vdw;
808
809     /* Estimate flops, average for free energy stuff:
810      * 12  flops per outer iteration
811      * 150 flops per inner iteration
812      */
813 #pragma omp atomic
814     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
815 }
816
817 void gmx_nb_free_energy_kernel(const t_nblist            *nlist,
818                                rvec                      *xx,
819                                gmx::ForceWithShiftForces *ff,
820                                const t_forcerec          *fr,
821                                const t_mdatoms           *mdatoms,
822                                nb_kernel_data_t          *kernel_data,
823                                t_nrnb                    *nrnb)
824 {
825     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
826     {
827         nb_free_energy_kernel<SoftCoreTreatment::None, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
828     }
829     else if (fr->sc_r_power == 6.0_real)
830     {
831         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
832             fr->sc_alphacoul == fr->sc_alphavdw)
833         {
834             nb_free_energy_kernel<SoftCoreTreatment::RPower6, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
835         }
836         else
837         {
838             nb_free_energy_kernel<SoftCoreTreatment::RPower6, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
839         }
840     }
841     else if (fr->sc_r_power == 48.0_real)
842     {
843         nb_free_energy_kernel<SoftCoreTreatment::RPower48, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
844     }
845     else
846     {
847         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
848     }
849 }