Template free-energy kernel on soft-core
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forcerec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/utility/fatalerror.h"
53
54
55 //! Enum for templating the soft-core treatment in the kernel
56 enum class SoftCoreTreatment
57 {
58     None,    //!< No soft-core
59     RPower6, //!< Soft-core with r-power = 6
60     RPower48 //!< Soft-core with r-power = 48
61 };
62
63 //! Most treatments are fine with float in mixed-precision mode.
64 template <SoftCoreTreatment softCoreTreatment>
65 struct SoftCoreReal
66 {
67     //! Real type for soft-core calculations
68     using Real = real;
69 };
70
71 //! This treatment requires double precision for some computations.
72 template <>
73 struct SoftCoreReal<SoftCoreTreatment::RPower48>
74 {
75     //! Real type for soft-core calculations
76     using Real = double;
77 };
78
79 //! Templated free-energy non-bonded kernel
80 template<SoftCoreTreatment softCoreTreatment>
81 static void
82 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
83                       rvec * gmx_restrict              xx,
84                       rvec * gmx_restrict              ff,
85                       t_forcerec * gmx_restrict        fr,
86                       const t_mdatoms * gmx_restrict   mdatoms,
87                       nb_kernel_data_t * gmx_restrict  kernel_data,
88                       t_nrnb * gmx_restrict            nrnb)
89 {
90     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
91
92     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
93
94 #define  STATE_A  0
95 #define  STATE_B  1
96 #define  NSTATES  2
97     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
98     real          shX, shY, shZ;
99     real          tx, ty, tz, Fscal;
100     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
101     SCReal        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
102     real          rinv6, r, rtC, rtV;
103     real          iqA, iqB;
104     real          qq[NSTATES], vctot;
105     int           ntiA, ntiB, tj[NSTATES];
106     real          Vvdw6, Vvdw12, vvtot;
107     real          ix, iy, iz, fix, fiy, fiz;
108     real          dx, dy, dz, rsq, rinv;
109     real          c6[NSTATES], c12[NSTATES], c6grid;
110     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
111     SCReal        dvdl_coul, dvdl_vdw;
112     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
113     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
114     SCReal        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
115     real          sigma_pow[NSTATES];
116     int           do_tab, tab_elemsize = 0;
117     int           n0, n1C, n1V, nnn;
118     real          Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
119     int           icoul, ivdw;
120     int           nri;
121     const int *   iinr;
122     const int *   jindex;
123     const int *   jjnr;
124     const int *   shift;
125     const int *   gid;
126     const int *   typeA;
127     const int *   typeB;
128     int           ntype;
129     const real *  shiftvec;
130     real *        fshift;
131     real          tabscale = 0;
132     const real *  VFtab    = nullptr;
133     const real *  x;
134     real *        f;
135     real          facel, krf, crf;
136     const real *  chargeA;
137     const real *  chargeB;
138     real          sigma6_min, sigma6_def, lam_power;
139     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
140     real          sh_lj_ewald;
141     const real *  nbfp, *nbfp_grid;
142     real *        dvdl;
143     real *        Vv;
144     real *        Vc;
145     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
146     real          rcoulomb, rvdw, sh_invrc6;
147     gmx_bool      bEwald, bEwaldLJ;
148     real          rcutoff_max2;
149     const real *  tab_ewald_F_lj = nullptr;
150     const real *  tab_ewald_V_lj = nullptr;
151     real          d, d2, sw, dsw;
152     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
153     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
154     const real *  ewtab = nullptr;
155     int           ewitab;
156     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
157
158     const real    onetwelfth  = 1.0/12.0;
159     const real    onesixth    = 1.0/6.0;
160     const real    zero        = 0.0;
161     const real    half        = 0.5;
162     const real    one         = 1.0;
163     const real    two         = 2.0;
164     const real    six         = 6.0;
165
166     /* Extract pointer to non-bonded interaction constants */
167     const interaction_const_t *ic = fr->ic;
168
169     x                   = xx[0];
170     f                   = ff[0];
171
172     fshift              = fr->fshift[0];
173
174     nri                 = nlist->nri;
175     iinr                = nlist->iinr;
176     jindex              = nlist->jindex;
177     jjnr                = nlist->jjnr;
178     shift               = nlist->shift;
179     gid                 = nlist->gid;
180
181     shiftvec            = fr->shift_vec[0];
182     chargeA             = mdatoms->chargeA;
183     chargeB             = mdatoms->chargeB;
184     facel               = fr->ic->epsfac;
185     krf                 = ic->k_rf;
186     crf                 = ic->c_rf;
187     Vc                  = kernel_data->energygrp_elec;
188     typeA               = mdatoms->typeA;
189     typeB               = mdatoms->typeB;
190     ntype               = fr->ntype;
191     nbfp                = fr->nbfp;
192     nbfp_grid           = fr->ljpme_c6grid;
193     Vv                  = kernel_data->energygrp_vdw;
194     lambda_coul         = kernel_data->lambda[efptCOUL];
195     lambda_vdw          = kernel_data->lambda[efptVDW];
196     dvdl                = kernel_data->dvdl;
197     alpha_coul          = fr->sc_alphacoul;
198     alpha_vdw           = fr->sc_alphavdw;
199     lam_power           = fr->sc_power;
200     sigma6_def          = fr->sc_sigma6_def;
201     sigma6_min          = fr->sc_sigma6_min;
202     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
203     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
204     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
205
206     rcoulomb            = ic->rcoulomb;
207     rvdw                = ic->rvdw;
208     sh_invrc6           = ic->sh_invrc6;
209     sh_lj_ewald         = ic->sh_lj_ewald;
210
211     // Note that the nbnxm kernels do not support Coulomb potential switching at all
212     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
213                "Potential switching is not supported for Coulomb with FEP");
214
215     if (ic->vdw_modifier == eintmodPOTSWITCH)
216     {
217         d               = ic->rvdw - ic->rvdw_switch;
218         vdw_swV3        = -10.0/(d*d*d);
219         vdw_swV4        =  15.0/(d*d*d*d);
220         vdw_swV5        =  -6.0/(d*d*d*d*d);
221         vdw_swF2        = -30.0/(d*d*d);
222         vdw_swF3        =  60.0/(d*d*d*d);
223         vdw_swF4        = -30.0/(d*d*d*d*d);
224     }
225     else
226     {
227         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
228         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
229     }
230
231     if (EVDW_PME(ic->vdwtype))
232     {
233         ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
234     }
235     else
236     {
237         ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
238     }
239
240     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
241     {
242         icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
243     }
244     else if (EEL_PME_EWALD(ic->eeltype))
245     {
246         icoul        = GMX_NBKERNEL_ELEC_EWALD;
247     }
248     else
249     {
250         gmx_incons("Unsupported eeltype with Verlet and free-energy");
251     }
252
253     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
254     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
255
256     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
257     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
258
259     if (bEwald || bEwaldLJ)
260     {
261         sh_ewald       = ic->sh_ewald;
262         ewtab          = ic->tabq_coul_FDV0;
263         ewtabscale     = ic->tabq_scale;
264         ewtabhalfspace = half/ewtabscale;
265         tab_ewald_F_lj = ic->tabq_vdw_F;
266         tab_ewald_V_lj = ic->tabq_vdw_V;
267     }
268
269     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
270      * reciprocal space. When we use non-switched Ewald interactions, we
271      * assume the soft-coring does not significantly affect the grid contribution
272      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
273      *
274      * However, we cannot use this approach for switch-modified since we would then
275      * effectively end up evaluating a significantly different interaction here compared to the
276      * normal (non-free-energy) kernels, either by applying a cutoff at a different
277      * position than what the user requested, or by switching different
278      * things (1/r rather than short-range Ewald). For these settings, we just
279      * use the traditional short-range Ewald interaction in that case.
280      */
281     GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
282                        !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
283                        "Can not apply soft-core to switched Ewald potentials");
284
285     /* fix compiler warnings */
286     n1C   = n1V   = 0;
287     epsC  = epsV  = 0;
288     eps2C = eps2V = 0;
289
290     dvdl_coul  = 0;
291     dvdl_vdw   = 0;
292
293     /* Lambda factor for state A, 1-lambda*/
294     LFC[STATE_A] = one - lambda_coul;
295     LFV[STATE_A] = one - lambda_vdw;
296
297     /* Lambda factor for state B, lambda*/
298     LFC[STATE_B] = lambda_coul;
299     LFV[STATE_B] = lambda_vdw;
300
301     /*derivative of the lambda factor for state A and B */
302     DLF[STATE_A] = -1;
303     DLF[STATE_B] = 1;
304
305     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
306     for (i = 0; i < NSTATES; i++)
307     {
308         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
309         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
310         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
311         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
312     }
313
314     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
315
316     do_tab = static_cast<int>(icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
317                               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
318     if (do_tab)
319     {
320         tabscale         = kernel_data->table_elec_vdw->scale;
321         VFtab            = kernel_data->table_elec_vdw->data;
322         /* we always use the combined table here */
323         tab_elemsize     = kernel_data->table_elec_vdw->stride;
324     }
325
326     for (n = 0; (n < nri); n++)
327     {
328         int npair_within_cutoff;
329
330         npair_within_cutoff = 0;
331
332         is3              = 3*shift[n];
333         shX              = shiftvec[is3];
334         shY              = shiftvec[is3+1];
335         shZ              = shiftvec[is3+2];
336         nj0              = jindex[n];
337         nj1              = jindex[n+1];
338         ii               = iinr[n];
339         ii3              = 3*ii;
340         ix               = shX + x[ii3+0];
341         iy               = shY + x[ii3+1];
342         iz               = shZ + x[ii3+2];
343         iqA              = facel*chargeA[ii];
344         iqB              = facel*chargeB[ii];
345         ntiA             = 2*ntype*typeA[ii];
346         ntiB             = 2*ntype*typeB[ii];
347         vctot            = 0;
348         vvtot            = 0;
349         fix              = 0;
350         fiy              = 0;
351         fiz              = 0;
352
353         for (k = nj0; (k < nj1); k++)
354         {
355             jnr              = jjnr[k];
356             j3               = 3*jnr;
357             dx               = ix - x[j3];
358             dy               = iy - x[j3+1];
359             dz               = iz - x[j3+2];
360             rsq              = dx*dx + dy*dy + dz*dz;
361
362             if (rsq >= rcutoff_max2)
363             {
364                 /* We save significant time by skipping all code below.
365                  * Note that with soft-core interactions, the actual cut-off
366                  * check might be different. But since the soft-core distance
367                  * is always larger than r, checking on r here is safe.
368                  */
369                 continue;
370             }
371             npair_within_cutoff++;
372
373             if (rsq > 0)
374             {
375                 /* Note that unlike in the nbnxn kernels, we do not need
376                  * to clamp the value of rsq before taking the invsqrt
377                  * to avoid NaN in the LJ calculation, since here we do
378                  * not calculate LJ interactions when C6 and C12 are zero.
379                  */
380
381                 rinv         = gmx::invsqrt(rsq);
382                 r            = rsq*rinv;
383             }
384             else
385             {
386                 /* The force at r=0 is zero, because of symmetry.
387                  * But note that the potential is in general non-zero,
388                  * since the soft-cored r will be non-zero.
389                  */
390                 rinv         = 0;
391                 r            = 0;
392             }
393
394             if (softCoreTreatment == SoftCoreTreatment::None)
395             {
396                 /* The soft-core power p will not affect the results
397                  * with not using soft-core, so we use power of 0 which gives
398                  * the simplest math and cheapest code.
399                  */
400                 rpm2         = rinv*rinv;
401                 rp           = 1;
402             }
403             if (softCoreTreatment == SoftCoreTreatment::RPower6)
404             {
405                 rpm2         = rsq*rsq;  /* r4 */
406                 rp           = rpm2*rsq; /* r6 */
407             }
408             if (softCoreTreatment == SoftCoreTreatment::RPower48)
409             {
410                 rp           = rsq*rsq*rsq; /* r6 */
411                 rp           = rp*rp;       /* r12 */
412                 rp           = rp*rp;       /* r24 */
413                 rp           = rp*rp;       /* r48 */
414                 rpm2         = rp/rsq;      /* r46 */
415             }
416
417             Fscal = 0;
418
419             qq[STATE_A]      = iqA*chargeA[jnr];
420             qq[STATE_B]      = iqB*chargeB[jnr];
421
422             tj[STATE_A]      = ntiA+2*typeA[jnr];
423             tj[STATE_B]      = ntiB+2*typeB[jnr];
424
425             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
426             {
427                 c6[STATE_A]      = nbfp[tj[STATE_A]];
428                 c6[STATE_B]      = nbfp[tj[STATE_B]];
429
430                 for (i = 0; i < NSTATES; i++)
431                 {
432                     c12[i]             = nbfp[tj[i]+1];
433                     if (useSoftCore)
434                     {
435                         if ((c6[i] > 0) && (c12[i] > 0))
436                         {
437                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
438                             sigma6[i]       = half*c12[i]/c6[i];
439                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
440                             {
441                                 sigma6[i] = sigma6_min;
442                             }
443                         }
444                         else
445                         {
446                             sigma6[i]       = sigma6_def;
447                         }
448                         if (softCoreTreatment == SoftCoreTreatment::RPower6)
449                         {
450                             sigma_pow[i]    = sigma6[i];
451                         }
452                         else
453                         {
454                             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
455                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
456                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
457                         }
458                     }
459                 }
460
461                 if (useSoftCore)
462                 {
463                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
464                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
465                     {
466                         alpha_vdw_eff    = 0;
467                         alpha_coul_eff   = 0;
468                     }
469                     else
470                     {
471                         alpha_vdw_eff    = alpha_vdw;
472                         alpha_coul_eff   = alpha_coul;
473                     }
474                 }
475
476                 for (i = 0; i < NSTATES; i++)
477                 {
478                     FscalC[i]    = 0;
479                     FscalV[i]    = 0;
480                     Vcoul[i]     = 0;
481                     Vvdw[i]      = 0;
482
483                     /* Only spend time on A or B state if it is non-zero */
484                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
485                     {
486                         /* this section has to be inside the loop because of the dependence on sigma_pow */
487                         if (useSoftCore)
488                         {
489                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
490                             rinvC      = std::pow(rpinvC, one/sc_r_power);
491                             rC         = one/rinvC;
492
493                             rpinvV     = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
494                             rinvV      = std::pow(rpinvV, one/sc_r_power);
495                             rV         = one/rinvV;
496                         }
497                         else
498                         {
499                             rpinvC     = 1;
500                             rinvC      = rinv;
501                             rC         = r;
502
503                             rpinvV     = 1;
504                             rinvV      = rinv;
505                             rV         = r;
506                         }
507
508                         if (do_tab)
509                         {
510                             rtC        = rC*tabscale;
511                             n0         = rtC;
512                             epsC       = rtC-n0;
513                             eps2C      = epsC*epsC;
514                             n1C        = tab_elemsize*n0;
515
516                             rtV        = rV*tabscale;
517                             n0         = rtV;
518                             epsV       = rtV-n0;
519                             eps2V      = epsV*epsV;
520                             n1V        = tab_elemsize*n0;
521                         }
522
523                         /* Only process the coulomb interactions if we have charges,
524                          * and if we either include all entries in the list (no cutoff
525                          * used in the kernel), or if we are within the cutoff.
526                          */
527                         bComputeElecInteraction =
528                             ( bEwald && r < rcoulomb) ||
529                             (!bEwald && rC < rcoulomb);
530
531                         if ( (qq[i] != 0) && bComputeElecInteraction)
532                         {
533                             switch (icoul)
534                             {
535                                 case GMX_NBKERNEL_ELEC_COULOMB:
536                                     /* simple cutoff */
537                                     Vcoul[i]   = qq[i]*rinvC;
538                                     FscalC[i]  = Vcoul[i];
539                                     /* The shift for the Coulomb potential is stored in
540                                      * the RF parameter c_rf, which is 0 without shift.
541                                      */
542                                     Vcoul[i]  -= qq[i]*ic->c_rf;
543                                     break;
544
545                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
546                                     /* reaction-field */
547                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
548                                     FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
549                                     break;
550
551                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
552                                     /* non-Ewald tabulated coulomb */
553                                     nnn        = n1C;
554                                     Y          = VFtab[nnn];
555                                     F          = VFtab[nnn+1];
556                                     Geps       = epsC*VFtab[nnn+2];
557                                     Heps2      = eps2C*VFtab[nnn+3];
558                                     Fp         = F+Geps+Heps2;
559                                     VV         = Y+epsC*Fp;
560                                     FF         = Fp+Geps+two*Heps2;
561                                     Vcoul[i]   = qq[i]*VV;
562                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
563                                     break;
564
565                                 case GMX_NBKERNEL_ELEC_EWALD:
566                                     /* Ewald FEP is done only on the 1/r part */
567                                     Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
568                                     FscalC[i]  = qq[i]*rinvC;
569                                     break;
570
571                                 case GMX_NBKERNEL_ELEC_NONE:
572                                     FscalC[i]  = zero;
573                                     Vcoul[i]   = zero;
574                                     break;
575
576                                 default:
577                                     gmx_incons("Invalid icoul in free energy kernel");
578                             }
579                         }
580
581                         /* Only process the VDW interactions if we have
582                          * some non-zero parameters, and if we either
583                          * include all entries in the list (no cutoff used
584                          * in the kernel), or if we are within the cutoff.
585                          */
586                         bComputeVdwInteraction =
587                             ( bEwaldLJ && r < rvdw) ||
588                             (!bEwaldLJ && rV < rvdw);
589                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
590                         {
591                             switch (ivdw)
592                             {
593                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
594                                     /* cutoff LJ */
595                                     if (softCoreTreatment == SoftCoreTreatment::RPower6)
596                                     {
597                                         rinv6        = rpinvV;
598                                     }
599                                     else
600                                     {
601                                         rinv6        = rinvV*rinvV;
602                                         rinv6        = rinv6*rinv6*rinv6;
603                                     }
604                                     Vvdw6            = c6[i]*rinv6;
605                                     Vvdw12           = c12[i]*rinv6*rinv6;
606
607                                     Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
608                                                          - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
609                                     FscalV[i]        = Vvdw12 - Vvdw6;
610                                     break;
611
612                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
613                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
614                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
615                                     /* Table LJ */
616                                     nnn = n1V+4;
617                                     /* dispersion */
618                                     Y          = VFtab[nnn];
619                                     F          = VFtab[nnn+1];
620                                     Geps       = epsV*VFtab[nnn+2];
621                                     Heps2      = eps2V*VFtab[nnn+3];
622                                     Fp         = F+Geps+Heps2;
623                                     VV         = Y+epsV*Fp;
624                                     FF         = Fp+Geps+two*Heps2;
625                                     Vvdw[i]   += c6[i]*VV;
626                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
627
628                                     /* repulsion */
629                                     Y          = VFtab[nnn+4];
630                                     F          = VFtab[nnn+5];
631                                     Geps       = epsV*VFtab[nnn+6];
632                                     Heps2      = eps2V*VFtab[nnn+7];
633                                     Fp         = F+Geps+Heps2;
634                                     VV         = Y+epsV*Fp;
635                                     FF         = Fp+Geps+two*Heps2;
636                                     Vvdw[i]   += c12[i]*VV;
637                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
638                                     break;
639
640                                 case GMX_NBKERNEL_VDW_LJEWALD:
641                                     if (softCoreTreatment == SoftCoreTreatment::RPower6)
642                                     {
643                                         rinv6            = rpinvV;
644                                     }
645                                     else
646                                     {
647                                         rinv6            = rinvV*rinvV;
648                                         rinv6            = rinv6*rinv6*rinv6;
649                                     }
650                                     c6grid           = nbfp_grid[tj[i]];
651
652                                     /* cutoff LJ */
653                                     Vvdw6            = c6[i]*rinv6;
654                                     Vvdw12           = c12[i]*rinv6*rinv6;
655
656                                     Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
657                                                          - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
658                                     FscalV[i]        = Vvdw12 - Vvdw6;
659                                     break;
660
661                                 case GMX_NBKERNEL_VDW_NONE:
662                                     Vvdw[i]    = zero;
663                                     FscalV[i]  = zero;
664                                     break;
665
666                                 default:
667                                     gmx_incons("Invalid ivdw in free energy kernel");
668                             }
669
670                             if (ic->vdw_modifier == eintmodPOTSWITCH)
671                             {
672                                 d                = rV - ic->rvdw_switch;
673                                 d                = (d > zero) ? d : zero;
674                                 d2               = d*d;
675                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
676                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
677
678                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
679                                 Vvdw[i]         *= sw;
680
681                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
682                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
683                             }
684                         }
685
686                         /* FscalC (and FscalV) now contain: dV/drC * rC
687                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
688                          * Further down we first multiply by r^p-2 and then by
689                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
690                          */
691                         FscalC[i] *= rpinvC;
692                         FscalV[i] *= rpinvV;
693                     }
694                 }
695
696                 /* Assemble A and B states */
697                 for (i = 0; i < NSTATES; i++)
698                 {
699                     vctot         += LFC[i]*Vcoul[i];
700                     vvtot         += LFV[i]*Vvdw[i];
701
702                     Fscal     += LFC[i]*FscalC[i]*rpm2;
703                     Fscal     += LFV[i]*FscalV[i]*rpm2;
704
705                     if (useSoftCore)
706                     {
707                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
708                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
709                     }
710                     else
711                     {
712                         dvdl_coul += Vcoul[i]*DLF[i];
713                         dvdl_vdw  += Vvdw[i]*DLF[i];
714                     }
715                 }
716             }
717             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
718             {
719                 /* For excluded pairs, which are only in this pair list when
720                  * using the Verlet scheme, we don't use soft-core.
721                  * The group scheme also doesn't soft-core for these.
722                  * As there is no singularity, there is no need for soft-core.
723                  */
724                 VV = krf*rsq - crf;
725                 FF = -two*krf;
726
727                 if (ii == jnr)
728                 {
729                     VV *= half;
730                 }
731
732                 for (i = 0; i < NSTATES; i++)
733                 {
734                     vctot      += LFC[i]*qq[i]*VV;
735                     Fscal      += LFC[i]*qq[i]*FF;
736                     dvdl_coul  += DLF[i]*qq[i]*VV;
737                 }
738             }
739
740             if (bEwald && r < rcoulomb)
741             {
742                 /* See comment in the preamble. When using Ewald interactions
743                  * (unless we use a switch modifier) we subtract the reciprocal-space
744                  * Ewald component here which made it possible to apply the free
745                  * energy interaction to 1/r (vanilla coulomb short-range part)
746                  * above. This gets us closer to the ideal case of applying
747                  * the softcore to the entire electrostatic interaction,
748                  * including the reciprocal-space component.
749                  */
750                 real v_lr, f_lr;
751
752                 ewrt      = r*ewtabscale;
753                 ewitab    = static_cast<int>(ewrt);
754                 eweps     = ewrt-ewitab;
755                 ewitab    = 4*ewitab;
756                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
757                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
758                 f_lr     *= rinv;
759
760                 /* Note that any possible Ewald shift has already been applied in
761                  * the normal interaction part above.
762                  */
763
764                 if (ii == jnr)
765                 {
766                     /* If we get here, the i particle (ii) has itself (jnr)
767                      * in its neighborlist. This can only happen with the Verlet
768                      * scheme, and corresponds to a self-interaction that will
769                      * occur twice. Scale it down by 50% to only include it once.
770                      */
771                     v_lr *= half;
772                 }
773
774                 for (i = 0; i < NSTATES; i++)
775                 {
776                     vctot      -= LFC[i]*qq[i]*v_lr;
777                     Fscal      -= LFC[i]*qq[i]*f_lr;
778                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
779                 }
780             }
781
782             if (bEwaldLJ && r < rvdw)
783             {
784                 /* See comment in the preamble. When using LJ-Ewald interactions
785                  * (unless we use a switch modifier) we subtract the reciprocal-space
786                  * Ewald component here which made it possible to apply the free
787                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
788                  * above. This gets us closer to the ideal case of applying
789                  * the softcore to the entire VdW interaction,
790                  * including the reciprocal-space component.
791                  */
792                 /* We could also use the analytical form here
793                  * iso a table, but that can cause issues for
794                  * r close to 0 for non-interacting pairs.
795                  */
796                 real rs, frac, f_lr;
797                 int  ri;
798
799                 rs     = rsq*rinv*ewtabscale;
800                 ri     = static_cast<int>(rs);
801                 frac   = rs - ri;
802                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
803                 /* TODO: Currently the Ewald LJ table does not contain
804                  * the factor 1/6, we should add this.
805                  */
806                 FF     = f_lr*rinv/six;
807                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
808
809                 if (ii == jnr)
810                 {
811                     /* If we get here, the i particle (ii) has itself (jnr)
812                      * in its neighborlist. This can only happen with the Verlet
813                      * scheme, and corresponds to a self-interaction that will
814                      * occur twice. Scale it down by 50% to only include it once.
815                      */
816                     VV *= half;
817                 }
818
819                 for (i = 0; i < NSTATES; i++)
820                 {
821                     c6grid      = nbfp_grid[tj[i]];
822                     vvtot      += LFV[i]*c6grid*VV;
823                     Fscal      += LFV[i]*c6grid*FF;
824                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
825                 }
826             }
827
828             if (bDoForces)
829             {
830                 tx         = Fscal*dx;
831                 ty         = Fscal*dy;
832                 tz         = Fscal*dz;
833                 fix        = fix + tx;
834                 fiy        = fiy + ty;
835                 fiz        = fiz + tz;
836                 /* OpenMP atomics are expensive, but this kernels is also
837                  * expensive, so we can take this hit, instead of using
838                  * thread-local output buffers and extra reduction.
839                  *
840                  * All the OpenMP regions in this file are trivial and should
841                  * not throw, so no need for try/catch.
842                  */
843 #pragma omp atomic
844                 f[j3]     -= tx;
845 #pragma omp atomic
846                 f[j3+1]   -= ty;
847 #pragma omp atomic
848                 f[j3+2]   -= tz;
849             }
850         }
851
852         /* The atomics below are expensive with many OpenMP threads.
853          * Here unperturbed i-particles will usually only have a few
854          * (perturbed) j-particles in the list. Thus with a buffered list
855          * we can skip a significant number of i-reductions with a check.
856          */
857         if (npair_within_cutoff > 0)
858         {
859             if (bDoForces)
860             {
861 #pragma omp atomic
862                 f[ii3]        += fix;
863 #pragma omp atomic
864                 f[ii3+1]      += fiy;
865 #pragma omp atomic
866                 f[ii3+2]      += fiz;
867             }
868             if (bDoShiftForces)
869             {
870 #pragma omp atomic
871                 fshift[is3]   += fix;
872 #pragma omp atomic
873                 fshift[is3+1] += fiy;
874 #pragma omp atomic
875                 fshift[is3+2] += fiz;
876             }
877             if (bDoPotential)
878             {
879                 ggid               = gid[n];
880 #pragma omp atomic
881                 Vc[ggid]          += vctot;
882 #pragma omp atomic
883                 Vv[ggid]          += vvtot;
884             }
885         }
886     }
887
888 #pragma omp atomic
889     dvdl[efptCOUL]     += dvdl_coul;
890  #pragma omp atomic
891     dvdl[efptVDW]      += dvdl_vdw;
892
893     /* Estimate flops, average for free energy stuff:
894      * 12  flops per outer iteration
895      * 150 flops per inner iteration
896      */
897 #pragma omp atomic
898     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
899 }
900
901 void gmx_nb_free_energy_kernel(const t_nblist   *nlist,
902                                rvec             *xx,
903                                rvec             *ff,
904                                t_forcerec       *fr,
905                                const t_mdatoms  *mdatoms,
906                                nb_kernel_data_t *kernel_data,
907                                t_nrnb           *nrnb)
908 {
909     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
910     {
911         nb_free_energy_kernel<SoftCoreTreatment::None>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
912     }
913     else if (fr->sc_r_power == 6.0_real)
914     {
915         nb_free_energy_kernel<SoftCoreTreatment::RPower6>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
916     }
917     else if (fr->sc_r_power == 48.0_real)
918     {
919         nb_free_energy_kernel<SoftCoreTreatment::RPower48>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
920     }
921     else
922     {
923         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
924     }
925 }