Convert Ewald correction tables to C++
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forcerec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/utility/fatalerror.h"
53
54
55 //! Enum for templating the soft-core treatment in the kernel
56 enum class SoftCoreTreatment
57 {
58     None,    //!< No soft-core
59     RPower6, //!< Soft-core with r-power = 6
60     RPower48 //!< Soft-core with r-power = 48
61 };
62
63 //! Most treatments are fine with float in mixed-precision mode.
64 template <SoftCoreTreatment softCoreTreatment>
65 struct SoftCoreReal
66 {
67     //! Real type for soft-core calculations
68     using Real = real;
69 };
70
71 //! This treatment requires double precision for some computations.
72 template <>
73 struct SoftCoreReal<SoftCoreTreatment::RPower48>
74 {
75     //! Real type for soft-core calculations
76     using Real = double;
77 };
78
79 //! Templated free-energy non-bonded kernel
80 template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
81 static void
82 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
83                       rvec * gmx_restrict              xx,
84                       rvec * gmx_restrict              ff,
85                       t_forcerec * gmx_restrict        fr,
86                       const t_mdatoms * gmx_restrict   mdatoms,
87                       nb_kernel_data_t * gmx_restrict  kernel_data,
88                       t_nrnb * gmx_restrict            nrnb)
89 {
90     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
91
92     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
93
94 #define  STATE_A  0
95 #define  STATE_B  1
96 #define  NSTATES  2
97     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
98     real          shX, shY, shZ;
99     real          tx, ty, tz, Fscal;
100     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
101     SCReal        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
102     real          rinv6, r;
103     real          iqA, iqB;
104     real          qq[NSTATES], vctot;
105     int           ntiA, ntiB, tj[NSTATES];
106     real          Vvdw6, Vvdw12, vvtot;
107     real          ix, iy, iz, fix, fiy, fiz;
108     real          dx, dy, dz, rsq, rinv;
109     real          c6[NSTATES], c12[NSTATES], c6grid;
110     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
111     SCReal        dvdl_coul, dvdl_vdw;
112     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
113     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
114     SCReal        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
115     real          sigma_pow[NSTATES];
116     real          VV, FF;
117     int           icoul, ivdw;
118     int           nri;
119     const int *   iinr;
120     const int *   jindex;
121     const int *   jjnr;
122     const int *   shift;
123     const int *   gid;
124     const int *   typeA;
125     const int *   typeB;
126     int           ntype;
127     const real *  shiftvec;
128     real *        fshift;
129     const real *  x;
130     real *        f;
131     const real *  chargeA;
132     const real *  chargeB;
133     real          sigma6_min, sigma6_def, lam_power;
134     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
135     const real *  nbfp, *nbfp_grid;
136     real *        dvdl;
137     real *        Vv;
138     real *        Vc;
139     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
140     gmx_bool      bEwald, bEwaldLJ;
141     real          rcutoff_max2;
142     const real *  tab_ewald_F_lj = nullptr;
143     const real *  tab_ewald_V_lj = nullptr;
144     real          d, d2, sw, dsw;
145     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
146     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
147     const real *  ewtab = nullptr;
148     int           ewitab;
149     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
150
151     const real    onetwelfth  = 1.0/12.0;
152     const real    onesixth    = 1.0/6.0;
153     const real    zero        = 0.0;
154     const real    half        = 0.5;
155     const real    one         = 1.0;
156     const real    two         = 2.0;
157     const real    six         = 6.0;
158
159     /* Extract pointer to non-bonded interaction constants */
160     const interaction_const_t *ic = fr->ic;
161
162     x                   = xx[0];
163     f                   = ff[0];
164
165     fshift              = fr->fshift[0];
166
167     nri                 = nlist->nri;
168     iinr                = nlist->iinr;
169     jindex              = nlist->jindex;
170     jjnr                = nlist->jjnr;
171     shift               = nlist->shift;
172     gid                 = nlist->gid;
173
174     shiftvec            = fr->shift_vec[0];
175     chargeA             = mdatoms->chargeA;
176     chargeB             = mdatoms->chargeB;
177     Vc                  = kernel_data->energygrp_elec;
178     typeA               = mdatoms->typeA;
179     typeB               = mdatoms->typeB;
180     ntype               = fr->ntype;
181     nbfp                = fr->nbfp;
182     nbfp_grid           = fr->ljpme_c6grid;
183     Vv                  = kernel_data->energygrp_vdw;
184     lambda_coul         = kernel_data->lambda[efptCOUL];
185     lambda_vdw          = kernel_data->lambda[efptVDW];
186     dvdl                = kernel_data->dvdl;
187     alpha_coul          = fr->sc_alphacoul;
188     alpha_vdw           = fr->sc_alphavdw;
189     lam_power           = fr->sc_power;
190     sigma6_def          = fr->sc_sigma6_def;
191     sigma6_min          = fr->sc_sigma6_min;
192     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
193     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
194     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
195
196     // Extract data from interaction_const_t
197     const real facel           = ic->epsfac;
198     const real rcoulomb        = ic->rcoulomb;
199     const real krf             = ic->k_rf;
200     const real crf             = ic->c_rf;
201     const real sh_lj_ewald     = ic->sh_lj_ewald;
202     const real rvdw            = ic->rvdw;
203     const real dispersionShift = ic->dispersion_shift.cpot;
204     const real repulsionShift  = ic->repulsion_shift.cpot;
205
206     // Note that the nbnxm kernels do not support Coulomb potential switching at all
207     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
208                "Potential switching is not supported for Coulomb with FEP");
209
210     if (ic->vdw_modifier == eintmodPOTSWITCH)
211     {
212         d               = ic->rvdw - ic->rvdw_switch;
213         vdw_swV3        = -10.0/(d*d*d);
214         vdw_swV4        =  15.0/(d*d*d*d);
215         vdw_swV5        =  -6.0/(d*d*d*d*d);
216         vdw_swF2        = -30.0/(d*d*d);
217         vdw_swF3        =  60.0/(d*d*d*d);
218         vdw_swF4        = -30.0/(d*d*d*d*d);
219     }
220     else
221     {
222         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
223         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
224     }
225
226     if (EVDW_PME(ic->vdwtype))
227     {
228         ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
229     }
230     else
231     {
232         ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
233     }
234
235     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
236     {
237         icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
238     }
239     else if (EEL_PME_EWALD(ic->eeltype))
240     {
241         icoul        = GMX_NBKERNEL_ELEC_EWALD;
242     }
243     else
244     {
245         gmx_incons("Unsupported eeltype with Verlet and free-energy");
246     }
247
248     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
249     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
250
251     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
252     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
253
254     if (bEwald || bEwaldLJ)
255     {
256         const auto &tables = *ic->coulombEwaldTables;
257         sh_ewald       = ic->sh_ewald;
258         ewtab          = tables.tableFDV0.data();
259         ewtabscale     = tables.scale;
260         ewtabhalfspace = half/ewtabscale;
261         tab_ewald_F_lj = tables.tableF.data();
262         tab_ewald_V_lj = tables.tableV.data();
263     }
264
265     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
266      * reciprocal space. When we use non-switched Ewald interactions, we
267      * assume the soft-coring does not significantly affect the grid contribution
268      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
269      *
270      * However, we cannot use this approach for switch-modified since we would then
271      * effectively end up evaluating a significantly different interaction here compared to the
272      * normal (non-free-energy) kernels, either by applying a cutoff at a different
273      * position than what the user requested, or by switching different
274      * things (1/r rather than short-range Ewald). For these settings, we just
275      * use the traditional short-range Ewald interaction in that case.
276      */
277     GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
278                        !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
279                        "Can not apply soft-core to switched Ewald potentials");
280
281     dvdl_coul  = 0;
282     dvdl_vdw   = 0;
283
284     /* Lambda factor for state A, 1-lambda*/
285     LFC[STATE_A] = one - lambda_coul;
286     LFV[STATE_A] = one - lambda_vdw;
287
288     /* Lambda factor for state B, lambda*/
289     LFC[STATE_B] = lambda_coul;
290     LFV[STATE_B] = lambda_vdw;
291
292     /*derivative of the lambda factor for state A and B */
293     DLF[STATE_A] = -1;
294     DLF[STATE_B] = 1;
295
296     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
297     for (i = 0; i < NSTATES; i++)
298     {
299         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
300         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
301         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
302         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
303     }
304
305     for (n = 0; (n < nri); n++)
306     {
307         int npair_within_cutoff;
308
309         npair_within_cutoff = 0;
310
311         is3              = 3*shift[n];
312         shX              = shiftvec[is3];
313         shY              = shiftvec[is3+1];
314         shZ              = shiftvec[is3+2];
315         nj0              = jindex[n];
316         nj1              = jindex[n+1];
317         ii               = iinr[n];
318         ii3              = 3*ii;
319         ix               = shX + x[ii3+0];
320         iy               = shY + x[ii3+1];
321         iz               = shZ + x[ii3+2];
322         iqA              = facel*chargeA[ii];
323         iqB              = facel*chargeB[ii];
324         ntiA             = 2*ntype*typeA[ii];
325         ntiB             = 2*ntype*typeB[ii];
326         vctot            = 0;
327         vvtot            = 0;
328         fix              = 0;
329         fiy              = 0;
330         fiz              = 0;
331
332         for (k = nj0; (k < nj1); k++)
333         {
334             jnr              = jjnr[k];
335             j3               = 3*jnr;
336             dx               = ix - x[j3];
337             dy               = iy - x[j3+1];
338             dz               = iz - x[j3+2];
339             rsq              = dx*dx + dy*dy + dz*dz;
340
341             if (rsq >= rcutoff_max2)
342             {
343                 /* We save significant time by skipping all code below.
344                  * Note that with soft-core interactions, the actual cut-off
345                  * check might be different. But since the soft-core distance
346                  * is always larger than r, checking on r here is safe.
347                  */
348                 continue;
349             }
350             npair_within_cutoff++;
351
352             if (rsq > 0)
353             {
354                 /* Note that unlike in the nbnxn kernels, we do not need
355                  * to clamp the value of rsq before taking the invsqrt
356                  * to avoid NaN in the LJ calculation, since here we do
357                  * not calculate LJ interactions when C6 and C12 are zero.
358                  */
359
360                 rinv         = gmx::invsqrt(rsq);
361                 r            = rsq*rinv;
362             }
363             else
364             {
365                 /* The force at r=0 is zero, because of symmetry.
366                  * But note that the potential is in general non-zero,
367                  * since the soft-cored r will be non-zero.
368                  */
369                 rinv         = 0;
370                 r            = 0;
371             }
372
373             if (softCoreTreatment == SoftCoreTreatment::None)
374             {
375                 /* The soft-core power p will not affect the results
376                  * with not using soft-core, so we use power of 0 which gives
377                  * the simplest math and cheapest code.
378                  */
379                 rpm2         = rinv*rinv;
380                 rp           = 1;
381             }
382             if (softCoreTreatment == SoftCoreTreatment::RPower6)
383             {
384                 rpm2         = rsq*rsq;  /* r4 */
385                 rp           = rpm2*rsq; /* r6 */
386             }
387             if (softCoreTreatment == SoftCoreTreatment::RPower48)
388             {
389                 rp           = rsq*rsq*rsq; /* r6 */
390                 rp           = rp*rp;       /* r12 */
391                 rp           = rp*rp;       /* r24 */
392                 rp           = rp*rp;       /* r48 */
393                 rpm2         = rp/rsq;      /* r46 */
394             }
395
396             Fscal = 0;
397
398             qq[STATE_A]      = iqA*chargeA[jnr];
399             qq[STATE_B]      = iqB*chargeB[jnr];
400
401             tj[STATE_A]      = ntiA+2*typeA[jnr];
402             tj[STATE_B]      = ntiB+2*typeB[jnr];
403
404             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
405             {
406                 c6[STATE_A]      = nbfp[tj[STATE_A]];
407                 c6[STATE_B]      = nbfp[tj[STATE_B]];
408
409                 for (i = 0; i < NSTATES; i++)
410                 {
411                     c12[i]             = nbfp[tj[i]+1];
412                     if (useSoftCore)
413                     {
414                         if ((c6[i] > 0) && (c12[i] > 0))
415                         {
416                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
417                             sigma6[i]       = half*c12[i]/c6[i];
418                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
419                             {
420                                 sigma6[i] = sigma6_min;
421                             }
422                         }
423                         else
424                         {
425                             sigma6[i]       = sigma6_def;
426                         }
427                         if (softCoreTreatment == SoftCoreTreatment::RPower6)
428                         {
429                             sigma_pow[i]    = sigma6[i];
430                         }
431                         else
432                         {
433                             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
434                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
435                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
436                         }
437                     }
438                 }
439
440                 if (useSoftCore)
441                 {
442                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
443                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
444                     {
445                         alpha_vdw_eff    = 0;
446                         alpha_coul_eff   = 0;
447                     }
448                     else
449                     {
450                         alpha_vdw_eff    = alpha_vdw;
451                         alpha_coul_eff   = alpha_coul;
452                     }
453                 }
454
455                 for (i = 0; i < NSTATES; i++)
456                 {
457                     FscalC[i]    = 0;
458                     FscalV[i]    = 0;
459                     Vcoul[i]     = 0;
460                     Vvdw[i]      = 0;
461
462                     /* Only spend time on A or B state if it is non-zero */
463                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
464                     {
465                         /* this section has to be inside the loop because of the dependence on sigma_pow */
466                         if (useSoftCore)
467                         {
468                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
469                             rinvC      = std::pow(rpinvC, one/sc_r_power);
470                             rC         = one/rinvC;
471
472                             if (scLambdasOrAlphasDiffer)
473                             {
474                                 rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
475                                 rinvV  = std::pow(rpinvV, one/sc_r_power);
476                                 rV     = one/rinvV;
477                             }
478                             else
479                             {
480                                 /* We can avoid one expensive pow and one / operation */
481                                 rpinvV = rpinvC;
482                                 rinvV  = rinvC;
483                                 rV     = rC;
484                             }
485                         }
486                         else
487                         {
488                             rpinvC     = 1;
489                             rinvC      = rinv;
490                             rC         = r;
491
492                             rpinvV     = 1;
493                             rinvV      = rinv;
494                             rV         = r;
495                         }
496
497                         /* Only process the coulomb interactions if we have charges,
498                          * and if we either include all entries in the list (no cutoff
499                          * used in the kernel), or if we are within the cutoff.
500                          */
501                         bComputeElecInteraction =
502                             ( bEwald && r < rcoulomb) ||
503                             (!bEwald && rC < rcoulomb);
504
505                         if ( (qq[i] != 0) && bComputeElecInteraction)
506                         {
507                             if (bEwald)
508                             {
509                                 /* Ewald FEP is done only on the 1/r part */
510                                 Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
511                                 FscalC[i]  = qq[i]*rinvC;
512                             }
513                             else
514                             {
515                                 /* reaction-field */
516                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
517                                 FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
518                             }
519                         }
520
521                         /* Only process the VDW interactions if we have
522                          * some non-zero parameters, and if we either
523                          * include all entries in the list (no cutoff used
524                          * in the kernel), or if we are within the cutoff.
525                          */
526                         bComputeVdwInteraction =
527                             ( bEwaldLJ && r < rvdw) ||
528                             (!bEwaldLJ && rV < rvdw);
529                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
530                         {
531                             /* cutoff LJ, also handles part of Ewald LJ */
532                             if (softCoreTreatment == SoftCoreTreatment::RPower6)
533                             {
534                                 rinv6        = rpinvV;
535                             }
536                             else
537                             {
538                                 rinv6        = rinvV*rinvV;
539                                 rinv6        = rinv6*rinv6*rinv6;
540                             }
541                             Vvdw6            = c6[i]*rinv6;
542                             Vvdw12           = c12[i]*rinv6*rinv6;
543
544                             Vvdw[i]          = ( (Vvdw12 + c12[i]*repulsionShift)*onetwelfth
545                                                  - (Vvdw6 + c6[i]*dispersionShift)*onesixth);
546                             FscalV[i]        = Vvdw12 - Vvdw6;
547
548                             if (bEwaldLJ)
549                             {
550                                 /* Subtract the grid potential at the cut-off */
551                                 c6grid      = nbfp_grid[tj[i]];
552                                 Vvdw[i]    += c6grid*sh_lj_ewald*onesixth;
553                             }
554
555                             if (ic->vdw_modifier == eintmodPOTSWITCH)
556                             {
557                                 d                = rV - ic->rvdw_switch;
558                                 d                = (d > zero) ? d : zero;
559                                 d2               = d*d;
560                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
561                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
562
563                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
564                                 Vvdw[i]         *= sw;
565
566                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
567                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
568                             }
569                         }
570
571                         /* FscalC (and FscalV) now contain: dV/drC * rC
572                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
573                          * Further down we first multiply by r^p-2 and then by
574                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
575                          */
576                         FscalC[i] *= rpinvC;
577                         FscalV[i] *= rpinvV;
578                     }
579                 }
580
581                 /* Assemble A and B states */
582                 for (i = 0; i < NSTATES; i++)
583                 {
584                     vctot         += LFC[i]*Vcoul[i];
585                     vvtot         += LFV[i]*Vvdw[i];
586
587                     Fscal     += LFC[i]*FscalC[i]*rpm2;
588                     Fscal     += LFV[i]*FscalV[i]*rpm2;
589
590                     if (useSoftCore)
591                     {
592                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
593                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
594                     }
595                     else
596                     {
597                         dvdl_coul += Vcoul[i]*DLF[i];
598                         dvdl_vdw  += Vvdw[i]*DLF[i];
599                     }
600                 }
601             }
602             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
603             {
604                 /* For excluded pairs, which are only in this pair list when
605                  * using the Verlet scheme, we don't use soft-core.
606                  * The group scheme also doesn't soft-core for these.
607                  * As there is no singularity, there is no need for soft-core.
608                  */
609                 VV = krf*rsq - crf;
610                 FF = -two*krf;
611
612                 if (ii == jnr)
613                 {
614                     VV *= half;
615                 }
616
617                 for (i = 0; i < NSTATES; i++)
618                 {
619                     vctot      += LFC[i]*qq[i]*VV;
620                     Fscal      += LFC[i]*qq[i]*FF;
621                     dvdl_coul  += DLF[i]*qq[i]*VV;
622                 }
623             }
624
625             if (bEwald && r < rcoulomb)
626             {
627                 /* See comment in the preamble. When using Ewald interactions
628                  * (unless we use a switch modifier) we subtract the reciprocal-space
629                  * Ewald component here which made it possible to apply the free
630                  * energy interaction to 1/r (vanilla coulomb short-range part)
631                  * above. This gets us closer to the ideal case of applying
632                  * the softcore to the entire electrostatic interaction,
633                  * including the reciprocal-space component.
634                  */
635                 real v_lr, f_lr;
636
637                 ewrt      = r*ewtabscale;
638                 ewitab    = static_cast<int>(ewrt);
639                 eweps     = ewrt-ewitab;
640                 ewitab    = 4*ewitab;
641                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
642                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
643                 f_lr     *= rinv;
644
645                 /* Note that any possible Ewald shift has already been applied in
646                  * the normal interaction part above.
647                  */
648
649                 if (ii == jnr)
650                 {
651                     /* If we get here, the i particle (ii) has itself (jnr)
652                      * in its neighborlist. This can only happen with the Verlet
653                      * scheme, and corresponds to a self-interaction that will
654                      * occur twice. Scale it down by 50% to only include it once.
655                      */
656                     v_lr *= half;
657                 }
658
659                 for (i = 0; i < NSTATES; i++)
660                 {
661                     vctot      -= LFC[i]*qq[i]*v_lr;
662                     Fscal      -= LFC[i]*qq[i]*f_lr;
663                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
664                 }
665             }
666
667             if (bEwaldLJ && r < rvdw)
668             {
669                 /* See comment in the preamble. When using LJ-Ewald interactions
670                  * (unless we use a switch modifier) we subtract the reciprocal-space
671                  * Ewald component here which made it possible to apply the free
672                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
673                  * above. This gets us closer to the ideal case of applying
674                  * the softcore to the entire VdW interaction,
675                  * including the reciprocal-space component.
676                  */
677                 /* We could also use the analytical form here
678                  * iso a table, but that can cause issues for
679                  * r close to 0 for non-interacting pairs.
680                  */
681                 real rs, frac, f_lr;
682                 int  ri;
683
684                 rs     = rsq*rinv*ewtabscale;
685                 ri     = static_cast<int>(rs);
686                 frac   = rs - ri;
687                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
688                 /* TODO: Currently the Ewald LJ table does not contain
689                  * the factor 1/6, we should add this.
690                  */
691                 FF     = f_lr*rinv/six;
692                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
693
694                 if (ii == jnr)
695                 {
696                     /* If we get here, the i particle (ii) has itself (jnr)
697                      * in its neighborlist. This can only happen with the Verlet
698                      * scheme, and corresponds to a self-interaction that will
699                      * occur twice. Scale it down by 50% to only include it once.
700                      */
701                     VV *= half;
702                 }
703
704                 for (i = 0; i < NSTATES; i++)
705                 {
706                     c6grid      = nbfp_grid[tj[i]];
707                     vvtot      += LFV[i]*c6grid*VV;
708                     Fscal      += LFV[i]*c6grid*FF;
709                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
710                 }
711             }
712
713             if (bDoForces)
714             {
715                 tx         = Fscal*dx;
716                 ty         = Fscal*dy;
717                 tz         = Fscal*dz;
718                 fix        = fix + tx;
719                 fiy        = fiy + ty;
720                 fiz        = fiz + tz;
721                 /* OpenMP atomics are expensive, but this kernels is also
722                  * expensive, so we can take this hit, instead of using
723                  * thread-local output buffers and extra reduction.
724                  *
725                  * All the OpenMP regions in this file are trivial and should
726                  * not throw, so no need for try/catch.
727                  */
728 #pragma omp atomic
729                 f[j3]     -= tx;
730 #pragma omp atomic
731                 f[j3+1]   -= ty;
732 #pragma omp atomic
733                 f[j3+2]   -= tz;
734             }
735         }
736
737         /* The atomics below are expensive with many OpenMP threads.
738          * Here unperturbed i-particles will usually only have a few
739          * (perturbed) j-particles in the list. Thus with a buffered list
740          * we can skip a significant number of i-reductions with a check.
741          */
742         if (npair_within_cutoff > 0)
743         {
744             if (bDoForces)
745             {
746 #pragma omp atomic
747                 f[ii3]        += fix;
748 #pragma omp atomic
749                 f[ii3+1]      += fiy;
750 #pragma omp atomic
751                 f[ii3+2]      += fiz;
752             }
753             if (bDoShiftForces)
754             {
755 #pragma omp atomic
756                 fshift[is3]   += fix;
757 #pragma omp atomic
758                 fshift[is3+1] += fiy;
759 #pragma omp atomic
760                 fshift[is3+2] += fiz;
761             }
762             if (bDoPotential)
763             {
764                 ggid               = gid[n];
765 #pragma omp atomic
766                 Vc[ggid]          += vctot;
767 #pragma omp atomic
768                 Vv[ggid]          += vvtot;
769             }
770         }
771     }
772
773 #pragma omp atomic
774     dvdl[efptCOUL]     += dvdl_coul;
775  #pragma omp atomic
776     dvdl[efptVDW]      += dvdl_vdw;
777
778     /* Estimate flops, average for free energy stuff:
779      * 12  flops per outer iteration
780      * 150 flops per inner iteration
781      */
782 #pragma omp atomic
783     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
784 }
785
786 void gmx_nb_free_energy_kernel(const t_nblist   *nlist,
787                                rvec             *xx,
788                                rvec             *ff,
789                                t_forcerec       *fr,
790                                const t_mdatoms  *mdatoms,
791                                nb_kernel_data_t *kernel_data,
792                                t_nrnb           *nrnb)
793 {
794     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
795     {
796         nb_free_energy_kernel<SoftCoreTreatment::None, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
797     }
798     else if (fr->sc_r_power == 6.0_real)
799     {
800         if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] &&
801             fr->sc_alphacoul == fr->sc_alphavdw)
802         {
803             nb_free_energy_kernel<SoftCoreTreatment::RPower6, false>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
804         }
805         else
806         {
807             nb_free_energy_kernel<SoftCoreTreatment::RPower6, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
808         }
809     }
810     else if (fr->sc_r_power == 48.0_real)
811     {
812         nb_free_energy_kernel<SoftCoreTreatment::RPower48, true>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
813     }
814     else
815     {
816         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
817     }
818 }