Tidy: modernize-use-nullptr
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "nb_free_energy.h"
40
41 #include <cmath>
42
43 #include <algorithm>
44
45 #include "gromacs/gmxlib/nrnb.h"
46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
48 #include "gromacs/math/functions.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/mdtypes/forcerec.h"
51 #include "gromacs/mdtypes/md_enums.h"
52 #include "gromacs/utility/fatalerror.h"
53
54 void
55 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
56                           rvec * gmx_restrict              xx,
57                           rvec * gmx_restrict              ff,
58                           t_forcerec * gmx_restrict        fr,
59                           const t_mdatoms * gmx_restrict   mdatoms,
60                           nb_kernel_data_t * gmx_restrict  kernel_data,
61                           t_nrnb * gmx_restrict            nrnb)
62 {
63
64 #define  STATE_A  0
65 #define  STATE_B  1
66 #define  NSTATES  2
67     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
68     real          shX, shY, shZ;
69     real          tx, ty, tz, Fscal;
70     double        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
71     double        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
72     real          rinv6, r, rtC, rtV;
73     real          iqA, iqB;
74     real          qq[NSTATES], vctot;
75     int           ntiA, ntiB, tj[NSTATES];
76     real          Vvdw6, Vvdw12, vvtot;
77     real          ix, iy, iz, fix, fiy, fiz;
78     real          dx, dy, dz, rsq, rinv;
79     real          c6[NSTATES], c12[NSTATES], c6grid;
80     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
81     double        dvdl_coul, dvdl_vdw;
82     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
83     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
84     double        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
85     real          sigma2[NSTATES], sigma_pow[NSTATES];
86     int           do_tab, tab_elemsize = 0;
87     int           n0, n1C, n1V, nnn;
88     real          Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
89     int           icoul, ivdw;
90     int           nri;
91     const int *   iinr;
92     const int *   jindex;
93     const int *   jjnr;
94     const int *   shift;
95     const int *   gid;
96     const int *   typeA;
97     const int *   typeB;
98     int           ntype;
99     const real *  shiftvec;
100     real *        fshift;
101     real          tabscale = 0;
102     const real *  VFtab    = nullptr;
103     const real *  x;
104     real *        f;
105     real          facel, krf, crf;
106     const real *  chargeA;
107     const real *  chargeB;
108     real          sigma6_min, sigma6_def, lam_power, sc_r_power;
109     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
110     real          ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald;
111     real          ewclj6;
112     const real *  nbfp, *nbfp_grid;
113     real *        dvdl;
114     real *        Vv;
115     real *        Vc;
116     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
117     real          rcoulomb, rvdw, sh_invrc6;
118     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
119     gmx_bool      bEwald, bEwaldLJ;
120     real          rcutoff_max2;
121     const real *  tab_ewald_F_lj = nullptr;
122     const real *  tab_ewald_V_lj = nullptr;
123     real          d, d2, sw, dsw, rinvcorr;
124     real          elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
125     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
126     gmx_bool      bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
127     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
128     const real *  ewtab = nullptr;
129     int           ewitab;
130     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
131
132     const real    onetwelfth  = 1.0/12.0;
133     const real    onesixth    = 1.0/6.0;
134     const real    zero        = 0.0;
135     const real    half        = 0.5;
136     const real    one         = 1.0;
137     const real    two         = 2.0;
138     const real    six         = 6.0;
139     const real    fourtyeight = 48.0;
140
141     x                   = xx[0];
142     f                   = ff[0];
143
144     fshift              = fr->fshift[0];
145
146     nri                 = nlist->nri;
147     iinr                = nlist->iinr;
148     jindex              = nlist->jindex;
149     jjnr                = nlist->jjnr;
150     icoul               = nlist->ielec;
151     ivdw                = nlist->ivdw;
152     shift               = nlist->shift;
153     gid                 = nlist->gid;
154
155     shiftvec            = fr->shift_vec[0];
156     chargeA             = mdatoms->chargeA;
157     chargeB             = mdatoms->chargeB;
158     facel               = fr->epsfac;
159     krf                 = fr->k_rf;
160     crf                 = fr->c_rf;
161     Vc                  = kernel_data->energygrp_elec;
162     typeA               = mdatoms->typeA;
163     typeB               = mdatoms->typeB;
164     ntype               = fr->ntype;
165     nbfp                = fr->nbfp;
166     nbfp_grid           = fr->ljpme_c6grid;
167     Vv                  = kernel_data->energygrp_vdw;
168     lambda_coul         = kernel_data->lambda[efptCOUL];
169     lambda_vdw          = kernel_data->lambda[efptVDW];
170     dvdl                = kernel_data->dvdl;
171     alpha_coul          = fr->sc_alphacoul;
172     alpha_vdw           = fr->sc_alphavdw;
173     lam_power           = fr->sc_power;
174     sc_r_power          = fr->sc_r_power;
175     sigma6_def          = fr->sc_sigma6_def;
176     sigma6_min          = fr->sc_sigma6_min;
177     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
178     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
179     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
180
181     rcoulomb            = fr->rcoulomb;
182     rvdw                = fr->rvdw;
183     sh_invrc6           = fr->ic->sh_invrc6;
184     sh_lj_ewald         = fr->ic->sh_lj_ewald;
185     ewclj               = fr->ewaldcoeff_lj;
186     ewclj2              = ewclj*ewclj;
187     ewclj6              = ewclj2*ewclj2*ewclj2;
188
189     if (fr->coulomb_modifier == eintmodPOTSWITCH)
190     {
191         d               = fr->rcoulomb-fr->rcoulomb_switch;
192         elec_swV3       = -10.0/(d*d*d);
193         elec_swV4       =  15.0/(d*d*d*d);
194         elec_swV5       =  -6.0/(d*d*d*d*d);
195         elec_swF2       = -30.0/(d*d*d);
196         elec_swF3       =  60.0/(d*d*d*d);
197         elec_swF4       = -30.0/(d*d*d*d*d);
198     }
199     else
200     {
201         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
202         elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
203     }
204
205     if (fr->vdw_modifier == eintmodPOTSWITCH)
206     {
207         d               = fr->rvdw-fr->rvdw_switch;
208         vdw_swV3        = -10.0/(d*d*d);
209         vdw_swV4        =  15.0/(d*d*d*d);
210         vdw_swV5        =  -6.0/(d*d*d*d*d);
211         vdw_swF2        = -30.0/(d*d*d);
212         vdw_swF3        =  60.0/(d*d*d*d);
213         vdw_swF4        = -30.0/(d*d*d*d*d);
214     }
215     else
216     {
217         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
218         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
219     }
220
221     if (fr->cutoff_scheme == ecutsVERLET)
222     {
223         const interaction_const_t *ic;
224
225         ic = fr->ic;
226         if (EVDW_PME(ic->vdwtype))
227         {
228             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
229         }
230         else
231         {
232             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
233         }
234
235         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
236         {
237             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
238         }
239         else if (EEL_PME_EWALD(ic->eeltype))
240         {
241             icoul        = GMX_NBKERNEL_ELEC_EWALD;
242         }
243         else
244         {
245             gmx_incons("Unsupported eeltype with Verlet and free-energy");
246         }
247
248         bExactElecCutoff = TRUE;
249         bExactVdwCutoff  = TRUE;
250     }
251     else
252     {
253         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
254         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
255     }
256
257     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
258     rcutoff_max2    = std::max(fr->rcoulomb, fr->rvdw);
259     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
260
261     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
262     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
263
264     if (bEwald || bEwaldLJ)
265     {
266         sh_ewald       = fr->ic->sh_ewald;
267         ewtab          = fr->ic->tabq_coul_FDV0;
268         ewtabscale     = fr->ic->tabq_scale;
269         ewtabhalfspace = half/ewtabscale;
270         tab_ewald_F_lj = fr->ic->tabq_vdw_F;
271         tab_ewald_V_lj = fr->ic->tabq_vdw_V;
272     }
273
274     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
275      * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
276      * can apply the small trick of subtracting the _reciprocal_ space contribution
277      * in this kernel, and instead apply the free energy interaction to the 1/r
278      * (standard coulomb) interaction.
279      *
280      * However, we cannot use this approach for switch-modified since we would then
281      * effectively end up evaluating a significantly different interaction here compared to the
282      * normal (non-free-energy) kernels, either by applying a cutoff at a different
283      * position than what the user requested, or by switching different
284      * things (1/r rather than short-range Ewald). For these settings, we just
285      * use the traditional short-range Ewald interaction in that case.
286      */
287     bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH));
288     /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
289      * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
290      */
291     bConvertLJEwaldToLJ6   = (bEwaldLJ && (fr->vdw_modifier   != eintmodPOTSWITCH));
292
293     /* We currently don't implement exclusion correction, needed with the Verlet cut-off scheme, without conversion */
294     if (fr->cutoff_scheme == ecutsVERLET &&
295         ((bEwald   && !bConvertEwaldToCoulomb) ||
296          (bEwaldLJ && !bConvertLJEwaldToLJ6)))
297     {
298         gmx_incons("Unimplemented non-bonded setup");
299     }
300
301     /* fix compiler warnings */
302     n1C   = n1V   = 0;
303     epsC  = epsV  = 0;
304     eps2C = eps2V = 0;
305
306     dvdl_coul  = 0;
307     dvdl_vdw   = 0;
308
309     /* Lambda factor for state A, 1-lambda*/
310     LFC[STATE_A] = one - lambda_coul;
311     LFV[STATE_A] = one - lambda_vdw;
312
313     /* Lambda factor for state B, lambda*/
314     LFC[STATE_B] = lambda_coul;
315     LFV[STATE_B] = lambda_vdw;
316
317     /*derivative of the lambda factor for state A and B */
318     DLF[STATE_A] = -1;
319     DLF[STATE_B] = 1;
320
321     for (i = 0; i < NSTATES; i++)
322     {
323         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
324         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
325         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
326         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
327     }
328     /* precalculate */
329     sigma2_def = std::cbrt(sigma6_def);
330     sigma2_min = std::cbrt(sigma6_min);
331
332     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
333
334     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
335               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
336     if (do_tab)
337     {
338         tabscale         = kernel_data->table_elec_vdw->scale;
339         VFtab            = kernel_data->table_elec_vdw->data;
340         /* we always use the combined table here */
341         tab_elemsize     = kernel_data->table_elec_vdw->stride;
342     }
343
344     for (n = 0; (n < nri); n++)
345     {
346         int npair_within_cutoff;
347
348         npair_within_cutoff = 0;
349
350         is3              = 3*shift[n];
351         shX              = shiftvec[is3];
352         shY              = shiftvec[is3+1];
353         shZ              = shiftvec[is3+2];
354         nj0              = jindex[n];
355         nj1              = jindex[n+1];
356         ii               = iinr[n];
357         ii3              = 3*ii;
358         ix               = shX + x[ii3+0];
359         iy               = shY + x[ii3+1];
360         iz               = shZ + x[ii3+2];
361         iqA              = facel*chargeA[ii];
362         iqB              = facel*chargeB[ii];
363         ntiA             = 2*ntype*typeA[ii];
364         ntiB             = 2*ntype*typeB[ii];
365         vctot            = 0;
366         vvtot            = 0;
367         fix              = 0;
368         fiy              = 0;
369         fiz              = 0;
370
371         for (k = nj0; (k < nj1); k++)
372         {
373             jnr              = jjnr[k];
374             j3               = 3*jnr;
375             dx               = ix - x[j3];
376             dy               = iy - x[j3+1];
377             dz               = iz - x[j3+2];
378             rsq              = dx*dx + dy*dy + dz*dz;
379
380             if (bExactCutoffAll && rsq >= rcutoff_max2)
381             {
382                 /* We save significant time by skipping all code below.
383                  * Note that with soft-core interactions, the actual cut-off
384                  * check might be different. But since the soft-core distance
385                  * is always larger than r, checking on r here is safe.
386                  */
387                 continue;
388             }
389             npair_within_cutoff++;
390
391             if (rsq > 0)
392             {
393                 /* Note that unlike in the nbnxn kernels, we do not need
394                  * to clamp the value of rsq before taking the invsqrt
395                  * to avoid NaN in the LJ calculation, since here we do
396                  * not calculate LJ interactions when C6 and C12 are zero.
397                  */
398
399                 rinv         = gmx::invsqrt(rsq);
400                 r            = rsq*rinv;
401             }
402             else
403             {
404                 /* The force at r=0 is zero, because of symmetry.
405                  * But note that the potential is in general non-zero,
406                  * since the soft-cored r will be non-zero.
407                  */
408                 rinv         = 0;
409                 r            = 0;
410             }
411
412             if (sc_r_power == six)
413             {
414                 rpm2             = rsq*rsq;  /* r4 */
415                 rp               = rpm2*rsq; /* r6 */
416             }
417             else if (sc_r_power == fourtyeight)
418             {
419                 rp               = rsq*rsq*rsq; /* r6 */
420                 rp               = rp*rp;       /* r12 */
421                 rp               = rp*rp;       /* r24 */
422                 rp               = rp*rp;       /* r48 */
423                 rpm2             = rp/rsq;      /* r46 */
424             }
425             else
426             {
427                 rp             = std::pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
428                 rpm2           = rp/rsq;
429             }
430
431             Fscal = 0;
432
433             qq[STATE_A]      = iqA*chargeA[jnr];
434             qq[STATE_B]      = iqB*chargeB[jnr];
435
436             tj[STATE_A]      = ntiA+2*typeA[jnr];
437             tj[STATE_B]      = ntiB+2*typeB[jnr];
438
439             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
440             {
441                 c6[STATE_A]      = nbfp[tj[STATE_A]];
442                 c6[STATE_B]      = nbfp[tj[STATE_B]];
443
444                 for (i = 0; i < NSTATES; i++)
445                 {
446                     c12[i]             = nbfp[tj[i]+1];
447                     if ((c6[i] > 0) && (c12[i] > 0))
448                     {
449                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
450                         sigma6[i]       = half*c12[i]/c6[i];
451                         sigma2[i]       = std::cbrt(sigma6[i]);
452                         /* should be able to get rid of cbrt call eventually.  Will require agreement on
453                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
454                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
455                         {
456                             sigma6[i] = sigma6_min;
457                             sigma2[i] = sigma2_min;
458                         }
459                     }
460                     else
461                     {
462                         sigma6[i]       = sigma6_def;
463                         sigma2[i]       = sigma2_def;
464                     }
465                     if (sc_r_power == six)
466                     {
467                         sigma_pow[i]    = sigma6[i];
468                     }
469                     else if (sc_r_power == fourtyeight)
470                     {
471                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
472                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
473                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
474                     }
475                     else
476                     {    /* not really supported as input, but in here for testing the general case*/
477                         sigma_pow[i]    = std::pow(sigma2[i], sc_r_power/2);
478                     }
479                 }
480
481                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
482                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
483                 {
484                     alpha_vdw_eff    = 0;
485                     alpha_coul_eff   = 0;
486                 }
487                 else
488                 {
489                     alpha_vdw_eff    = alpha_vdw;
490                     alpha_coul_eff   = alpha_coul;
491                 }
492
493                 for (i = 0; i < NSTATES; i++)
494                 {
495                     FscalC[i]    = 0;
496                     FscalV[i]    = 0;
497                     Vcoul[i]     = 0;
498                     Vvdw[i]      = 0;
499
500                     /* Only spend time on A or B state if it is non-zero */
501                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
502                     {
503                         /* this section has to be inside the loop because of the dependence on sigma_pow */
504                         rpinvC         = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
505                         rinvC          = std::pow(rpinvC, one/sc_r_power);
506                         rC             = one/rinvC;
507
508                         rpinvV         = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
509                         rinvV          = std::pow(rpinvV, one/sc_r_power);
510                         rV             = one/rinvV;
511
512                         if (do_tab)
513                         {
514                             rtC        = rC*tabscale;
515                             n0         = rtC;
516                             epsC       = rtC-n0;
517                             eps2C      = epsC*epsC;
518                             n1C        = tab_elemsize*n0;
519
520                             rtV        = rV*tabscale;
521                             n0         = rtV;
522                             epsV       = rtV-n0;
523                             eps2V      = epsV*epsV;
524                             n1V        = tab_elemsize*n0;
525                         }
526
527                         /* Only process the coulomb interactions if we have charges,
528                          * and if we either include all entries in the list (no cutoff
529                          * used in the kernel), or if we are within the cutoff.
530                          */
531                         bComputeElecInteraction = !bExactElecCutoff ||
532                             ( bConvertEwaldToCoulomb && r < rcoulomb) ||
533                             (!bConvertEwaldToCoulomb && rC < rcoulomb);
534
535                         if ( (qq[i] != 0) && bComputeElecInteraction)
536                         {
537                             switch (icoul)
538                             {
539                                 case GMX_NBKERNEL_ELEC_COULOMB:
540                                     /* simple cutoff */
541                                     Vcoul[i]   = qq[i]*rinvC;
542                                     FscalC[i]  = Vcoul[i];
543                                     /* The shift for the Coulomb potential is stored in
544                                      * the RF parameter c_rf, which is 0 without shift.
545                                      */
546                                     Vcoul[i]  -= qq[i]*fr->ic->c_rf;
547                                     break;
548
549                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
550                                     /* reaction-field */
551                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
552                                     FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
553                                     break;
554
555                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
556                                     /* non-Ewald tabulated coulomb */
557                                     nnn        = n1C;
558                                     Y          = VFtab[nnn];
559                                     F          = VFtab[nnn+1];
560                                     Geps       = epsC*VFtab[nnn+2];
561                                     Heps2      = eps2C*VFtab[nnn+3];
562                                     Fp         = F+Geps+Heps2;
563                                     VV         = Y+epsC*Fp;
564                                     FF         = Fp+Geps+two*Heps2;
565                                     Vcoul[i]   = qq[i]*VV;
566                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
567                                     break;
568
569                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
570                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
571                                     break;
572
573                                 case GMX_NBKERNEL_ELEC_EWALD:
574                                     if (bConvertEwaldToCoulomb)
575                                     {
576                                         /* Ewald FEP is done only on the 1/r part */
577                                         Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
578                                         FscalC[i]  = qq[i]*rinvC;
579                                     }
580                                     else
581                                     {
582                                         ewrt      = rC*ewtabscale;
583                                         ewitab    = static_cast<int>(ewrt);
584                                         eweps     = ewrt-ewitab;
585                                         ewitab    = 4*ewitab;
586                                         FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
587                                         rinvcorr  = rinvC-sh_ewald;
588                                         Vcoul[i]  = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
589                                         FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
590                                     }
591                                     break;
592
593                                 case GMX_NBKERNEL_ELEC_NONE:
594                                     FscalC[i]  = zero;
595                                     Vcoul[i]   = zero;
596                                     break;
597
598                                 default:
599                                     gmx_incons("Invalid icoul in free energy kernel");
600                                     break;
601                             }
602
603                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
604                             {
605                                 d                = rC-fr->rcoulomb_switch;
606                                 d                = (d > zero) ? d : zero;
607                                 d2               = d*d;
608                                 sw               = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
609                                 dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
610
611                                 FscalC[i]        = FscalC[i]*sw - rC*Vcoul[i]*dsw;
612                                 Vcoul[i]        *= sw;
613
614                                 FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : zero;
615                                 Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : zero;
616                             }
617                         }
618
619                         /* Only process the VDW interactions if we have
620                          * some non-zero parameters, and if we either
621                          * include all entries in the list (no cutoff used
622                          * in the kernel), or if we are within the cutoff.
623                          */
624                         bComputeVdwInteraction = !bExactVdwCutoff ||
625                             ( bConvertLJEwaldToLJ6 && r < rvdw) ||
626                             (!bConvertLJEwaldToLJ6 && rV < rvdw);
627                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
628                         {
629                             switch (ivdw)
630                             {
631                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
632                                     /* cutoff LJ */
633                                     if (sc_r_power == six)
634                                     {
635                                         rinv6            = rpinvV;
636                                     }
637                                     else
638                                     {
639                                         rinv6            = rinvV*rinvV;
640                                         rinv6            = rinv6*rinv6*rinv6;
641                                     }
642                                     Vvdw6            = c6[i]*rinv6;
643                                     Vvdw12           = c12[i]*rinv6*rinv6;
644
645                                     Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
646                                                          - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
647                                     FscalV[i]        = Vvdw12 - Vvdw6;
648                                     break;
649
650                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
651                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
652                                     break;
653
654                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
655                                     /* Table LJ */
656                                     nnn = n1V+4;
657                                     /* dispersion */
658                                     Y          = VFtab[nnn];
659                                     F          = VFtab[nnn+1];
660                                     Geps       = epsV*VFtab[nnn+2];
661                                     Heps2      = eps2V*VFtab[nnn+3];
662                                     Fp         = F+Geps+Heps2;
663                                     VV         = Y+epsV*Fp;
664                                     FF         = Fp+Geps+two*Heps2;
665                                     Vvdw[i]   += c6[i]*VV;
666                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
667
668                                     /* repulsion */
669                                     Y          = VFtab[nnn+4];
670                                     F          = VFtab[nnn+5];
671                                     Geps       = epsV*VFtab[nnn+6];
672                                     Heps2      = eps2V*VFtab[nnn+7];
673                                     Fp         = F+Geps+Heps2;
674                                     VV         = Y+epsV*Fp;
675                                     FF         = Fp+Geps+two*Heps2;
676                                     Vvdw[i]   += c12[i]*VV;
677                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
678                                     break;
679
680                                 case GMX_NBKERNEL_VDW_LJEWALD:
681                                     if (sc_r_power == six)
682                                     {
683                                         rinv6            = rpinvV;
684                                     }
685                                     else
686                                     {
687                                         rinv6            = rinvV*rinvV;
688                                         rinv6            = rinv6*rinv6*rinv6;
689                                     }
690                                     c6grid           = nbfp_grid[tj[i]];
691
692                                     if (bConvertLJEwaldToLJ6)
693                                     {
694                                         /* cutoff LJ */
695                                         Vvdw6            = c6[i]*rinv6;
696                                         Vvdw12           = c12[i]*rinv6*rinv6;
697
698                                         Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
699                                                              - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
700                                         FscalV[i]        = Vvdw12 - Vvdw6;
701                                     }
702                                     else
703                                     {
704                                         /* Normal LJ-PME */
705                                         ewcljrsq         = ewclj2*rV*rV;
706                                         exponent         = std::exp(-ewcljrsq);
707                                         poly             = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);
708                                         vvdw_disp        = (c6[i]-c6grid*(one-poly))*rinv6;
709                                         vvdw_rep         = c12[i]*rinv6*rinv6;
710                                         FscalV[i]        = vvdw_rep - vvdw_disp - c6grid*onesixth*exponent*ewclj6;
711                                         Vvdw[i]          = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/six;
712                                     }
713                                     break;
714
715                                 case GMX_NBKERNEL_VDW_NONE:
716                                     Vvdw[i]    = zero;
717                                     FscalV[i]  = zero;
718                                     break;
719
720                                 default:
721                                     gmx_incons("Invalid ivdw in free energy kernel");
722                                     break;
723                             }
724
725                             if (fr->vdw_modifier == eintmodPOTSWITCH)
726                             {
727                                 d                = rV-fr->rvdw_switch;
728                                 d                = (d > zero) ? d : zero;
729                                 d2               = d*d;
730                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
731                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
732
733                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
734                                 Vvdw[i]         *= sw;
735
736                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
737                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
738                             }
739                         }
740
741                         /* FscalC (and FscalV) now contain: dV/drC * rC
742                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
743                          * Further down we first multiply by r^p-2 and then by
744                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
745                          */
746                         FscalC[i] *= rpinvC;
747                         FscalV[i] *= rpinvV;
748                     }
749                 }
750
751                 /* Assemble A and B states */
752                 for (i = 0; i < NSTATES; i++)
753                 {
754                     vctot         += LFC[i]*Vcoul[i];
755                     vvtot         += LFV[i]*Vvdw[i];
756
757                     Fscal         += LFC[i]*FscalC[i]*rpm2;
758                     Fscal         += LFV[i]*FscalV[i]*rpm2;
759
760                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
761                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
762                 }
763             }
764             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
765             {
766                 /* For excluded pairs, which are only in this pair list when
767                  * using the Verlet scheme, we don't use soft-core.
768                  * The group scheme also doesn't soft-core for these.
769                  * As there is no singularity, there is no need for soft-core.
770                  */
771                 VV = krf*rsq - crf;
772                 FF = -two*krf;
773
774                 if (ii == jnr)
775                 {
776                     VV *= half;
777                 }
778
779                 for (i = 0; i < NSTATES; i++)
780                 {
781                     vctot      += LFC[i]*qq[i]*VV;
782                     Fscal      += LFC[i]*qq[i]*FF;
783                     dvdl_coul  += DLF[i]*qq[i]*VV;
784                 }
785             }
786
787             if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
788             {
789                 /* See comment in the preamble. When using Ewald interactions
790                  * (unless we use a switch modifier) we subtract the reciprocal-space
791                  * Ewald component here which made it possible to apply the free
792                  * energy interaction to 1/r (vanilla coulomb short-range part)
793                  * above. This gets us closer to the ideal case of applying
794                  * the softcore to the entire electrostatic interaction,
795                  * including the reciprocal-space component.
796                  */
797                 real v_lr, f_lr;
798
799                 ewrt      = r*ewtabscale;
800                 ewitab    = static_cast<int>(ewrt);
801                 eweps     = ewrt-ewitab;
802                 ewitab    = 4*ewitab;
803                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
804                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
805                 f_lr     *= rinv;
806
807                 /* Note that any possible Ewald shift has already been applied in
808                  * the normal interaction part above.
809                  */
810
811                 if (ii == jnr)
812                 {
813                     /* If we get here, the i particle (ii) has itself (jnr)
814                      * in its neighborlist. This can only happen with the Verlet
815                      * scheme, and corresponds to a self-interaction that will
816                      * occur twice. Scale it down by 50% to only include it once.
817                      */
818                     v_lr *= half;
819                 }
820
821                 for (i = 0; i < NSTATES; i++)
822                 {
823                     vctot      -= LFC[i]*qq[i]*v_lr;
824                     Fscal      -= LFC[i]*qq[i]*f_lr;
825                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
826                 }
827             }
828
829             if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
830             {
831                 /* See comment in the preamble. When using LJ-Ewald interactions
832                  * (unless we use a switch modifier) we subtract the reciprocal-space
833                  * Ewald component here which made it possible to apply the free
834                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
835                  * above. This gets us closer to the ideal case of applying
836                  * the softcore to the entire VdW interaction,
837                  * including the reciprocal-space component.
838                  */
839                 /* We could also use the analytical form here
840                  * iso a table, but that can cause issues for
841                  * r close to 0 for non-interacting pairs.
842                  */
843                 real rs, frac, f_lr;
844                 int  ri;
845
846                 rs     = rsq*rinv*ewtabscale;
847                 ri     = static_cast<int>(rs);
848                 frac   = rs - ri;
849                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
850                 /* TODO: Currently the Ewald LJ table does not contain
851                  * the factor 1/6, we should add this.
852                  */
853                 FF     = f_lr*rinv/six;
854                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
855
856                 if (ii == jnr)
857                 {
858                     /* If we get here, the i particle (ii) has itself (jnr)
859                      * in its neighborlist. This can only happen with the Verlet
860                      * scheme, and corresponds to a self-interaction that will
861                      * occur twice. Scale it down by 50% to only include it once.
862                      */
863                     VV *= half;
864                 }
865
866                 for (i = 0; i < NSTATES; i++)
867                 {
868                     c6grid      = nbfp_grid[tj[i]];
869                     vvtot      += LFV[i]*c6grid*VV;
870                     Fscal      += LFV[i]*c6grid*FF;
871                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
872                 }
873             }
874
875             if (bDoForces)
876             {
877                 tx         = Fscal*dx;
878                 ty         = Fscal*dy;
879                 tz         = Fscal*dz;
880                 fix        = fix + tx;
881                 fiy        = fiy + ty;
882                 fiz        = fiz + tz;
883                 /* OpenMP atomics are expensive, but this kernels is also
884                  * expensive, so we can take this hit, instead of using
885                  * thread-local output buffers and extra reduction.
886                  *
887                  * All the OpenMP regions in this file are trivial and should
888                  * not throw, so no need for try/catch.
889                  */
890 #pragma omp atomic
891                 f[j3]     -= tx;
892 #pragma omp atomic
893                 f[j3+1]   -= ty;
894 #pragma omp atomic
895                 f[j3+2]   -= tz;
896             }
897         }
898
899         /* The atomics below are expensive with many OpenMP threads.
900          * Here unperturbed i-particles will usually only have a few
901          * (perturbed) j-particles in the list. Thus with a buffered list
902          * we can skip a significant number of i-reductions with a check.
903          */
904         if (npair_within_cutoff > 0)
905         {
906             if (bDoForces)
907             {
908 #pragma omp atomic
909                 f[ii3]        += fix;
910 #pragma omp atomic
911                 f[ii3+1]      += fiy;
912 #pragma omp atomic
913                 f[ii3+2]      += fiz;
914             }
915             if (bDoShiftForces)
916             {
917 #pragma omp atomic
918                 fshift[is3]   += fix;
919 #pragma omp atomic
920                 fshift[is3+1] += fiy;
921 #pragma omp atomic
922                 fshift[is3+2] += fiz;
923             }
924             if (bDoPotential)
925             {
926                 ggid               = gid[n];
927 #pragma omp atomic
928                 Vc[ggid]          += vctot;
929 #pragma omp atomic
930                 Vv[ggid]          += vvtot;
931             }
932         }
933     }
934
935 #pragma omp atomic
936     dvdl[efptCOUL]     += dvdl_coul;
937  #pragma omp atomic
938     dvdl[efptVDW]      += dvdl_vdw;
939
940     /* Estimate flops, average for free energy stuff:
941      * 12  flops per outer iteration
942      * 150 flops per inner iteration
943      */
944 #pragma omp atomic
945     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
946 }