Merge "Merge release-4-6 into release-5-0" into release-5-0
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
40
41 #include <math.h>
42
43 #include "vec.h"
44 #include "typedefs.h"
45 #include "nonbonded.h"
46 #include "nb_kernel.h"
47 #include "nrnb.h"
48 #include "macros.h"
49 #include "nb_free_energy.h"
50
51 #include "gmx_fatal.h"
52
53 void
54 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
55                           rvec * gmx_restrict              xx,
56                           rvec * gmx_restrict              ff,
57                           t_forcerec * gmx_restrict        fr,
58                           const t_mdatoms * gmx_restrict   mdatoms,
59                           nb_kernel_data_t * gmx_restrict  kernel_data,
60                           t_nrnb * gmx_restrict            nrnb)
61 {
62
63 #define  STATE_A  0
64 #define  STATE_B  1
65 #define  NSTATES  2
66     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
67     real          shX, shY, shZ;
68     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
69     real          Vcoul[NSTATES], Vvdw[NSTATES];
70     real          rinv6, r, rt, rtC, rtV;
71     real          iqA, iqB;
72     real          qq[NSTATES], vctot, krsq;
73     int           ntiA, ntiB, tj[NSTATES];
74     real          Vvdw6, Vvdw12, vvtot;
75     real          ix, iy, iz, fix, fiy, fiz;
76     real          dx, dy, dz, rsq, rinv;
77     real          c6[NSTATES], c12[NSTATES], c6grid;
78     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
79     double        dvdl_coul, dvdl_vdw;
80     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
81     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
82     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
83     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
84     int           do_tab, tab_elemsize;
85     int           n0, n1C, n1V, nnn;
86     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
87     int           icoul, ivdw;
88     int           nri;
89     const int *   iinr;
90     const int *   jindex;
91     const int *   jjnr;
92     const int *   shift;
93     const int *   gid;
94     const int *   typeA;
95     const int *   typeB;
96     int           ntype;
97     const real *  shiftvec;
98     real          dvdl_part;
99     real *        fshift;
100     real          tabscale = 0;
101     const real *  VFtab    = NULL;
102     const real *  x;
103     real *        f;
104     real          facel, krf, crf;
105     const real *  chargeA;
106     const real *  chargeB;
107     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
108     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
109     const real *  nbfp, *nbfp_grid;
110     real *        dvdl;
111     real *        Vv;
112     real *        Vc;
113     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
114     real          rcoulomb, rvdw, sh_invrc6;
115     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
116     gmx_bool      bEwald, bEwaldLJ;
117     real          rcutoff_max2;
118     const real *  tab_ewald_F_lj;
119     const real *  tab_ewald_V_lj;
120     real          d, d2, sw, dsw, rinvcorr;
121     real          elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
122     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
123     gmx_bool      bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
124     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
125     const real *  ewtab;
126     int           ewitab;
127     real          ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald;
128
129     sh_ewald            = fr->ic->sh_ewald;
130     ewtab               = fr->ic->tabq_coul_FDV0;
131     ewtabscale          = fr->ic->tabq_scale;
132     ewtabhalfspace      = 0.5/ewtabscale;
133     tab_ewald_F_lj      = fr->ic->tabq_vdw_F;
134     tab_ewald_V_lj      = fr->ic->tabq_vdw_V;
135
136     x                   = xx[0];
137     f                   = ff[0];
138
139     fshift              = fr->fshift[0];
140
141     nri                 = nlist->nri;
142     iinr                = nlist->iinr;
143     jindex              = nlist->jindex;
144     jjnr                = nlist->jjnr;
145     icoul               = nlist->ielec;
146     ivdw                = nlist->ivdw;
147     shift               = nlist->shift;
148     gid                 = nlist->gid;
149
150     shiftvec            = fr->shift_vec[0];
151     chargeA             = mdatoms->chargeA;
152     chargeB             = mdatoms->chargeB;
153     facel               = fr->epsfac;
154     krf                 = fr->k_rf;
155     crf                 = fr->c_rf;
156     ewc_lj              = fr->ewaldcoeff_lj;
157     Vc                  = kernel_data->energygrp_elec;
158     typeA               = mdatoms->typeA;
159     typeB               = mdatoms->typeB;
160     ntype               = fr->ntype;
161     nbfp                = fr->nbfp;
162     nbfp_grid           = fr->ljpme_c6grid;
163     Vv                  = kernel_data->energygrp_vdw;
164     lambda_coul         = kernel_data->lambda[efptCOUL];
165     lambda_vdw          = kernel_data->lambda[efptVDW];
166     dvdl                = kernel_data->dvdl;
167     alpha_coul          = fr->sc_alphacoul;
168     alpha_vdw           = fr->sc_alphavdw;
169     lam_power           = fr->sc_power;
170     sc_r_power          = fr->sc_r_power;
171     sigma6_def          = fr->sc_sigma6_def;
172     sigma6_min          = fr->sc_sigma6_min;
173     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
174     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
175     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
176
177     rcoulomb            = fr->rcoulomb;
178     sh_ewald            = fr->ic->sh_ewald;
179     rvdw                = fr->rvdw;
180     sh_invrc6           = fr->ic->sh_invrc6;
181
182     if (fr->coulomb_modifier == eintmodPOTSWITCH)
183     {
184         d               = fr->rcoulomb-fr->rcoulomb_switch;
185         elec_swV3       = -10.0/(d*d*d);
186         elec_swV4       =  15.0/(d*d*d*d);
187         elec_swV5       =  -6.0/(d*d*d*d*d);
188         elec_swF2       = -30.0/(d*d*d);
189         elec_swF3       =  60.0/(d*d*d*d);
190         elec_swF4       = -30.0/(d*d*d*d*d);
191     }
192     else
193     {
194         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
195         elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
196     }
197
198     if (fr->vdw_modifier == eintmodPOTSWITCH)
199     {
200         d               = fr->rvdw-fr->rvdw_switch;
201         vdw_swV3        = -10.0/(d*d*d);
202         vdw_swV4        =  15.0/(d*d*d*d);
203         vdw_swV5        =  -6.0/(d*d*d*d*d);
204         vdw_swF2        = -30.0/(d*d*d);
205         vdw_swF3        =  60.0/(d*d*d*d);
206         vdw_swF4        = -30.0/(d*d*d*d*d);
207     }
208     else
209     {
210         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
211         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
212     }
213
214     if (fr->cutoff_scheme == ecutsVERLET)
215     {
216         const interaction_const_t *ic;
217
218         ic = fr->ic;
219         if (EVDW_PME(ic->vdwtype))
220         {
221             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
222         }
223         else
224         {
225             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
226         }
227
228         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
229         {
230             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
231         }
232         else if (EEL_PME_EWALD(ic->eeltype))
233         {
234             icoul        = GMX_NBKERNEL_ELEC_EWALD;
235         }
236         else
237         {
238             gmx_incons("Unsupported eeltype with Verlet and free-energy");
239         }
240
241         bExactElecCutoff = TRUE;
242         bExactVdwCutoff  = TRUE;
243     }
244     else
245     {
246         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
247         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
248     }
249
250     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
251     rcutoff_max2    = max(fr->rcoulomb, fr->rvdw);
252     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
253
254     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
255     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
256
257     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
258      * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
259      * can apply the small trick of subtracting the _reciprocal_ space contribution
260      * in this kernel, and instead apply the free energy interaction to the 1/r
261      * (standard coulomb) interaction.
262      *
263      * However, we cannot use this approach for switch-modified since we would then
264      * effectively end up evaluating a significantly different interaction here compared to the
265      * normal (non-free-energy) kernels, either by applying a cutoff at a different
266      * position than what the user requested, or by switching different
267      * things (1/r rather than short-range Ewald). For these settings, we just
268      * use the traditional short-range Ewald interaction in that case.
269      */
270     bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH));
271     /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
272      * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
273      */
274     bConvertLJEwaldToLJ6   = (bEwaldLJ && (fr->vdw_modifier   != eintmodPOTSWITCH));
275
276     /* fix compiler warnings */
277     nj1   = 0;
278     n1C   = n1V   = 0;
279     epsC  = epsV  = 0;
280     eps2C = eps2V = 0;
281
282     dvdl_coul  = 0;
283     dvdl_vdw   = 0;
284
285     /* Lambda factor for state A, 1-lambda*/
286     LFC[STATE_A] = 1.0 - lambda_coul;
287     LFV[STATE_A] = 1.0 - lambda_vdw;
288
289     /* Lambda factor for state B, lambda*/
290     LFC[STATE_B] = lambda_coul;
291     LFV[STATE_B] = lambda_vdw;
292
293     /*derivative of the lambda factor for state A and B */
294     DLF[STATE_A] = -1;
295     DLF[STATE_B] = 1;
296
297     for (i = 0; i < NSTATES; i++)
298     {
299         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
300         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
301         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
302         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
303     }
304     /* precalculate */
305     sigma2_def = pow(sigma6_def, 1.0/3.0);
306     sigma2_min = pow(sigma6_min, 1.0/3.0);
307
308     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
309
310     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
311               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
312     if (do_tab)
313     {
314         tabscale         = kernel_data->table_elec_vdw->scale;
315         VFtab            = kernel_data->table_elec_vdw->data;
316         /* we always use the combined table here */
317         tab_elemsize     = 12;
318     }
319
320     for (n = 0; (n < nri); n++)
321     {
322         int npair_within_cutoff;
323
324         npair_within_cutoff = 0;
325
326         is3              = 3*shift[n];
327         shX              = shiftvec[is3];
328         shY              = shiftvec[is3+1];
329         shZ              = shiftvec[is3+2];
330         nj0              = jindex[n];
331         nj1              = jindex[n+1];
332         ii               = iinr[n];
333         ii3              = 3*ii;
334         ix               = shX + x[ii3+0];
335         iy               = shY + x[ii3+1];
336         iz               = shZ + x[ii3+2];
337         iqA              = facel*chargeA[ii];
338         iqB              = facel*chargeB[ii];
339         ntiA             = 2*ntype*typeA[ii];
340         ntiB             = 2*ntype*typeB[ii];
341         vctot            = 0;
342         vvtot            = 0;
343         fix              = 0;
344         fiy              = 0;
345         fiz              = 0;
346
347         for (k = nj0; (k < nj1); k++)
348         {
349             jnr              = jjnr[k];
350             j3               = 3*jnr;
351             dx               = ix - x[j3];
352             dy               = iy - x[j3+1];
353             dz               = iz - x[j3+2];
354             rsq              = dx*dx + dy*dy + dz*dz;
355
356             if (bExactCutoffAll && rsq >= rcutoff_max2)
357             {
358                 /* We save significant time by skipping all code below.
359                  * Note that with soft-core interactions, the actual cut-off
360                  * check might be different. But since the soft-core distance
361                  * is always larger than r, checking on r here is safe.
362                  */
363                 continue;
364             }
365             npair_within_cutoff++;
366
367             if (rsq > 0)
368             {
369                 rinv         = gmx_invsqrt(rsq);
370                 r            = rsq*rinv;
371             }
372             else
373             {
374                 /* The force at r=0 is zero, because of symmetry.
375                  * But note that the potential is in general non-zero,
376                  * since the soft-cored r will be non-zero.
377                  */
378                 rinv         = 0;
379                 r            = 0;
380             }
381
382             if (sc_r_power == 6.0)
383             {
384                 rpm2             = rsq*rsq;  /* r4 */
385                 rp               = rpm2*rsq; /* r6 */
386             }
387             else if (sc_r_power == 48.0)
388             {
389                 rp               = rsq*rsq*rsq; /* r6 */
390                 rp               = rp*rp;       /* r12 */
391                 rp               = rp*rp;       /* r24 */
392                 rp               = rp*rp;       /* r48 */
393                 rpm2             = rp/rsq;      /* r46 */
394             }
395             else
396             {
397                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
398                 rpm2           = rp/rsq;
399             }
400
401             Fscal = 0;
402
403             qq[STATE_A]      = iqA*chargeA[jnr];
404             qq[STATE_B]      = iqB*chargeB[jnr];
405
406             tj[STATE_A]      = ntiA+2*typeA[jnr];
407             tj[STATE_B]      = ntiB+2*typeB[jnr];
408
409             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
410             {
411                 c6[STATE_A]      = nbfp[tj[STATE_A]];
412                 c6[STATE_B]      = nbfp[tj[STATE_B]];
413
414                 for (i = 0; i < NSTATES; i++)
415                 {
416                     c12[i]             = nbfp[tj[i]+1];
417                     if ((c6[i] > 0) && (c12[i] > 0))
418                     {
419                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
420                         sigma6[i]       = 0.5*c12[i]/c6[i];
421                         sigma2[i]       = pow(sigma6[i], 1.0/3.0);
422                         /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
423                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
424                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
425                         {
426                             sigma6[i] = sigma6_min;
427                             sigma2[i] = sigma2_min;
428                         }
429                     }
430                     else
431                     {
432                         sigma6[i]       = sigma6_def;
433                         sigma2[i]       = sigma2_def;
434                     }
435                     if (sc_r_power == 6.0)
436                     {
437                         sigma_pow[i]    = sigma6[i];
438                         sigma_powm2[i]  = sigma6[i]/sigma2[i];
439                     }
440                     else if (sc_r_power == 48.0)
441                     {
442                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
443                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
444                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
445                         sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
446                     }
447                     else
448                     {    /* not really supported as input, but in here for testing the general case*/
449                         sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
450                         sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
451                     }
452                 }
453
454                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
455                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
456                 {
457                     alpha_vdw_eff    = 0;
458                     alpha_coul_eff   = 0;
459                 }
460                 else
461                 {
462                     alpha_vdw_eff    = alpha_vdw;
463                     alpha_coul_eff   = alpha_coul;
464                 }
465
466                 for (i = 0; i < NSTATES; i++)
467                 {
468                     FscalC[i]    = 0;
469                     FscalV[i]    = 0;
470                     Vcoul[i]     = 0;
471                     Vvdw[i]      = 0;
472
473                     /* Only spend time on A or B state if it is non-zero */
474                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
475                     {
476                         /* this section has to be inside the loop because of the dependence on sigma_pow */
477                         rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
478                         rinvC          = pow(rpinvC, 1.0/sc_r_power);
479                         rC             = 1.0/rinvC;
480
481                         rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
482                         rinvV          = pow(rpinvV, 1.0/sc_r_power);
483                         rV             = 1.0/rinvV;
484
485                         if (do_tab)
486                         {
487                             rtC        = rC*tabscale;
488                             n0         = rtC;
489                             epsC       = rtC-n0;
490                             eps2C      = epsC*epsC;
491                             n1C        = tab_elemsize*n0;
492
493                             rtV        = rV*tabscale;
494                             n0         = rtV;
495                             epsV       = rtV-n0;
496                             eps2V      = epsV*epsV;
497                             n1V        = tab_elemsize*n0;
498                         }
499
500                         /* Only process the coulomb interactions if we have charges,
501                          * and if we either include all entries in the list (no cutoff
502                          * used in the kernel), or if we are within the cutoff.
503                          */
504                         bComputeElecInteraction = !bExactElecCutoff ||
505                             ( bConvertEwaldToCoulomb && r < rcoulomb) ||
506                             (!bConvertEwaldToCoulomb && rC < rcoulomb);
507
508                         if ( (qq[i] != 0) && bComputeElecInteraction)
509                         {
510                             switch (icoul)
511                             {
512                                 case GMX_NBKERNEL_ELEC_COULOMB:
513                                     /* simple cutoff */
514                                     Vcoul[i]   = qq[i]*rinvC;
515                                     FscalC[i]  = Vcoul[i];
516                                     /* The shift for the Coulomb potential is stored in
517                                      * the RF parameter c_rf, which is 0 without shift
518                                      */
519                                     Vcoul[i]  -= qq[i]*fr->ic->c_rf;
520                                     break;
521
522                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
523                                     /* reaction-field */
524                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
525                                     FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
526                                     break;
527
528                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
529                                     /* non-Ewald tabulated coulomb */
530                                     nnn        = n1C;
531                                     Y          = VFtab[nnn];
532                                     F          = VFtab[nnn+1];
533                                     Geps       = epsC*VFtab[nnn+2];
534                                     Heps2      = eps2C*VFtab[nnn+3];
535                                     Fp         = F+Geps+Heps2;
536                                     VV         = Y+epsC*Fp;
537                                     FF         = Fp+Geps+2.0*Heps2;
538                                     Vcoul[i]   = qq[i]*VV;
539                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
540                                     break;
541
542                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
543                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
544                                     break;
545
546                                 case GMX_NBKERNEL_ELEC_EWALD:
547                                     if (bConvertEwaldToCoulomb)
548                                     {
549                                         /* Ewald FEP is done only on the 1/r part */
550                                         Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
551                                         FscalC[i]  = qq[i]*rinvC;
552                                     }
553                                     else
554                                     {
555                                         ewrt      = rC*ewtabscale;
556                                         ewitab    = (int) ewrt;
557                                         eweps     = ewrt-ewitab;
558                                         ewitab    = 4*ewitab;
559                                         FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
560                                         rinvcorr  = rinvC-sh_ewald;
561                                         Vcoul[i]  = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
562                                         FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
563                                     }
564                                     break;
565
566                                 case GMX_NBKERNEL_ELEC_NONE:
567                                     FscalC[i]  = 0.0;
568                                     Vcoul[i]   = 0.0;
569                                     break;
570
571                                 default:
572                                     gmx_incons("Invalid icoul in free energy kernel");
573                                     break;
574                             }
575
576                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
577                             {
578                                 d                = rC-fr->rcoulomb_switch;
579                                 d                = (d > 0.0) ? d : 0.0;
580                                 d2               = d*d;
581                                 sw               = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
582                                 dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
583
584                                 FscalC[i]        = FscalC[i]*sw - rC*Vcoul[i]*dsw;
585                                 Vcoul[i]        *= sw;
586
587                                 FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : 0.0;
588                                 Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : 0.0;
589                             }
590                         }
591
592                         /* Only process the VDW interactions if we have
593                          * some non-zero parameters, and if we either
594                          * include all entries in the list (no cutoff used
595                          * in the kernel), or if we are within the cutoff.
596                          */
597                         bComputeVdwInteraction = !bExactVdwCutoff ||
598                             ( bConvertLJEwaldToLJ6 && r < rvdw) ||
599                             (!bConvertLJEwaldToLJ6 && rV < rvdw);
600                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
601                         {
602                             switch (ivdw)
603                             {
604                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
605                                 case GMX_NBKERNEL_VDW_LJEWALD:
606                                     /* cutoff LJ */
607                                     if (sc_r_power == 6.0)
608                                     {
609                                         rinv6            = rpinvV;
610                                     }
611                                     else
612                                     {
613                                         rinv6            = pow(rinvV, 6.0);
614                                     }
615                                     Vvdw6            = c6[i]*rinv6;
616                                     Vvdw12           = c12[i]*rinv6*rinv6;
617                                     if (fr->vdw_modifier == eintmodPOTSHIFT)
618                                     {
619                                         Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
620                                                              -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
621                                     }
622                                     else
623                                     {
624                                         Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
625                                     }
626                                     FscalV[i]        = Vvdw12 - Vvdw6;
627                                     break;
628
629                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
630                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
631                                     break;
632
633                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
634                                     /* Table LJ */
635                                     nnn = n1V+4;
636                                     /* dispersion */
637                                     Y          = VFtab[nnn];
638                                     F          = VFtab[nnn+1];
639                                     Geps       = epsV*VFtab[nnn+2];
640                                     Heps2      = eps2V*VFtab[nnn+3];
641                                     Fp         = F+Geps+Heps2;
642                                     VV         = Y+epsV*Fp;
643                                     FF         = Fp+Geps+2.0*Heps2;
644                                     Vvdw[i]   += c6[i]*VV;
645                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
646
647                                     /* repulsion */
648                                     Y          = VFtab[nnn+4];
649                                     F          = VFtab[nnn+5];
650                                     Geps       = epsV*VFtab[nnn+6];
651                                     Heps2      = eps2V*VFtab[nnn+7];
652                                     Fp         = F+Geps+Heps2;
653                                     VV         = Y+epsV*Fp;
654                                     FF         = Fp+Geps+2.0*Heps2;
655                                     Vvdw[i]   += c12[i]*VV;
656                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
657                                     break;
658
659                                 case GMX_NBKERNEL_VDW_NONE:
660                                     Vvdw[i]    = 0.0;
661                                     FscalV[i]  = 0.0;
662                                     break;
663
664                                 default:
665                                     gmx_incons("Invalid ivdw in free energy kernel");
666                                     break;
667                             }
668
669                             if (fr->vdw_modifier == eintmodPOTSWITCH)
670                             {
671                                 d                = rV-fr->rvdw_switch;
672                                 d                = (d > 0.0) ? d : 0.0;
673                                 d2               = d*d;
674                                 sw               = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
675                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
676
677                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
678                                 Vvdw[i]         *= sw;
679
680                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : 0.0;
681                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : 0.0;
682                             }
683                         }
684
685                         /* FscalC (and FscalV) now contain: dV/drC * rC
686                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
687                          * Further down we first multiply by r^p-2 and then by
688                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
689                          */
690                         FscalC[i] *= rpinvC;
691                         FscalV[i] *= rpinvV;
692                     }
693                 }
694
695                 /* Assemble A and B states */
696                 for (i = 0; i < NSTATES; i++)
697                 {
698                     vctot         += LFC[i]*Vcoul[i];
699                     vvtot         += LFV[i]*Vvdw[i];
700
701                     Fscal         += LFC[i]*FscalC[i]*rpm2;
702                     Fscal         += LFV[i]*FscalV[i]*rpm2;
703
704                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
705                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
706                 }
707             }
708             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
709             {
710                 /* For excluded pairs, which are only in this pair list when
711                  * using the Verlet scheme, we don't use soft-core.
712                  * The group scheme also doesn't soft-core for these.
713                  * As there is no singularity, there is no need for soft-core.
714                  */
715                 VV = krf*rsq - crf;
716                 FF = -2.0*krf;
717
718                 if (ii == jnr)
719                 {
720                     VV *= 0.5;
721                 }
722
723                 for (i = 0; i < NSTATES; i++)
724                 {
725                     vctot      += LFC[i]*qq[i]*VV;
726                     Fscal      += LFC[i]*qq[i]*FF;
727                     dvdl_coul  += DLF[i]*qq[i]*VV;
728                 }
729             }
730
731             if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
732             {
733                 /* See comment in the preamble. When using Ewald interactions
734                  * (unless we use a switch modifier) we subtract the reciprocal-space
735                  * Ewald component here which made it possible to apply the free
736                  * energy interaction to 1/r (vanilla coulomb short-range part)
737                  * above. This gets us closer to the ideal case of applying
738                  * the softcore to the entire electrostatic interaction,
739                  * including the reciprocal-space component.
740                  */
741                 real v_lr, f_lr;
742
743                 ewrt      = r*ewtabscale;
744                 ewitab    = (int) ewrt;
745                 eweps     = ewrt-ewitab;
746                 ewitab    = 4*ewitab;
747                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
748                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
749                 f_lr     *= rinv;
750
751                 if (ii == jnr)
752                 {
753                     /* If we get here, the i particle (ii) has itself (jnr)
754                      * in its neighborlist. This can only happen with the Verlet
755                      * scheme, and corresponds to a self-interaction that will
756                      * occur twice. Scale it down by 50% to only include it once.
757                      */
758                     v_lr *= 0.5;
759                 }
760
761                 for (i = 0; i < NSTATES; i++)
762                 {
763                     vctot      -= LFC[i]*qq[i]*v_lr;
764                     Fscal      -= LFC[i]*qq[i]*f_lr;
765                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
766                 }
767             }
768
769             if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
770             {
771                 /* See comment in the preamble. When using LJ-Ewald interactions
772                  * (unless we use a switch modifier) we subtract the reciprocal-space
773                  * Ewald component here which made it possible to apply the free
774                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
775                  * above. This gets us closer to the ideal case of applying
776                  * the softcore to the entire VdW interaction,
777                  * including the reciprocal-space component.
778                  */
779                 real rs, frac, f_lr;
780                 int  ri;
781
782                 rs     = rsq*rinv*ewtabscale;
783                 ri     = (int)rs;
784                 frac   = rs - ri;
785                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
786                 FF     = f_lr*rinv;
787                 VV     = tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr);
788
789                 if (ii == jnr)
790                 {
791                     /* If we get here, the i particle (ii) has itself (jnr)
792                      * in its neighborlist. This can only happen with the Verlet
793                      * scheme, and corresponds to a self-interaction that will
794                      * occur twice. Scale it down by 50% to only include it once.
795                      */
796                     VV *= 0.5;
797                 }
798
799                 for (i = 0; i < NSTATES; i++)
800                 {
801                     c6grid      = nbfp_grid[tj[i]];
802                     vvtot      += LFV[i]*c6grid*VV*(1.0/6.0);
803                     Fscal      += LFV[i]*c6grid*FF*(1.0/6.0);
804                     dvdl_vdw   += (DLF[i]*c6grid)*VV*(1.0/6.0);
805                 }
806
807             }
808
809             if (bDoForces)
810             {
811                 tx         = Fscal*dx;
812                 ty         = Fscal*dy;
813                 tz         = Fscal*dz;
814                 fix        = fix + tx;
815                 fiy        = fiy + ty;
816                 fiz        = fiz + tz;
817                 /* OpenMP atomics are expensive, but this kernels is also
818                  * expensive, so we can take this hit, instead of using
819                  * thread-local output buffers and extra reduction.
820                  */
821 #pragma omp atomic
822                 f[j3]     -= tx;
823 #pragma omp atomic
824                 f[j3+1]   -= ty;
825 #pragma omp atomic
826                 f[j3+2]   -= tz;
827             }
828         }
829
830         /* The atomics below are expensive with many OpenMP threads.
831          * Here unperturbed i-particles will usually only have a few
832          * (perturbed) j-particles in the list. Thus with a buffered list
833          * we can skip a significant number of i-reductions with a check.
834          */
835         if (npair_within_cutoff > 0)
836         {
837             if (bDoForces)
838             {
839 #pragma omp atomic
840                 f[ii3]        += fix;
841 #pragma omp atomic
842                 f[ii3+1]      += fiy;
843 #pragma omp atomic
844                 f[ii3+2]      += fiz;
845             }
846             if (bDoShiftForces)
847             {
848 #pragma omp atomic
849                 fshift[is3]   += fix;
850 #pragma omp atomic
851                 fshift[is3+1] += fiy;
852 #pragma omp atomic
853                 fshift[is3+2] += fiz;
854             }
855             if (bDoPotential)
856             {
857                 ggid               = gid[n];
858 #pragma omp atomic
859                 Vc[ggid]          += vctot;
860 #pragma omp atomic
861                 Vv[ggid]          += vvtot;
862             }
863         }
864     }
865
866 #pragma omp atomic
867     dvdl[efptCOUL]     += dvdl_coul;
868  #pragma omp atomic
869     dvdl[efptVDW]      += dvdl_vdw;
870
871     /* Estimate flops, average for free energy stuff:
872      * 12  flops per outer iteration
873      * 150 flops per inner iteration
874      */
875 #pragma omp atomic
876     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
877 }
878
879 real
880 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
881                                real tabscale, real *vftab,
882                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
883                                real LFC[2], real LFV[2], real DLF[2],
884                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
885                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
886                                real *velectot, real *vvdwtot, real *dvdl)
887 {
888     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
889     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
890     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
891     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
892     real       fscal_vdw[2], fscal_elec[2];
893     real       velec[2], vvdw[2];
894     int        i, ntab;
895
896     qq[0]    = qqA;
897     qq[1]    = qqB;
898     c6[0]    = c6A;
899     c6[1]    = c6B;
900     c12[0]   = c12A;
901     c12[1]   = c12B;
902
903     if (sc_r_power == 6.0)
904     {
905         rpm2             = r2*r2;   /* r4 */
906         rp               = rpm2*r2; /* r6 */
907     }
908     else if (sc_r_power == 48.0)
909     {
910         rp               = r2*r2*r2; /* r6 */
911         rp               = rp*rp;    /* r12 */
912         rp               = rp*rp;    /* r24 */
913         rp               = rp*rp;    /* r48 */
914         rpm2             = rp/r2;    /* r46 */
915     }
916     else
917     {
918         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
919         rpm2           = rp/r2;
920     }
921
922     /* Loop over state A(0) and B(1) */
923     for (i = 0; i < 2; i++)
924     {
925         if ((c6[i] > 0) && (c12[i] > 0))
926         {
927             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
928              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
929              */
930             sigma6[i]       = 0.5*c12[i]/c6[i];
931             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
932             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
933                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
934             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
935             {
936                 sigma6[i] = sigma6_min;
937                 sigma2[i] = sigma2_min;
938             }
939         }
940         else
941         {
942             sigma6[i]       = sigma6_def;
943             sigma2[i]       = sigma2_def;
944         }
945         if (sc_r_power == 6.0)
946         {
947             sigma_pow[i]    = sigma6[i];
948             sigma_powm2[i]  = sigma6[i]/sigma2[i];
949         }
950         else if (sc_r_power == 48.0)
951         {
952             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
953             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
954             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
955             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
956         }
957         else
958         {    /* not really supported as input, but in here for testing the general case*/
959             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
960             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
961         }
962     }
963
964     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
965     if ((c12[0] > 0) && (c12[1] > 0))
966     {
967         alpha_vdw_eff    = 0;
968         alpha_coul_eff   = 0;
969     }
970     else
971     {
972         alpha_vdw_eff    = alpha_vdw;
973         alpha_coul_eff   = alpha_coul;
974     }
975
976     /* Loop over A and B states again */
977     for (i = 0; i < 2; i++)
978     {
979         fscal_elec[i] = 0;
980         fscal_vdw[i]  = 0;
981         velec[i]      = 0;
982         vvdw[i]       = 0;
983
984         /* Only spend time on A or B state if it is non-zero */
985         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
986         {
987             /* Coulomb */
988             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
989             r_coul           = pow(rpinv, -1.0/sc_r_power);
990
991             /* Electrostatics table lookup data */
992             rtab             = r_coul*tabscale;
993             ntab             = rtab;
994             eps              = rtab-ntab;
995             eps2             = eps*eps;
996             ntab             = 12*ntab;
997             /* Electrostatics */
998             Y                = vftab[ntab];
999             F                = vftab[ntab+1];
1000             Geps             = eps*vftab[ntab+2];
1001             Heps2            = eps2*vftab[ntab+3];
1002             Fp               = F+Geps+Heps2;
1003             VV               = Y+eps*Fp;
1004             FF               = Fp+Geps+2.0*Heps2;
1005             velec[i]         = qq[i]*VV;
1006             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
1007
1008             /* Vdw */
1009             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
1010             r_vdw            = pow(rpinv, -1.0/sc_r_power);
1011             /* Vdw table lookup data */
1012             rtab             = r_vdw*tabscale;
1013             ntab             = rtab;
1014             eps              = rtab-ntab;
1015             eps2             = eps*eps;
1016             ntab             = 12*ntab;
1017             /* Dispersion */
1018             Y                = vftab[ntab+4];
1019             F                = vftab[ntab+5];
1020             Geps             = eps*vftab[ntab+6];
1021             Heps2            = eps2*vftab[ntab+7];
1022             Fp               = F+Geps+Heps2;
1023             VV               = Y+eps*Fp;
1024             FF               = Fp+Geps+2.0*Heps2;
1025             vvdw[i]          = c6[i]*VV;
1026             fscal_vdw[i]     = -c6[i]*FF;
1027
1028             /* Repulsion */
1029             Y                = vftab[ntab+8];
1030             F                = vftab[ntab+9];
1031             Geps             = eps*vftab[ntab+10];
1032             Heps2            = eps2*vftab[ntab+11];
1033             Fp               = F+Geps+Heps2;
1034             VV               = Y+eps*Fp;
1035             FF               = Fp+Geps+2.0*Heps2;
1036             vvdw[i]         += c12[i]*VV;
1037             fscal_vdw[i]    -= c12[i]*FF;
1038             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
1039         }
1040     }
1041     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
1042     /* Assemble A and B states */
1043     velecsum  = 0;
1044     vvdwsum   = 0;
1045     dvdl_coul = 0;
1046     dvdl_vdw  = 0;
1047     fscal     = 0;
1048     for (i = 0; i < 2; i++)
1049     {
1050         velecsum      += LFC[i]*velec[i];
1051         vvdwsum       += LFV[i]*vvdw[i];
1052
1053         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
1054
1055         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
1056         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
1057     }
1058
1059     dvdl[efptCOUL]     += dvdl_coul;
1060     dvdl[efptVDW]      += dvdl_vdw;
1061
1062     *velectot           = velecsum;
1063     *vvdwtot            = vvdwsum;
1064
1065     return fscal;
1066 }