Free-energy support for LJ-PME
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
40
41 #include <math.h>
42
43 #include "vec.h"
44 #include "typedefs.h"
45 #include "nonbonded.h"
46 #include "nb_kernel.h"
47 #include "nrnb.h"
48 #include "macros.h"
49 #include "nb_free_energy.h"
50
51 void
52 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
53                           rvec * gmx_restrict              xx,
54                           rvec * gmx_restrict              ff,
55                           t_forcerec * gmx_restrict        fr,
56                           const t_mdatoms * gmx_restrict   mdatoms,
57                           nb_kernel_data_t * gmx_restrict  kernel_data,
58                           t_nrnb * gmx_restrict            nrnb)
59 {
60
61 #define  STATE_A  0
62 #define  STATE_B  1
63 #define  NSTATES  2
64     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
65     real          shX, shY, shZ;
66     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
67     real          Vcoul[NSTATES], Vvdw[NSTATES];
68     real          rinv6, r, rt, rtC, rtV;
69     real          iqA, iqB;
70     real          qq[NSTATES], vctot, krsq;
71     int           ntiA, ntiB, tj[NSTATES];
72     real          Vvdw6, Vvdw12, vvtot;
73     real          ix, iy, iz, fix, fiy, fiz;
74     real          dx, dy, dz, rsq, rinv;
75     real          c6[NSTATES], c12[NSTATES], c6grid[NSTATES];
76     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
77     double        dvdl_coul, dvdl_vdw;
78     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
79     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
80     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
81     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
82     int           do_tab, tab_elemsize;
83     int           n0, n1C, n1V, nnn;
84     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
85     int           icoul, ivdw;
86     int           nri;
87     const int *   iinr;
88     const int *   jindex;
89     const int *   jjnr;
90     const int *   shift;
91     const int *   gid;
92     const int *   typeA;
93     const int *   typeB;
94     int           ntype;
95     const real *  shiftvec;
96     real          dvdl_part;
97     real *        fshift;
98     real          tabscale = 0;
99     const real *  VFtab    = NULL;
100     const real *  x;
101     real *        f;
102     real          facel, krf, crf;
103     const real *  chargeA;
104     const real *  chargeB;
105     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
106     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
107     const real *  nbfp, *nbfp_grid;
108     real *        dvdl;
109     real *        Vv;
110     real *        Vc;
111     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
112     real          rcoulomb, sh_ewald;
113     real          rvdw, sh_invrc6;
114     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald;
115     real          rcutoff_max2;
116     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
117     const real *  tab_ewald_F;
118     const real *  tab_ewald_V;
119     const real *  tab_ewald_F_lj;
120     const real *  tab_ewald_V_lj;
121     real          tab_ewald_scale, tab_ewald_halfsp;
122
123     x                   = xx[0];
124     f                   = ff[0];
125
126     fshift              = fr->fshift[0];
127
128     nri                 = nlist->nri;
129     iinr                = nlist->iinr;
130     jindex              = nlist->jindex;
131     jjnr                = nlist->jjnr;
132     icoul               = nlist->ielec;
133     ivdw                = nlist->ivdw;
134     shift               = nlist->shift;
135     gid                 = nlist->gid;
136
137     shiftvec            = fr->shift_vec[0];
138     chargeA             = mdatoms->chargeA;
139     chargeB             = mdatoms->chargeB;
140     facel               = fr->epsfac;
141     krf                 = fr->k_rf;
142     crf                 = fr->c_rf;
143     ewc_lj              = fr->ewaldcoeff_lj;
144     Vc                  = kernel_data->energygrp_elec;
145     typeA               = mdatoms->typeA;
146     typeB               = mdatoms->typeB;
147     ntype               = fr->ntype;
148     nbfp                = fr->nbfp;
149     nbfp_grid           = fr->ljpme_c6grid;
150     Vv                  = kernel_data->energygrp_vdw;
151     lambda_coul         = kernel_data->lambda[efptCOUL];
152     lambda_vdw          = kernel_data->lambda[efptVDW];
153     dvdl                = kernel_data->dvdl;
154     alpha_coul          = fr->sc_alphacoul;
155     alpha_vdw           = fr->sc_alphavdw;
156     lam_power           = fr->sc_power;
157     sc_r_power          = fr->sc_r_power;
158     sigma6_def          = fr->sc_sigma6_def;
159     sigma6_min          = fr->sc_sigma6_min;
160     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
161     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
162     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
163
164     rcoulomb            = fr->rcoulomb;
165     sh_ewald            = fr->ic->sh_ewald;
166     rvdw                = fr->rvdw;
167     sh_invrc6           = fr->ic->sh_invrc6;
168
169     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
170     tab_ewald_F         = fr->ic->tabq_coul_F;
171     tab_ewald_V         = fr->ic->tabq_coul_V;
172     tab_ewald_scale     = fr->ic->tabq_scale;
173     tab_ewald_F_lj      = fr->ic->tabq_vdw_F;
174     tab_ewald_V_lj      = fr->ic->tabq_vdw_V;
175     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
176
177     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
178     {
179         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
180         rcutoff2        = rcutoff*rcutoff;
181         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
182         d               = rcutoff-rswitch;
183         swV3            = -10.0/(d*d*d);
184         swV4            =  15.0/(d*d*d*d);
185         swV5            =  -6.0/(d*d*d*d*d);
186         swF2            = -30.0/(d*d*d);
187         swF3            =  60.0/(d*d*d*d);
188         swF4            = -30.0/(d*d*d*d*d);
189     }
190     else
191     {
192         /* Stupid compilers dont realize these variables will not be used */
193         rswitch         = 0.0;
194         swV3            = 0.0;
195         swV4            = 0.0;
196         swV5            = 0.0;
197         swF2            = 0.0;
198         swF3            = 0.0;
199         swF4            = 0.0;
200     }
201
202     if (fr->cutoff_scheme == ecutsVERLET)
203     {
204         const interaction_const_t *ic;
205
206         ic = fr->ic;
207         if (EVDW_PME(ic->vdwtype))
208         {
209             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
210         }
211         else
212         {
213             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
214         }
215
216         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
217         {
218             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
219         }
220         else if (EEL_PME_EWALD(ic->eeltype))
221         {
222             icoul        = GMX_NBKERNEL_ELEC_EWALD;
223         }
224         else
225         {
226             gmx_incons("Unsupported eeltype with Verlet and free-energy");
227         }
228
229         bExactElecCutoff = TRUE;
230         bExactVdwCutoff  = TRUE;
231     }
232     else
233     {
234         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
235         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
236     }
237
238     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
239     rcutoff_max2    = max(fr->rcoulomb, fr->rvdw);
240     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
241
242     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
243
244     /* fix compiler warnings */
245     nj1   = 0;
246     n1C   = n1V   = 0;
247     epsC  = epsV  = 0;
248     eps2C = eps2V = 0;
249
250     dvdl_coul  = 0;
251     dvdl_vdw   = 0;
252
253     /* Lambda factor for state A, 1-lambda*/
254     LFC[STATE_A] = 1.0 - lambda_coul;
255     LFV[STATE_A] = 1.0 - lambda_vdw;
256
257     /* Lambda factor for state B, lambda*/
258     LFC[STATE_B] = lambda_coul;
259     LFV[STATE_B] = lambda_vdw;
260
261     /*derivative of the lambda factor for state A and B */
262     DLF[STATE_A] = -1;
263     DLF[STATE_B] = 1;
264
265     for (i = 0; i < NSTATES; i++)
266     {
267         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
268         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
269         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
270         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
271     }
272     /* precalculate */
273     sigma2_def = pow(sigma6_def, 1.0/3.0);
274     sigma2_min = pow(sigma6_min, 1.0/3.0);
275
276     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
277
278     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
279               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
280     if (do_tab)
281     {
282         tabscale         = kernel_data->table_elec_vdw->scale;
283         VFtab            = kernel_data->table_elec_vdw->data;
284         /* we always use the combined table here */
285         tab_elemsize     = 12;
286     }
287
288     for (n = 0; (n < nri); n++)
289     {
290         int npair_within_cutoff;
291
292         npair_within_cutoff = 0;
293
294         is3              = 3*shift[n];
295         shX              = shiftvec[is3];
296         shY              = shiftvec[is3+1];
297         shZ              = shiftvec[is3+2];
298         nj0              = jindex[n];
299         nj1              = jindex[n+1];
300         ii               = iinr[n];
301         ii3              = 3*ii;
302         ix               = shX + x[ii3+0];
303         iy               = shY + x[ii3+1];
304         iz               = shZ + x[ii3+2];
305         iqA              = facel*chargeA[ii];
306         iqB              = facel*chargeB[ii];
307         ntiA             = 2*ntype*typeA[ii];
308         ntiB             = 2*ntype*typeB[ii];
309         vctot            = 0;
310         vvtot            = 0;
311         fix              = 0;
312         fiy              = 0;
313         fiz              = 0;
314
315         for (k = nj0; (k < nj1); k++)
316         {
317             jnr              = jjnr[k];
318             j3               = 3*jnr;
319             dx               = ix - x[j3];
320             dy               = iy - x[j3+1];
321             dz               = iz - x[j3+2];
322             rsq              = dx*dx + dy*dy + dz*dz;
323
324             if (bExactCutoffAll && rsq >= rcutoff_max2)
325             {
326                 /* We save significant time by skipping all code below.
327                  * Note that with soft-core interactions, the actual cut-off
328                  * check might be different. But since the soft-core distance
329                  * is always larger than r, checking on r here is safe.
330                  */
331                 continue;
332             }
333             npair_within_cutoff++;
334
335             if (rsq > 0)
336             {
337                 rinv         = gmx_invsqrt(rsq);
338                 r            = rsq*rinv;
339             }
340             else
341             {
342                 /* The force at r=0 is zero, because of symmetry.
343                  * But note that the potential is in general non-zero,
344                  * since the soft-cored r will be non-zero.
345                  */
346                 rinv         = 0;
347                 r            = 0;
348             }
349
350             if (sc_r_power == 6.0)
351             {
352                 rpm2             = rsq*rsq;  /* r4 */
353                 rp               = rpm2*rsq; /* r6 */
354             }
355             else if (sc_r_power == 48.0)
356             {
357                 rp               = rsq*rsq*rsq; /* r6 */
358                 rp               = rp*rp;       /* r12 */
359                 rp               = rp*rp;       /* r24 */
360                 rp               = rp*rp;       /* r48 */
361                 rpm2             = rp/rsq;      /* r46 */
362             }
363             else
364             {
365                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
366                 rpm2           = rp/rsq;
367             }
368
369             Fscal = 0;
370
371             qq[STATE_A]      = iqA*chargeA[jnr];
372             qq[STATE_B]      = iqB*chargeB[jnr];
373
374             tj[STATE_A]      = ntiA+2*typeA[jnr];
375             tj[STATE_B]      = ntiB+2*typeB[jnr];
376
377             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD)
378             {
379                 c6grid[STATE_A] = nbfp_grid[tj[STATE_A]];
380                 c6grid[STATE_B] = nbfp_grid[tj[STATE_B]];
381             }
382
383             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
384             {
385                 c6[STATE_A]      = nbfp[tj[STATE_A]];
386                 c6[STATE_B]      = nbfp[tj[STATE_B]];
387
388                 for (i = 0; i < NSTATES; i++)
389                 {
390                     c12[i]             = nbfp[tj[i]+1];
391                     if ((c6[i] > 0) && (c12[i] > 0))
392                     {
393                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
394                         sigma6[i]       = 0.5*c12[i]/c6[i];
395                         sigma2[i]       = pow(sigma6[i], 1.0/3.0);
396                         /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
397                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
398                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
399                         {
400                             sigma6[i] = sigma6_min;
401                             sigma2[i] = sigma2_min;
402                         }
403                     }
404                     else
405                     {
406                         sigma6[i]       = sigma6_def;
407                         sigma2[i]       = sigma2_def;
408                     }
409                     if (sc_r_power == 6.0)
410                     {
411                         sigma_pow[i]    = sigma6[i];
412                         sigma_powm2[i]  = sigma6[i]/sigma2[i];
413                     }
414                     else if (sc_r_power == 48.0)
415                     {
416                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
417                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
418                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
419                         sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
420                     }
421                     else
422                     {    /* not really supported as input, but in here for testing the general case*/
423                         sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
424                         sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
425                     }
426                 }
427
428                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
429                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
430                 {
431                     alpha_vdw_eff    = 0;
432                     alpha_coul_eff   = 0;
433                 }
434                 else
435                 {
436                     alpha_vdw_eff    = alpha_vdw;
437                     alpha_coul_eff   = alpha_coul;
438                 }
439
440                 for (i = 0; i < NSTATES; i++)
441                 {
442                     FscalC[i]    = 0;
443                     FscalV[i]    = 0;
444                     Vcoul[i]     = 0;
445                     Vvdw[i]      = 0;
446
447                     /* Only spend time on A or B state if it is non-zero */
448                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
449                     {
450                         /* this section has to be inside the loop because of the dependence on sigma_pow */
451                         rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
452                         rinvC          = pow(rpinvC, 1.0/sc_r_power);
453                         rC             = 1.0/rinvC;
454
455                         rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
456                         rinvV          = pow(rpinvV, 1.0/sc_r_power);
457                         rV             = 1.0/rinvV;
458
459                         if (do_tab)
460                         {
461                             rtC        = rC*tabscale;
462                             n0         = rtC;
463                             epsC       = rtC-n0;
464                             eps2C      = epsC*epsC;
465                             n1C        = tab_elemsize*n0;
466
467                             rtV        = rV*tabscale;
468                             n0         = rtV;
469                             epsV       = rtV-n0;
470                             eps2V      = epsV*epsV;
471                             n1V        = tab_elemsize*n0;
472                         }
473
474                         /* With Ewald and soft-core we should put the cut-off on r,
475                          * not on the soft-cored rC, as the real-space and
476                          * reciprocal space contributions should (almost) cancel.
477                          */
478                         if (qq[i] != 0 &&
479                             !(bExactElecCutoff &&
480                               ((!bEwald && rC >= rcoulomb) ||
481                                (bEwald && r >= rcoulomb))))
482                         {
483                             switch (icoul)
484                             {
485                                 case GMX_NBKERNEL_ELEC_COULOMB:
486                                     /* simple cutoff */
487                                     Vcoul[i]   = qq[i]*rinvC;
488                                     FscalC[i]  = Vcoul[i];
489                                     break;
490
491                                 case GMX_NBKERNEL_ELEC_EWALD:
492                                     /* Ewald FEP is done only on the 1/r part */
493                                     Vcoul[i]   = qq[i]*(rinvC - sh_ewald);
494                                     FscalC[i]  = Vcoul[i];
495                                     break;
496
497                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
498                                     /* reaction-field */
499                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
500                                     FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
501                                     break;
502
503                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
504                                     /* non-Ewald tabulated coulomb */
505                                     nnn        = n1C;
506                                     Y          = VFtab[nnn];
507                                     F          = VFtab[nnn+1];
508                                     Geps       = epsC*VFtab[nnn+2];
509                                     Heps2      = eps2C*VFtab[nnn+3];
510                                     Fp         = F+Geps+Heps2;
511                                     VV         = Y+epsC*Fp;
512                                     FF         = Fp+Geps+2.0*Heps2;
513                                     Vcoul[i]   = qq[i]*VV;
514                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
515                                     break;
516
517                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
518                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
519                                     break;
520
521                                 case GMX_NBKERNEL_ELEC_NONE:
522                                     FscalC[i]  = 0.0;
523                                     Vcoul[i]   = 0.0;
524                                     break;
525
526                                 default:
527                                     gmx_incons("Invalid icoul in free energy kernel");
528                                     break;
529                             }
530
531                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
532                             {
533                                 d                = rC-rswitch;
534                                 d                = (d > 0.0) ? d : 0.0;
535                                 d2               = d*d;
536                                 sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
537                                 dsw              = d2*(swF2+d*(swF3+d*swF4));
538
539                                 Vcoul[i]  *= sw;
540                                 FscalC[i]  = FscalC[i]*sw + Vcoul[i]*dsw;
541                             }
542                         }
543
544                         if ((c6[i] != 0 || c12[i] != 0) &&
545                             !(bExactVdwCutoff &&
546                               ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) ||
547                                (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw))))
548                         {
549                             switch (ivdw)
550                             {
551                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
552                                 case GMX_NBKERNEL_VDW_LJEWALD:
553                                     /* cutoff LJ */
554                                     if (sc_r_power == 6.0)
555                                     {
556                                         rinv6            = rpinvV;
557                                     }
558                                     else
559                                     {
560                                         rinv6            = pow(rinvV, 6.0);
561                                     }
562                                     Vvdw6            = c6[i]*rinv6;
563                                     Vvdw12           = c12[i]*rinv6*rinv6;
564                                     if (fr->vdw_modifier == eintmodPOTSHIFT)
565                                     {
566                                         Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
567                                                              -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
568                                     }
569                                     else
570                                     {
571                                         Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
572                                     }
573                                     FscalV[i]        = Vvdw12 - Vvdw6;
574                                     break;
575
576                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
577                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
578                                     break;
579
580                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
581                                     /* Table LJ */
582                                     nnn = n1V+4;
583                                     /* dispersion */
584                                     Y          = VFtab[nnn];
585                                     F          = VFtab[nnn+1];
586                                     Geps       = epsV*VFtab[nnn+2];
587                                     Heps2      = eps2V*VFtab[nnn+3];
588                                     Fp         = F+Geps+Heps2;
589                                     VV         = Y+epsV*Fp;
590                                     FF         = Fp+Geps+2.0*Heps2;
591                                     Vvdw[i]   += c6[i]*VV;
592                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
593
594                                     /* repulsion */
595                                     Y          = VFtab[nnn+4];
596                                     F          = VFtab[nnn+5];
597                                     Geps       = epsV*VFtab[nnn+6];
598                                     Heps2      = eps2V*VFtab[nnn+7];
599                                     Fp         = F+Geps+Heps2;
600                                     VV         = Y+epsV*Fp;
601                                     FF         = Fp+Geps+2.0*Heps2;
602                                     Vvdw[i]   += c12[i]*VV;
603                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
604                                     break;
605
606                                 case GMX_NBKERNEL_VDW_NONE:
607                                     Vvdw[i]    = 0.0;
608                                     FscalV[i]  = 0.0;
609                                     break;
610
611                                 default:
612                                     gmx_incons("Invalid ivdw in free energy kernel");
613                                     break;
614                             }
615
616                             if (fr->vdw_modifier == eintmodPOTSWITCH)
617                             {
618                                 d          = rV-rswitch;
619                                 d          = (d > 0.0) ? d : 0.0;
620                                 d2         = d*d;
621                                 sw         = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
622                                 dsw        = d2*(swF2+d*(swF3+d*swF4));
623
624                                 Vvdw[i]   *= sw;
625                                 FscalV[i]  = FscalV[i]*sw + Vvdw[i]*dsw;
626
627                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : 0.0;
628                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : 0.0;
629                             }
630                         }
631
632                         /* FscalC (and FscalV) now contain: dV/drC * rC
633                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
634                          * Further down we first multiply by r^p-2 and then by
635                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
636                          */
637                         FscalC[i] *= rpinvC;
638                         FscalV[i] *= rpinvV;
639                     }
640                 }
641
642                 /* Assemble A and B states */
643                 for (i = 0; i < NSTATES; i++)
644                 {
645                     vctot         += LFC[i]*Vcoul[i];
646                     vvtot         += LFV[i]*Vvdw[i];
647
648                     Fscal         += LFC[i]*FscalC[i]*rpm2;
649                     Fscal         += LFV[i]*FscalV[i]*rpm2;
650
651                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
652                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
653                 }
654             }
655             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
656             {
657                 /* For excluded pairs, which are only in this pair list when
658                  * using the Verlet scheme, we don't use soft-core.
659                  * The group scheme also doesn't soft-core for these.
660                  * As there is no singularity, there is no need for soft-core.
661                  */
662                 VV = krf*rsq - crf;
663                 FF = -2.0*krf;
664
665                 if (ii == jnr)
666                 {
667                     VV *= 0.5;
668                 }
669
670                 for (i = 0; i < NSTATES; i++)
671                 {
672                     vctot      += LFC[i]*qq[i]*VV;
673                     Fscal      += LFC[i]*qq[i]*FF;
674                     dvdl_coul  += DLF[i]*qq[i]*VV;
675                 }
676             }
677
678             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
679                 !(bExactElecCutoff && r >= rcoulomb))
680             {
681                 /* Because we compute the soft-core normally,
682                  * we have to remove the Ewald short range portion.
683                  * Done outside of the states loop because this part
684                  * doesn't depend on the scaled R.
685                  */
686                 real rs, frac, f_lr;
687                 int  ri;
688
689                 rs     = rsq*rinv*tab_ewald_scale;
690                 ri     = (int)rs;
691                 frac   = rs - ri;
692                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
693                 FF     = f_lr*rinv;
694                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
695
696                 if (ii == jnr)
697                 {
698                     VV   *= 0.5;
699                 }
700
701                 for (i = 0; i < NSTATES; i++)
702                 {
703                     vctot      -= LFC[i]*qq[i]*VV;
704                     Fscal      -= LFC[i]*qq[i]*FF;
705                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
706                 }
707             }
708
709             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD &&
710                 !(bExactVdwCutoff && r >= rvdw))
711             {
712                 real rs, frac, f_lr;
713                 int  ri;
714
715                 rs     = rsq*rinv*tab_ewald_scale;
716                 ri     = (int)rs;
717                 frac   = rs - ri;
718                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
719                 FF     = f_lr*rinv;
720                 VV     = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr);
721                 for (i = 0; i < NSTATES; i++)
722                 {
723                     vvtot      += LFV[i]*c6grid[i]*VV*(1.0/6.0);
724                     Fscal      += LFV[i]*c6grid[i]*FF*(1.0/6.0);
725                     dvdl_vdw   += (DLF[i]*c6grid[i])*VV*(1.0/6.0);
726                 }
727
728             }
729
730             if (bDoForces)
731             {
732                 tx         = Fscal*dx;
733                 ty         = Fscal*dy;
734                 tz         = Fscal*dz;
735                 fix        = fix + tx;
736                 fiy        = fiy + ty;
737                 fiz        = fiz + tz;
738                 /* OpenMP atomics are expensive, but this kernels is also
739                  * expensive, so we can take this hit, instead of using
740                  * thread-local output buffers and extra reduction.
741                  */
742 #pragma omp atomic
743                 f[j3]     -= tx;
744 #pragma omp atomic
745                 f[j3+1]   -= ty;
746 #pragma omp atomic
747                 f[j3+2]   -= tz;
748             }
749         }
750
751         /* The atomics below are expensive with many OpenMP threads.
752          * Here unperturbed i-particles will usually only have a few
753          * (perturbed) j-particles in the list. Thus with a buffered list
754          * we can skip a significant number of i-reductions with a check.
755          */
756         if (npair_within_cutoff > 0)
757         {
758             if (bDoForces)
759             {
760 #pragma omp atomic
761                 f[ii3]        += fix;
762 #pragma omp atomic
763                 f[ii3+1]      += fiy;
764 #pragma omp atomic
765                 f[ii3+2]      += fiz;
766             }
767             if (bDoShiftForces)
768             {
769 #pragma omp atomic
770                 fshift[is3]   += fix;
771 #pragma omp atomic
772                 fshift[is3+1] += fiy;
773 #pragma omp atomic
774                 fshift[is3+2] += fiz;
775             }
776             if (bDoPotential)
777             {
778                 ggid               = gid[n];
779 #pragma omp atomic
780                 Vc[ggid]          += vctot;
781 #pragma omp atomic
782                 Vv[ggid]          += vvtot;
783             }
784         }
785     }
786
787 #pragma omp atomic
788     dvdl[efptCOUL]     += dvdl_coul;
789  #pragma omp atomic
790     dvdl[efptVDW]      += dvdl_vdw;
791
792     /* Estimate flops, average for free energy stuff:
793      * 12  flops per outer iteration
794      * 150 flops per inner iteration
795      */
796     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
797 }
798
799 real
800 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
801                                real tabscale, real *vftab,
802                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
803                                real LFC[2], real LFV[2], real DLF[2],
804                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
805                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
806                                real *velectot, real *vvdwtot, real *dvdl)
807 {
808     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
809     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
810     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
811     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
812     real       fscal_vdw[2], fscal_elec[2];
813     real       velec[2], vvdw[2];
814     int        i, ntab;
815
816     qq[0]    = qqA;
817     qq[1]    = qqB;
818     c6[0]    = c6A;
819     c6[1]    = c6B;
820     c12[0]   = c12A;
821     c12[1]   = c12B;
822
823     if (sc_r_power == 6.0)
824     {
825         rpm2             = r2*r2;   /* r4 */
826         rp               = rpm2*r2; /* r6 */
827     }
828     else if (sc_r_power == 48.0)
829     {
830         rp               = r2*r2*r2; /* r6 */
831         rp               = rp*rp;    /* r12 */
832         rp               = rp*rp;    /* r24 */
833         rp               = rp*rp;    /* r48 */
834         rpm2             = rp/r2;    /* r46 */
835     }
836     else
837     {
838         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
839         rpm2           = rp/r2;
840     }
841
842     /* Loop over state A(0) and B(1) */
843     for (i = 0; i < 2; i++)
844     {
845         if ((c6[i] > 0) && (c12[i] > 0))
846         {
847             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
848              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
849              */
850             sigma6[i]       = 0.5*c12[i]/c6[i];
851             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
852             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
853                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
854             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
855             {
856                 sigma6[i] = sigma6_min;
857                 sigma2[i] = sigma2_min;
858             }
859         }
860         else
861         {
862             sigma6[i]       = sigma6_def;
863             sigma2[i]       = sigma2_def;
864         }
865         if (sc_r_power == 6.0)
866         {
867             sigma_pow[i]    = sigma6[i];
868             sigma_powm2[i]  = sigma6[i]/sigma2[i];
869         }
870         else if (sc_r_power == 48.0)
871         {
872             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
873             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
874             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
875             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
876         }
877         else
878         {    /* not really supported as input, but in here for testing the general case*/
879             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
880             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
881         }
882     }
883
884     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
885     if ((c12[0] > 0) && (c12[1] > 0))
886     {
887         alpha_vdw_eff    = 0;
888         alpha_coul_eff   = 0;
889     }
890     else
891     {
892         alpha_vdw_eff    = alpha_vdw;
893         alpha_coul_eff   = alpha_coul;
894     }
895
896     /* Loop over A and B states again */
897     for (i = 0; i < 2; i++)
898     {
899         fscal_elec[i] = 0;
900         fscal_vdw[i]  = 0;
901         velec[i]      = 0;
902         vvdw[i]       = 0;
903
904         /* Only spend time on A or B state if it is non-zero */
905         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
906         {
907             /* Coulomb */
908             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
909             r_coul           = pow(rpinv, -1.0/sc_r_power);
910
911             /* Electrostatics table lookup data */
912             rtab             = r_coul*tabscale;
913             ntab             = rtab;
914             eps              = rtab-ntab;
915             eps2             = eps*eps;
916             ntab             = 12*ntab;
917             /* Electrostatics */
918             Y                = vftab[ntab];
919             F                = vftab[ntab+1];
920             Geps             = eps*vftab[ntab+2];
921             Heps2            = eps2*vftab[ntab+3];
922             Fp               = F+Geps+Heps2;
923             VV               = Y+eps*Fp;
924             FF               = Fp+Geps+2.0*Heps2;
925             velec[i]         = qq[i]*VV;
926             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
927
928             /* Vdw */
929             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
930             r_vdw            = pow(rpinv, -1.0/sc_r_power);
931             /* Vdw table lookup data */
932             rtab             = r_vdw*tabscale;
933             ntab             = rtab;
934             eps              = rtab-ntab;
935             eps2             = eps*eps;
936             ntab             = 12*ntab;
937             /* Dispersion */
938             Y                = vftab[ntab+4];
939             F                = vftab[ntab+5];
940             Geps             = eps*vftab[ntab+6];
941             Heps2            = eps2*vftab[ntab+7];
942             Fp               = F+Geps+Heps2;
943             VV               = Y+eps*Fp;
944             FF               = Fp+Geps+2.0*Heps2;
945             vvdw[i]          = c6[i]*VV;
946             fscal_vdw[i]     = -c6[i]*FF;
947
948             /* Repulsion */
949             Y                = vftab[ntab+8];
950             F                = vftab[ntab+9];
951             Geps             = eps*vftab[ntab+10];
952             Heps2            = eps2*vftab[ntab+11];
953             Fp               = F+Geps+Heps2;
954             VV               = Y+eps*Fp;
955             FF               = Fp+Geps+2.0*Heps2;
956             vvdw[i]         += c12[i]*VV;
957             fscal_vdw[i]    -= c12[i]*FF;
958             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
959         }
960     }
961     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
962     /* Assemble A and B states */
963     velecsum  = 0;
964     vvdwsum   = 0;
965     dvdl_coul = 0;
966     dvdl_vdw  = 0;
967     fscal     = 0;
968     for (i = 0; i < 2; i++)
969     {
970         velecsum      += LFC[i]*velec[i];
971         vvdwsum       += LFV[i]*vvdw[i];
972
973         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
974
975         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
976         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
977     }
978
979     dvdl[efptCOUL]     += dvdl_coul;
980     dvdl[efptVDW]      += dvdl_vdw;
981
982     *velectot           = velecsum;
983     *vvdwtot            = vvdwsum;
984
985     return fscal;
986 }