ccb3a1e3458ec9733017a75177e2974c8b130137
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "vec.h"
45 #include "typedefs.h"
46 #include "nonbonded.h"
47 #include "nb_kernel.h"
48 #include "nrnb.h"
49 #include "nb_free_energy.h"
50
51 void
52 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
53                           rvec * gmx_restrict              xx,
54                           rvec * gmx_restrict              ff,
55                           t_forcerec * gmx_restrict        fr,
56                           const t_mdatoms * gmx_restrict   mdatoms,
57                           nb_kernel_data_t * gmx_restrict  kernel_data,
58                           t_nrnb * gmx_restrict            nrnb)
59 {
60
61 #define  STATE_A  0
62 #define  STATE_B  1
63 #define  NSTATES  2
64     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
65     real          shX, shY, shZ;
66     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
67     real          Vcoul[NSTATES], Vvdw[NSTATES];
68     real          rinv6, r, rt, rtC, rtV;
69     real          iqA, iqB;
70     real          qq[NSTATES], vctot, krsq;
71     int           ntiA, ntiB, tj[NSTATES];
72     real          Vvdw6, Vvdw12, vvtot;
73     real          ix, iy, iz, fix, fiy, fiz;
74     real          dx, dy, dz, rsq, rinv;
75     real          c6[NSTATES], c12[NSTATES];
76     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
77     double        dvdl_coul, dvdl_vdw;
78     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
79     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
80     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
81     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
82     int           do_tab, tab_elemsize;
83     int           n0, n1C, n1V, nnn;
84     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
85     int           icoul, ivdw;
86     int           nri;
87     const int *   iinr;
88     const int *   jindex;
89     const int *   jjnr;
90     const int *   shift;
91     const int *   gid;
92     const int *   typeA;
93     const int *   typeB;
94     int           ntype;
95     const real *  shiftvec;
96     real          dvdl_part;
97     real *        fshift;
98     real          tabscale;
99     const real *  VFtab;
100     const real *  x;
101     real *        f;
102     real          facel, krf, crf;
103     const real *  chargeA;
104     const real *  chargeB;
105     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
106     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
107     const real *  nbfp;
108     real *        dvdl;
109     real *        Vv;
110     real *        Vc;
111     gmx_bool      bDoForces;
112     real          rcoulomb, rvdw, sh_invrc6;
113     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
114     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
115     const real *  tab_ewald_F;
116     const real *  tab_ewald_V;
117     real          tab_ewald_scale, tab_ewald_halfsp;
118
119     x                   = xx[0];
120     f                   = ff[0];
121
122     fshift              = fr->fshift[0];
123
124     nri                 = nlist->nri;
125     iinr                = nlist->iinr;
126     jindex              = nlist->jindex;
127     jjnr                = nlist->jjnr;
128     icoul               = nlist->ielec;
129     ivdw                = nlist->ivdw;
130     shift               = nlist->shift;
131     gid                 = nlist->gid;
132
133     shiftvec            = fr->shift_vec[0];
134     chargeA             = mdatoms->chargeA;
135     chargeB             = mdatoms->chargeB;
136     facel               = fr->epsfac;
137     krf                 = fr->k_rf;
138     crf                 = fr->c_rf;
139     ewc                 = fr->ewaldcoeff;
140     Vc                  = kernel_data->energygrp_elec;
141     typeA               = mdatoms->typeA;
142     typeB               = mdatoms->typeB;
143     ntype               = fr->ntype;
144     nbfp                = fr->nbfp;
145     Vv                  = kernel_data->energygrp_vdw;
146     tabscale            = kernel_data->table_elec_vdw->scale;
147     VFtab               = kernel_data->table_elec_vdw->data;
148     lambda_coul         = kernel_data->lambda[efptCOUL];
149     lambda_vdw          = kernel_data->lambda[efptVDW];
150     dvdl                = kernel_data->dvdl;
151     alpha_coul          = fr->sc_alphacoul;
152     alpha_vdw           = fr->sc_alphavdw;
153     lam_power           = fr->sc_power;
154     sc_r_power          = fr->sc_r_power;
155     sigma6_def          = fr->sc_sigma6_def;
156     sigma6_min          = fr->sc_sigma6_min;
157     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
158
159     rcoulomb            = fr->rcoulomb;
160     rvdw                = fr->rvdw;
161     sh_invrc6           = fr->ic->sh_invrc6;
162
163     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
164     tab_ewald_F         = fr->ic->tabq_coul_F;
165     tab_ewald_V         = fr->ic->tabq_coul_V;
166     tab_ewald_scale     = fr->ic->tabq_scale;
167     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
168
169     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
170     {
171         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
172         rcutoff2        = rcutoff*rcutoff;
173         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
174         d               = rcutoff-rswitch;
175         swV3            = -10.0/(d*d*d);
176         swV4            =  15.0/(d*d*d*d);
177         swV5            =  -6.0/(d*d*d*d*d);
178         swF2            = -30.0/(d*d*d);
179         swF3            =  60.0/(d*d*d*d);
180         swF4            = -30.0/(d*d*d*d*d);
181     }
182     else
183     {
184         /* Stupid compilers dont realize these variables will not be used */
185         rswitch         = 0.0;
186         swV3            = 0.0;
187         swV4            = 0.0;
188         swV5            = 0.0;
189         swF2            = 0.0;
190         swF3            = 0.0;
191         swF4            = 0.0;
192     }
193
194     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
195     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
196
197     /* fix compiler warnings */
198     nj1   = 0;
199     n1C   = n1V   = 0;
200     epsC  = epsV  = 0;
201     eps2C = eps2V = 0;
202
203     dvdl_coul  = 0;
204     dvdl_vdw   = 0;
205
206     /* Lambda factor for state A, 1-lambda*/
207     LFC[STATE_A] = 1.0 - lambda_coul;
208     LFV[STATE_A] = 1.0 - lambda_vdw;
209
210     /* Lambda factor for state B, lambda*/
211     LFC[STATE_B] = lambda_coul;
212     LFV[STATE_B] = lambda_vdw;
213
214     /*derivative of the lambda factor for state A and B */
215     DLF[STATE_A] = -1;
216     DLF[STATE_B] = 1;
217
218     for (i = 0; i < NSTATES; i++)
219     {
220         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
221         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
222         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
223         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
224     }
225     /* precalculate */
226     sigma2_def = pow(sigma6_def, 1.0/3.0);
227     sigma2_min = pow(sigma6_min, 1.0/3.0);
228
229     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
230
231     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
232               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
233
234     /* we always use the combined table here */
235     tab_elemsize = 12;
236
237     for (n = 0; (n < nri); n++)
238     {
239         is3              = 3*shift[n];
240         shX              = shiftvec[is3];
241         shY              = shiftvec[is3+1];
242         shZ              = shiftvec[is3+2];
243         nj0              = jindex[n];
244         nj1              = jindex[n+1];
245         ii               = iinr[n];
246         ii3              = 3*ii;
247         ix               = shX + x[ii3+0];
248         iy               = shY + x[ii3+1];
249         iz               = shZ + x[ii3+2];
250         iqA              = facel*chargeA[ii];
251         iqB              = facel*chargeB[ii];
252         ntiA             = 2*ntype*typeA[ii];
253         ntiB             = 2*ntype*typeB[ii];
254         vctot            = 0;
255         vvtot            = 0;
256         fix              = 0;
257         fiy              = 0;
258         fiz              = 0;
259
260         for (k = nj0; (k < nj1); k++)
261         {
262             jnr              = jjnr[k];
263             j3               = 3*jnr;
264             dx               = ix - x[j3];
265             dy               = iy - x[j3+1];
266             dz               = iz - x[j3+2];
267             rsq              = dx*dx+dy*dy+dz*dz;
268             rinv             = gmx_invsqrt(rsq);
269             r                = rsq*rinv;
270             if (sc_r_power == 6.0)
271             {
272                 rpm2             = rsq*rsq;  /* r4 */
273                 rp               = rpm2*rsq; /* r6 */
274             }
275             else if (sc_r_power == 48.0)
276             {
277                 rp               = rsq*rsq*rsq; /* r6 */
278                 rp               = rp*rp;       /* r12 */
279                 rp               = rp*rp;       /* r24 */
280                 rp               = rp*rp;       /* r48 */
281                 rpm2             = rp/rsq;      /* r46 */
282             }
283             else
284             {
285                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
286                 rpm2           = rp/rsq;
287             }
288
289             tj[STATE_A]      = ntiA+2*typeA[jnr];
290             tj[STATE_B]      = ntiB+2*typeB[jnr];
291             qq[STATE_A]      = iqA*chargeA[jnr];
292             qq[STATE_B]      = iqB*chargeB[jnr];
293
294             for (i = 0; i < NSTATES; i++)
295             {
296
297                 c6[i]              = nbfp[tj[i]];
298                 c12[i]             = nbfp[tj[i]+1];
299                 if ((c6[i] > 0) && (c12[i] > 0))
300                 {
301                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
302                     sigma6[i]       = 0.5*c12[i]/c6[i];
303                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
304                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
305                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
306                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
307                     {
308                         sigma6[i] = sigma6_min;
309                         sigma2[i] = sigma2_min;
310                     }
311                 }
312                 else
313                 {
314                     sigma6[i]       = sigma6_def;
315                     sigma2[i]       = sigma2_def;
316                 }
317                 if (sc_r_power == 6.0)
318                 {
319                     sigma_pow[i]    = sigma6[i];
320                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
321                 }
322                 else if (sc_r_power == 48.0)
323                 {
324                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
325                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
326                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
327                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
328                 }
329                 else
330                 {    /* not really supported as input, but in here for testing the general case*/
331                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
332                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
333                 }
334             }
335
336             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
337             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
338             {
339                 alpha_vdw_eff    = 0;
340                 alpha_coul_eff   = 0;
341             }
342             else
343             {
344                 alpha_vdw_eff    = alpha_vdw;
345                 alpha_coul_eff   = alpha_coul;
346             }
347
348             for (i = 0; i < NSTATES; i++)
349             {
350                 FscalC[i]    = 0;
351                 FscalV[i]    = 0;
352                 Vcoul[i]     = 0;
353                 Vvdw[i]      = 0;
354
355                 /* Only spend time on A or B state if it is non-zero */
356                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
357                 {
358
359                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
360                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
361                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
362                     rC             = 1.0/rinvC;
363
364                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
365                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
366                     rV             = 1.0/rinvV;
367
368                     if (do_tab)
369                     {
370                         rtC        = rC*tabscale;
371                         n0         = rtC;
372                         epsC       = rtC-n0;
373                         eps2C      = epsC*epsC;
374                         n1C        = tab_elemsize*n0;
375
376                         rtV        = rV*tabscale;
377                         n0         = rtV;
378                         epsV       = rtV-n0;
379                         eps2V      = epsV*epsV;
380                         n1V        = tab_elemsize*n0;
381                     }
382
383                     /* With Ewald and soft-core we should put the cut-off on r,
384                      * not on the soft-cored rC, as the real-space and
385                      * reciprocal space contributions should (almost) cancel.
386                      */
387                     if (qq[i] != 0 &&
388                         !(bExactElecCutoff &&
389                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
390                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
391                     {
392                         switch (icoul)
393                         {
394                             case GMX_NBKERNEL_ELEC_COULOMB:
395                             case GMX_NBKERNEL_ELEC_EWALD:
396                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
397                                 Vcoul[i]   = qq[i]*rinvC;
398                                 FscalC[i]  = Vcoul[i];
399                                 break;
400
401                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
402                                 /* reaction-field */
403                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
404                                 FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
405                                 break;
406
407                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
408                                 /* non-Ewald tabulated coulomb */
409                                 nnn        = n1C;
410                                 Y          = VFtab[nnn];
411                                 F          = VFtab[nnn+1];
412                                 Geps       = epsC*VFtab[nnn+2];
413                                 Heps2      = eps2C*VFtab[nnn+3];
414                                 Fp         = F+Geps+Heps2;
415                                 VV         = Y+epsC*Fp;
416                                 FF         = Fp+Geps+2.0*Heps2;
417                                 Vcoul[i]   = qq[i]*VV;
418                                 FscalC[i]  = -qq[i]*tabscale*FF*rC;
419                                 break;
420
421                             case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
422                                 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
423                                 break;
424
425                             case GMX_NBKERNEL_ELEC_NONE:
426                                 FscalC[i]  = 0.0;
427                                 Vcoul[i]   = 0.0;
428                                 break;
429
430                             default:
431                                 gmx_incons("Invalid icoul in free energy kernel");
432                                 break;
433                         }
434
435                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
436                         {
437                             d                = rC-rswitch;
438                             d                = (d > 0.0) ? d : 0.0;
439                             d2               = d*d;
440                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
441                             dsw              = d2*(swF2+d*(swF3+d*swF4));
442
443                             Vcoul[i]        *= sw;
444                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
445                         }
446                     }
447
448                     if ((c6[i] != 0 || c12[i] != 0) &&
449                         !(bExactVdwCutoff && rV >= rvdw))
450                     {
451                         switch (ivdw)
452                         {
453                             case GMX_NBKERNEL_VDW_LENNARDJONES:
454                                 /* cutoff LJ */
455                                 if (sc_r_power == 6.0)
456                                 {
457                                     rinv6            = rpinvV;
458                                 }
459                                 else
460                                 {
461                                     rinv6            = pow(rinvV, 6.0);
462                                 }
463                                 Vvdw6            = c6[i]*rinv6;
464                                 Vvdw12           = c12[i]*rinv6*rinv6;
465                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
466                                 {
467                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
468                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
469                                 }
470                                 else
471                                 {
472                                     Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
473                                 }
474                                 FscalV[i]        = Vvdw12 - Vvdw6;
475                                 break;
476
477                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
478                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
479                                 break;
480
481                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
482                                 /* Table LJ */
483                                 nnn = n1V+4;
484                                 /* dispersion */
485                                 Y          = VFtab[nnn];
486                                 F          = VFtab[nnn+1];
487                                 Geps       = epsV*VFtab[nnn+2];
488                                 Heps2      = eps2V*VFtab[nnn+3];
489                                 Fp         = F+Geps+Heps2;
490                                 VV         = Y+epsV*Fp;
491                                 FF         = Fp+Geps+2.0*Heps2;
492                                 Vvdw[i]   += c6[i]*VV;
493                                 FscalV[i] -= c6[i]*tabscale*FF*rV;
494
495                                 /* repulsion */
496                                 Y          = VFtab[nnn+4];
497                                 F          = VFtab[nnn+5];
498                                 Geps       = epsV*VFtab[nnn+6];
499                                 Heps2      = eps2V*VFtab[nnn+7];
500                                 Fp         = F+Geps+Heps2;
501                                 VV         = Y+epsV*Fp;
502                                 FF         = Fp+Geps+2.0*Heps2;
503                                 Vvdw[i]   += c12[i]*VV;
504                                 FscalV[i] -= c12[i]*tabscale*FF*rV;
505                                 break;
506
507                             case GMX_NBKERNEL_VDW_NONE:
508                                 Vvdw[i]    = 0.0;
509                                 FscalV[i]  = 0.0;
510                                 break;
511
512                             default:
513                                 gmx_incons("Invalid ivdw in free energy kernel");
514                                 break;
515                         }
516
517                         if (fr->vdw_modifier == eintmodPOTSWITCH)
518                         {
519                             d                = rV-rswitch;
520                             d                = (d > 0.0) ? d : 0.0;
521                             d2               = d*d;
522                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
523                             dsw              = d2*(swF2+d*(swF3+d*swF4));
524
525                             Vvdw[i]         *= sw;
526                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
527
528                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
529                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
530                         }
531                     }
532
533                     /* FscalC (and FscalV) now contain: dV/drC * rC
534                      * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
535                      * Further down we first multiply by r^p-2 and then by
536                      * the vector r, which in total gives: dV/drC * (r/rC)^1-p
537                      */
538                     FscalC[i] *= rpinvC;
539                     FscalV[i] *= rpinvV;
540                 }
541             }
542
543             Fscal = 0;
544
545             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
546                 !(bExactElecCutoff && r >= rcoulomb))
547             {
548                 /* Because we compute the soft-core normally,
549                  * we have to remove the Ewald short range portion.
550                  * Done outside of the states loop because this part
551                  * doesn't depend on the scaled R.
552                  */
553                 real rs, frac, f_lr;
554                 int  ri;
555
556                 rs     = rsq*rinv*tab_ewald_scale;
557                 ri     = (int)rs;
558                 frac   = rs - ri;
559                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
560                 FF     = f_lr*rinv;
561                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
562
563                 for (i = 0; i < NSTATES; i++)
564                 {
565                     vctot      -= LFC[i]*qq[i]*VV;
566                     Fscal      -= LFC[i]*qq[i]*FF;
567                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
568                 }
569             }
570
571             /* Assemble A and B states */
572             for (i = 0; i < NSTATES; i++)
573             {
574                 vctot         += LFC[i]*Vcoul[i];
575                 vvtot         += LFV[i]*Vvdw[i];
576
577                 Fscal         += LFC[i]*FscalC[i]*rpm2;
578                 Fscal         += LFV[i]*FscalV[i]*rpm2;
579
580                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
581                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
582             }
583
584             if (bDoForces)
585             {
586                 tx         = Fscal*dx;
587                 ty         = Fscal*dy;
588                 tz         = Fscal*dz;
589                 fix        = fix + tx;
590                 fiy        = fiy + ty;
591                 fiz        = fiz + tz;
592                 f[j3]      = f[j3]   - tx;
593                 f[j3+1]    = f[j3+1] - ty;
594                 f[j3+2]    = f[j3+2] - tz;
595             }
596         }
597
598         if (bDoForces)
599         {
600             f[ii3]         = f[ii3]        + fix;
601             f[ii3+1]       = f[ii3+1]      + fiy;
602             f[ii3+2]       = f[ii3+2]      + fiz;
603             fshift[is3]    = fshift[is3]   + fix;
604             fshift[is3+1]  = fshift[is3+1] + fiy;
605             fshift[is3+2]  = fshift[is3+2] + fiz;
606         }
607         ggid               = gid[n];
608         Vc[ggid]           = Vc[ggid] + vctot;
609         Vv[ggid]           = Vv[ggid] + vvtot;
610     }
611
612     dvdl[efptCOUL]     += dvdl_coul;
613     dvdl[efptVDW]      += dvdl_vdw;
614
615     /* Estimate flops, average for free energy stuff:
616      * 12  flops per outer iteration
617      * 150 flops per inner iteration
618      */
619     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
620 }
621
622 real
623 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
624                                real tabscale, real *vftab,
625                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
626                                real LFC[2], real LFV[2], real DLF[2],
627                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
628                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
629                                real *velectot, real *vvdwtot, real *dvdl)
630 {
631     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
632     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
633     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
634     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
635     real       fscal_vdw[2], fscal_elec[2];
636     real       velec[2], vvdw[2];
637     int        i, ntab;
638
639     qq[0]    = qqA;
640     qq[1]    = qqB;
641     c6[0]    = c6A;
642     c6[1]    = c6B;
643     c12[0]   = c12A;
644     c12[1]   = c12B;
645
646     if (sc_r_power == 6.0)
647     {
648         rpm2             = r2*r2;   /* r4 */
649         rp               = rpm2*r2; /* r6 */
650     }
651     else if (sc_r_power == 48.0)
652     {
653         rp               = r2*r2*r2; /* r6 */
654         rp               = rp*rp;    /* r12 */
655         rp               = rp*rp;    /* r24 */
656         rp               = rp*rp;    /* r48 */
657         rpm2             = rp/r2;    /* r46 */
658     }
659     else
660     {
661         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
662         rpm2           = rp/r2;
663     }
664
665     /* Loop over state A(0) and B(1) */
666     for (i = 0; i < 2; i++)
667     {
668         if ((c6[i] > 0) && (c12[i] > 0))
669         {
670             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
671              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
672              */
673             sigma6[i]       = 0.5*c12[i]/c6[i];
674             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
675             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
676                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
677             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
678             {
679                 sigma6[i] = sigma6_min;
680                 sigma2[i] = sigma2_min;
681             }
682         }
683         else
684         {
685             sigma6[i]       = sigma6_def;
686             sigma2[i]       = sigma2_def;
687         }
688         if (sc_r_power == 6.0)
689         {
690             sigma_pow[i]    = sigma6[i];
691             sigma_powm2[i]  = sigma6[i]/sigma2[i];
692         }
693         else if (sc_r_power == 48.0)
694         {
695             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
696             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
697             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
698             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
699         }
700         else
701         {    /* not really supported as input, but in here for testing the general case*/
702             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
703             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
704         }
705     }
706
707     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
708     if ((c12[0] > 0) && (c12[1] > 0))
709     {
710         alpha_vdw_eff    = 0;
711         alpha_coul_eff   = 0;
712     }
713     else
714     {
715         alpha_vdw_eff    = alpha_vdw;
716         alpha_coul_eff   = alpha_coul;
717     }
718
719     /* Loop over A and B states again */
720     for (i = 0; i < 2; i++)
721     {
722         fscal_elec[i] = 0;
723         fscal_vdw[i]  = 0;
724         velec[i]      = 0;
725         vvdw[i]       = 0;
726
727         /* Only spend time on A or B state if it is non-zero */
728         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
729         {
730             /* Coulomb */
731             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
732             r_coul           = pow(rpinv, -1.0/sc_r_power);
733
734             /* Electrostatics table lookup data */
735             rtab             = r_coul*tabscale;
736             ntab             = rtab;
737             eps              = rtab-ntab;
738             eps2             = eps*eps;
739             ntab             = 12*ntab;
740             /* Electrostatics */
741             Y                = vftab[ntab];
742             F                = vftab[ntab+1];
743             Geps             = eps*vftab[ntab+2];
744             Heps2            = eps2*vftab[ntab+3];
745             Fp               = F+Geps+Heps2;
746             VV               = Y+eps*Fp;
747             FF               = Fp+Geps+2.0*Heps2;
748             velec[i]         = qq[i]*VV;
749             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
750
751             /* Vdw */
752             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
753             r_vdw            = pow(rpinv, -1.0/sc_r_power);
754             /* Vdw table lookup data */
755             rtab             = r_vdw*tabscale;
756             ntab             = rtab;
757             eps              = rtab-ntab;
758             eps2             = eps*eps;
759             ntab             = 12*ntab;
760             /* Dispersion */
761             Y                = vftab[ntab+4];
762             F                = vftab[ntab+5];
763             Geps             = eps*vftab[ntab+6];
764             Heps2            = eps2*vftab[ntab+7];
765             Fp               = F+Geps+Heps2;
766             VV               = Y+eps*Fp;
767             FF               = Fp+Geps+2.0*Heps2;
768             vvdw[i]          = c6[i]*VV;
769             fscal_vdw[i]     = -c6[i]*FF;
770
771             /* Repulsion */
772             Y                = vftab[ntab+8];
773             F                = vftab[ntab+9];
774             Geps             = eps*vftab[ntab+10];
775             Heps2            = eps2*vftab[ntab+11];
776             Fp               = F+Geps+Heps2;
777             VV               = Y+eps*Fp;
778             FF               = Fp+Geps+2.0*Heps2;
779             vvdw[i]         += c12[i]*VV;
780             fscal_vdw[i]    -= c12[i]*FF;
781             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
782         }
783     }
784     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
785     /* Assemble A and B states */
786     velecsum  = 0;
787     vvdwsum   = 0;
788     dvdl_coul = 0;
789     dvdl_vdw  = 0;
790     fscal     = 0;
791     for (i = 0; i < 2; i++)
792     {
793         velecsum      += LFC[i]*velec[i];
794         vvdwsum       += LFV[i]*vvdw[i];
795
796         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
797
798         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
799         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
800     }
801
802     dvdl[efptCOUL]     += dvdl_coul;
803     dvdl[efptVDW]      += dvdl_vdw;
804
805     *velectot           = velecsum;
806     *vvdwtot            = vvdwsum;
807
808     return fscal;
809 }