Merge branch release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47
48 void
49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
50                           rvec *                    xx,
51                           rvec *                    ff,
52                           t_forcerec *              fr,
53                           t_mdatoms *               mdatoms,
54                           nb_kernel_data_t *        kernel_data,
55                           t_nrnb *                  nrnb)
56 {
57
58 #define  STATE_A  0
59 #define  STATE_B  1
60 #define  NSTATES  2
61     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
62     real          shX, shY, shZ;
63     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
64     real          Vcoul[NSTATES], Vvdw[NSTATES];
65     real          rinv6, r, rt, rtC, rtV;
66     real          iqA, iqB;
67     real          qq[NSTATES], vctot, krsq;
68     int           ntiA, ntiB, tj[NSTATES];
69     real          Vvdw6, Vvdw12, vvtot;
70     real          ix, iy, iz, fix, fiy, fiz;
71     real          dx, dy, dz, rsq, rinv;
72     real          c6[NSTATES], c12[NSTATES];
73     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
74     double        dvdl_coul, dvdl_vdw;
75     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
76     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
77     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
78     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
79     int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
80     int           n0, n1C, n1V, nnn;
81     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
82     int           icoul, ivdw;
83     int           nri;
84     int *         iinr;
85     int *         jindex;
86     int *         jjnr;
87     int *         shift;
88     int *         gid;
89     int *         typeA;
90     int *         typeB;
91     int           ntype;
92     real *        shiftvec;
93     real          dvdl_part;
94     real *        fshift;
95     real          tabscale;
96     real *        VFtab;
97     real *        x;
98     real *        f;
99     real          facel, krf, crf;
100     real *        chargeA;
101     real *        chargeB;
102     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
103     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
104     real *        nbfp;
105     real *        dvdl;
106     real *        Vv;
107     real *        Vc;
108     gmx_bool      bDoForces;
109     real          rcoulomb, rvdw, sh_invrc6;
110     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
111     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
112
113     x                   = xx[0];
114     f                   = ff[0];
115
116     fshift              = fr->fshift[0];
117     Vc                  = kernel_data->energygrp_elec;
118     Vv                  = kernel_data->energygrp_vdw;
119     tabscale            = kernel_data->table_elec_vdw->scale;
120     VFtab               = kernel_data->table_elec_vdw->data;
121
122     nri                 = nlist->nri;
123     iinr                = nlist->iinr;
124     jindex              = nlist->jindex;
125     jjnr                = nlist->jjnr;
126     icoul               = nlist->ielec;
127     ivdw                = nlist->ivdw;
128     shift               = nlist->shift;
129     gid                 = nlist->gid;
130
131     shiftvec            = fr->shift_vec[0];
132     chargeA             = mdatoms->chargeA;
133     chargeB             = mdatoms->chargeB;
134     facel               = fr->epsfac;
135     krf                 = fr->k_rf;
136     crf                 = fr->c_rf;
137     ewc                 = fr->ewaldcoeff;
138     typeA               = mdatoms->typeA;
139     typeB               = mdatoms->typeB;
140     ntype               = fr->ntype;
141     nbfp                = fr->nbfp;
142     lambda_coul         = kernel_data->lambda[efptCOUL];
143     lambda_vdw          = kernel_data->lambda[efptVDW];
144     dvdl                = kernel_data->dvdl;
145     alpha_coul          = fr->sc_alphacoul;
146     alpha_vdw           = fr->sc_alphavdw;
147     lam_power           = fr->sc_power;
148     sc_r_power          = fr->sc_r_power;
149     sigma6_def          = fr->sc_sigma6_def;
150     sigma6_min          = fr->sc_sigma6_min;
151     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
152
153     rcoulomb            = fr->rcoulomb;
154     rvdw                = fr->rvdw;
155     sh_invrc6           = fr->ic->sh_invrc6;
156
157     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
158     {
159         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
160         rcutoff2        = rcutoff*rcutoff;
161         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
162         d               = rcutoff-rswitch;
163         swV3            = -10.0/(d*d*d);
164         swV4            =  15.0/(d*d*d*d);
165         swV5            =  -6.0/(d*d*d*d*d);
166         swF2            = -30.0/(d*d*d);
167         swF3            =  60.0/(d*d*d*d);
168         swF4            = -30.0/(d*d*d*d*d);
169     }
170     else
171     {
172         /* Stupid compilers dont realize these variables will not be used */
173         rswitch         = 0.0;
174         swV3            = 0.0;
175         swV4            = 0.0;
176         swV5            = 0.0;
177         swF2            = 0.0;
178         swF3            = 0.0;
179         swF4            = 0.0;
180     }
181
182     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
183     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
184
185     /* fix compiler warnings */
186     nj1   = 0;
187     n1C   = n1V   = 0;
188     epsC  = epsV  = 0;
189     eps2C = eps2V = 0;
190
191     dvdl_coul  = 0;
192     dvdl_vdw   = 0;
193
194     /* Lambda factor for state A, 1-lambda*/
195     LFC[STATE_A] = 1.0 - lambda_coul;
196     LFV[STATE_A] = 1.0 - lambda_vdw;
197
198     /* Lambda factor for state B, lambda*/
199     LFC[STATE_B] = lambda_coul;
200     LFV[STATE_B] = lambda_vdw;
201
202     /*derivative of the lambda factor for state A and B */
203     DLF[STATE_A] = -1;
204     DLF[STATE_B] = 1;
205
206     for (i = 0; i < NSTATES; i++)
207     {
208         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
209         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
210         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
211         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
212     }
213     /* precalculate */
214     sigma2_def = pow(sigma6_def, 1.0/3.0);
215     sigma2_min = pow(sigma6_min, 1.0/3.0);
216
217     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
218
219     do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
220     do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
221
222     do_tab = do_coultab || do_vdwtab;
223
224     /* we always use the combined table here */
225     tab_elemsize = 12;
226
227     for (n = 0; (n < nri); n++)
228     {
229         is3              = 3*shift[n];
230         shX              = shiftvec[is3];
231         shY              = shiftvec[is3+1];
232         shZ              = shiftvec[is3+2];
233         nj0              = jindex[n];
234         nj1              = jindex[n+1];
235         ii               = iinr[n];
236         ii3              = 3*ii;
237         ix               = shX + x[ii3+0];
238         iy               = shY + x[ii3+1];
239         iz               = shZ + x[ii3+2];
240         iqA              = facel*chargeA[ii];
241         iqB              = facel*chargeB[ii];
242         ntiA             = 2*ntype*typeA[ii];
243         ntiB             = 2*ntype*typeB[ii];
244         vctot            = 0;
245         vvtot            = 0;
246         fix              = 0;
247         fiy              = 0;
248         fiz              = 0;
249
250         for (k = nj0; (k < nj1); k++)
251         {
252             jnr              = jjnr[k];
253             j3               = 3*jnr;
254             dx               = ix - x[j3];
255             dy               = iy - x[j3+1];
256             dz               = iz - x[j3+2];
257             rsq              = dx*dx+dy*dy+dz*dz;
258             rinv             = gmx_invsqrt(rsq);
259             r                = rsq*rinv;
260             if (sc_r_power == 6.0)
261             {
262                 rpm2             = rsq*rsq;  /* r4 */
263                 rp               = rpm2*rsq; /* r6 */
264             }
265             else if (sc_r_power == 48.0)
266             {
267                 rp               = rsq*rsq*rsq; /* r6 */
268                 rp               = rp*rp;       /* r12 */
269                 rp               = rp*rp;       /* r24 */
270                 rp               = rp*rp;       /* r48 */
271                 rpm2             = rp/rsq;      /* r46 */
272             }
273             else
274             {
275                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
276                 rpm2           = rp/rsq;
277             }
278
279             tj[STATE_A]      = ntiA+2*typeA[jnr];
280             tj[STATE_B]      = ntiB+2*typeB[jnr];
281             qq[STATE_A]      = iqA*chargeA[jnr];
282             qq[STATE_B]      = iqB*chargeB[jnr];
283
284             for (i = 0; i < NSTATES; i++)
285             {
286
287                 c6[i]              = nbfp[tj[i]];
288                 c12[i]             = nbfp[tj[i]+1];
289                 if ((c6[i] > 0) && (c12[i] > 0))
290                 {
291                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
292                     sigma6[i]       = 0.5*c12[i]/c6[i];
293                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
294                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
295                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
296                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
297                     {
298                         sigma6[i] = sigma6_min;
299                         sigma2[i] = sigma2_min;
300                     }
301                 }
302                 else
303                 {
304                     sigma6[i]       = sigma6_def;
305                     sigma2[i]       = sigma2_def;
306                 }
307                 if (sc_r_power == 6.0)
308                 {
309                     sigma_pow[i]    = sigma6[i];
310                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
311                 }
312                 else if (sc_r_power == 48.0)
313                 {
314                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
315                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
316                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
317                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
318                 }
319                 else
320                 {    /* not really supported as input, but in here for testing the general case*/
321                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
322                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
323                 }
324             }
325
326             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
327             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
328             {
329                 alpha_vdw_eff    = 0;
330                 alpha_coul_eff   = 0;
331             }
332             else
333             {
334                 alpha_vdw_eff    = alpha_vdw;
335                 alpha_coul_eff   = alpha_coul;
336             }
337
338             for (i = 0; i < NSTATES; i++)
339             {
340                 FscalC[i]    = 0;
341                 FscalV[i]    = 0;
342                 Vcoul[i]     = 0;
343                 Vvdw[i]      = 0;
344
345                 /* Only spend time on A or B state if it is non-zero */
346                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
347                 {
348
349                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
350                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
351                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
352                     rC             = 1.0/rinvC;
353
354                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
355                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
356                     rV             = 1.0/rinvV;
357
358                     if (do_tab)
359                     {
360                         rtC        = rC*tabscale;
361                         n0         = rtC;
362                         epsC       = rtC-n0;
363                         eps2C      = epsC*epsC;
364                         n1C        = tab_elemsize*n0;
365
366                         rtV        = rV*tabscale;
367                         n0         = rtV;
368                         epsV       = rtV-n0;
369                         eps2V      = epsV*epsV;
370                         n1V        = tab_elemsize*n0;
371                     }
372
373                     /* With Ewald and soft-core we should put the cut-off on r,
374                      * not on the soft-cored rC, as the real-space and
375                      * reciprocal space contributions should (almost) cancel.
376                      */
377                     if (qq[i] != 0 &&
378                         !(bExactElecCutoff &&
379                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
380                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
381                     {
382                         switch (icoul)
383                         {
384                             case GMX_NBKERNEL_ELEC_COULOMB:
385                             case GMX_NBKERNEL_ELEC_EWALD:
386                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
387                                 Vcoul[i]   = qq[i]*rinvC;
388                                 FscalC[i]  = Vcoul[i]*rpinvC;
389                                 break;
390
391                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
392                                 /* reaction-field */
393                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
394                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
395                                 break;
396
397                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
398                                 /* non-Ewald tabulated coulomb */
399                                 nnn        = n1C;
400                                 Y          = VFtab[nnn];
401                                 F          = VFtab[nnn+1];
402                                 Geps       = epsC*VFtab[nnn+2];
403                                 Heps2      = eps2C*VFtab[nnn+3];
404                                 Fp         = F+Geps+Heps2;
405                                 VV         = Y+epsC*Fp;
406                                 FF         = Fp+Geps+2.0*Heps2;
407                                 Vcoul[i]   = qq[i]*VV;
408                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
409                                 break;
410
411                             case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
412                                 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
413                                 break;
414
415                             case GMX_NBKERNEL_ELEC_NONE:
416                                 FscalC[i]  = 0.0;
417                                 Vcoul[i]   = 0.0;
418                                 break;
419
420                             default:
421                                 gmx_incons("Invalid icoul in free energy kernel");
422                                 break;
423                         }
424
425                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
426                         {
427                             d                = rC-rswitch;
428                             d                = (d > 0.0) ? d : 0.0;
429                             d2               = d*d;
430                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
431                             dsw              = d2*(swF2+d*(swF3+d*swF4));
432
433                             Vcoul[i]        *= sw;
434                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
435                         }
436                     }
437
438                     if ((c6[i] != 0 || c12[i] != 0) &&
439                         !(bExactVdwCutoff && rV >= rvdw))
440                     {
441                         switch (ivdw)
442                         {
443                             case GMX_NBKERNEL_VDW_LENNARDJONES:
444                                 /* cutoff LJ */
445                                 if (sc_r_power == 6.0)
446                                 {
447                                     rinv6            = rpinvV;
448                                 }
449                                 else
450                                 {
451                                     rinv6            = pow(rinvV, 6.0);
452                                 }
453                                 Vvdw6            = c6[i]*rinv6;
454                                 Vvdw12           = c12[i]*rinv6*rinv6;
455                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
456                                 {
457                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
458                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
459                                 }
460                                 else
461                                 {
462                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
463                                 }
464                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
465                                 break;
466
467                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
468                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
469                                 break;
470
471                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
472                                 /* Table LJ */
473                                 nnn = n1V+4;
474                                 /* dispersion */
475                                 Y          = VFtab[nnn];
476                                 F          = VFtab[nnn+1];
477                                 Geps       = epsV*VFtab[nnn+2];
478                                 Heps2      = eps2V*VFtab[nnn+3];
479                                 Fp         = F+Geps+Heps2;
480                                 VV         = Y+epsV*Fp;
481                                 FF         = Fp+Geps+2.0*Heps2;
482                                 Vvdw[i]   += c6[i]*VV;
483                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
484
485                                 /* repulsion */
486                                 Y          = VFtab[nnn+4];
487                                 F          = VFtab[nnn+5];
488                                 Geps       = epsV*VFtab[nnn+6];
489                                 Heps2      = eps2V*VFtab[nnn+7];
490                                 Fp         = F+Geps+Heps2;
491                                 VV         = Y+epsV*Fp;
492                                 FF         = Fp+Geps+2.0*Heps2;
493                                 Vvdw[i]   += c12[i]*VV;
494                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
495                                 break;
496
497                             case GMX_NBKERNEL_VDW_NONE:
498                                 Vvdw[i]    = 0.0;
499                                 FscalV[i]  = 0.0;
500                                 break;
501
502                             default:
503                                 gmx_incons("Invalid ivdw in free energy kernel");
504                                 break;
505                         }
506
507                         if (fr->vdw_modifier == eintmodPOTSWITCH)
508                         {
509                             d                = rV-rswitch;
510                             d                = (d > 0.0) ? d : 0.0;
511                             d2               = d*d;
512                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
513                             dsw              = d2*(swF2+d*(swF3+d*swF4));
514
515                             Vvdw[i]         *= sw;
516                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
517
518                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
519                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
520                         }
521                     }
522                 }
523             }
524
525             Fscal = 0;
526
527             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
528                 !(bExactElecCutoff && r >= rcoulomb))
529             {
530                 /* because we compute the softcore normally,
531                    we have to remove the ewald short range portion. Done outside of
532                    the states loop because this part doesn't depend on the scaled R */
533
534 #ifdef GMX_DOUBLE
535                 /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
536 #define         R_ERF_R_INACC 0.006
537 #else
538                 /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
539 #define         R_ERF_R_INACC 0.1
540 #endif
541                 if (ewc*r > R_ERF_R_INACC)
542                 {
543                     VV    = gmx_erf(ewc*r)*rinv;
544                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
545                 }
546                 else
547                 {
548                     VV    = ewc*M_2_SQRTPI;
549                     FF    = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
550                 }
551
552                 for (i = 0; i < NSTATES; i++)
553                 {
554                     vctot      -= LFC[i]*qq[i]*VV;
555                     Fscal      -= LFC[i]*qq[i]*FF;
556                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
557                 }
558             }
559
560             /* Assemble A and B states */
561             for (i = 0; i < NSTATES; i++)
562             {
563                 vctot         += LFC[i]*Vcoul[i];
564                 vvtot         += LFV[i]*Vvdw[i];
565
566                 Fscal         += LFC[i]*FscalC[i]*rpm2;
567                 Fscal         += LFV[i]*FscalV[i]*rpm2;
568
569                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
570                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
571             }
572
573             if (bDoForces)
574             {
575                 tx         = Fscal*dx;
576                 ty         = Fscal*dy;
577                 tz         = Fscal*dz;
578                 fix        = fix + tx;
579                 fiy        = fiy + ty;
580                 fiz        = fiz + tz;
581                 f[j3]      = f[j3]   - tx;
582                 f[j3+1]    = f[j3+1] - ty;
583                 f[j3+2]    = f[j3+2] - tz;
584             }
585         }
586
587         if (bDoForces)
588         {
589             f[ii3]         = f[ii3]        + fix;
590             f[ii3+1]       = f[ii3+1]      + fiy;
591             f[ii3+2]       = f[ii3+2]      + fiz;
592             fshift[is3]    = fshift[is3]   + fix;
593             fshift[is3+1]  = fshift[is3+1] + fiy;
594             fshift[is3+2]  = fshift[is3+2] + fiz;
595         }
596         ggid               = gid[n];
597         Vc[ggid]           = Vc[ggid] + vctot;
598         Vv[ggid]           = Vv[ggid] + vvtot;
599     }
600
601     dvdl[efptCOUL]     += dvdl_coul;
602     dvdl[efptVDW]      += dvdl_vdw;
603
604     /* Estimate flops, average for free energy stuff:
605      * 12  flops per outer iteration
606      * 150 flops per inner iteration
607      */
608     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
609 }
610
611 real
612 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
613                                real tabscale, real *vftab,
614                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
615                                real LFC[2], real LFV[2], real DLF[2],
616                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
617                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
618                                real *velectot, real *vvdwtot, real *dvdl)
619 {
620     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
621     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
622     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
623     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
624     real       fscal_vdw[2], fscal_elec[2];
625     real       velec[2], vvdw[2];
626     int        i, ntab;
627
628     qq[0]    = qqA;
629     qq[1]    = qqB;
630     c6[0]    = c6A;
631     c6[1]    = c6B;
632     c12[0]   = c12A;
633     c12[1]   = c12B;
634
635     if (sc_r_power == 6.0)
636     {
637         rpm2             = r2*r2;   /* r4 */
638         rp               = rpm2*r2; /* r6 */
639     }
640     else if (sc_r_power == 48.0)
641     {
642         rp               = r2*r2*r2; /* r6 */
643         rp               = rp*rp;    /* r12 */
644         rp               = rp*rp;    /* r24 */
645         rp               = rp*rp;    /* r48 */
646         rpm2             = rp/r2;    /* r46 */
647     }
648     else
649     {
650         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
651         rpm2           = rp/r2;
652     }
653
654     /* Loop over state A(0) and B(1) */
655     for (i = 0; i < 2; i++)
656     {
657         if ((c6[i] > 0) && (c12[i] > 0))
658         {
659             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
660              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
661              */
662             sigma6[i]       = 0.5*c12[i]/c6[i];
663             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
664             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
665                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
666             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
667             {
668                 sigma6[i] = sigma6_min;
669                 sigma2[i] = sigma2_min;
670             }
671         }
672         else
673         {
674             sigma6[i]       = sigma6_def;
675             sigma2[i]       = sigma2_def;
676         }
677         if (sc_r_power == 6.0)
678         {
679             sigma_pow[i]    = sigma6[i];
680             sigma_powm2[i]  = sigma6[i]/sigma2[i];
681         }
682         else if (sc_r_power == 48.0)
683         {
684             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
685             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
686             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
687             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
688         }
689         else
690         {    /* not really supported as input, but in here for testing the general case*/
691             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
692             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
693         }
694     }
695
696     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
697     if ((c12[0] > 0) && (c12[1] > 0))
698     {
699         alpha_vdw_eff    = 0;
700         alpha_coul_eff   = 0;
701     }
702     else
703     {
704         alpha_vdw_eff    = alpha_vdw;
705         alpha_coul_eff   = alpha_coul;
706     }
707
708     /* Loop over A and B states again */
709     for (i = 0; i < 2; i++)
710     {
711         fscal_elec[i] = 0;
712         fscal_vdw[i]  = 0;
713         velec[i]      = 0;
714         vvdw[i]       = 0;
715
716         /* Only spend time on A or B state if it is non-zero */
717         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
718         {
719             /* Coulomb */
720             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
721             r_coul           = pow(rpinv, -1.0/sc_r_power);
722
723             /* Electrostatics table lookup data */
724             rtab             = r_coul*tabscale;
725             ntab             = rtab;
726             eps              = rtab-ntab;
727             eps2             = eps*eps;
728             ntab             = 12*ntab;
729             /* Electrostatics */
730             Y                = vftab[ntab];
731             F                = vftab[ntab+1];
732             Geps             = eps*vftab[ntab+2];
733             Heps2            = eps2*vftab[ntab+3];
734             Fp               = F+Geps+Heps2;
735             VV               = Y+eps*Fp;
736             FF               = Fp+Geps+2.0*Heps2;
737             velec[i]         = qq[i]*VV;
738             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
739
740             /* Vdw */
741             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
742             r_vdw            = pow(rpinv, -1.0/sc_r_power);
743             /* Vdw table lookup data */
744             rtab             = r_vdw*tabscale;
745             ntab             = rtab;
746             eps              = rtab-ntab;
747             eps2             = eps*eps;
748             ntab             = 12*ntab;
749             /* Dispersion */
750             Y                = vftab[ntab+4];
751             F                = vftab[ntab+5];
752             Geps             = eps*vftab[ntab+6];
753             Heps2            = eps2*vftab[ntab+7];
754             Fp               = F+Geps+Heps2;
755             VV               = Y+eps*Fp;
756             FF               = Fp+Geps+2.0*Heps2;
757             vvdw[i]          = c6[i]*VV;
758             fscal_vdw[i]     = -c6[i]*FF;
759
760             /* Repulsion */
761             Y                = vftab[ntab+8];
762             F                = vftab[ntab+9];
763             Geps             = eps*vftab[ntab+10];
764             Heps2            = eps2*vftab[ntab+11];
765             Fp               = F+Geps+Heps2;
766             VV               = Y+eps*Fp;
767             FF               = Fp+Geps+2.0*Heps2;
768             vvdw[i]         += c12[i]*VV;
769             fscal_vdw[i]    -= c12[i]*FF;
770             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
771         }
772     }
773     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
774     /* Assemble A and B states */
775     velecsum  = 0;
776     vvdwsum   = 0;
777     dvdl_coul = 0;
778     dvdl_vdw  = 0;
779     fscal     = 0;
780     for (i = 0; i < 2; i++)
781     {
782         velecsum      += LFC[i]*velec[i];
783         vvdwsum       += LFV[i]*vvdw[i];
784
785         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
786
787         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
788         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
789     }
790
791     dvdl[efptCOUL]     += dvdl_coul;
792     dvdl[efptVDW]      += dvdl_vdw;
793
794     *velectot           = velecsum;
795     *vvdwtot            = vvdwsum;
796
797     return fscal;
798 }