Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47 #include "nb_free_energy.h"
48
49 void
50 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
51                           rvec * gmx_restrict              xx,
52                           rvec * gmx_restrict              ff,
53                           t_forcerec * gmx_restrict        fr,
54                           const t_mdatoms * gmx_restrict   mdatoms,
55                           nb_kernel_data_t * gmx_restrict  kernel_data,
56                           t_nrnb * gmx_restrict            nrnb)
57 {
58
59 #define  STATE_A  0
60 #define  STATE_B  1
61 #define  NSTATES  2
62     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
63     real          shX, shY, shZ;
64     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
65     real          Vcoul[NSTATES], Vvdw[NSTATES];
66     real          rinv6, r, rt, rtC, rtV;
67     real          iqA, iqB;
68     real          qq[NSTATES], vctot, krsq;
69     int           ntiA, ntiB, tj[NSTATES];
70     real          Vvdw6, Vvdw12, vvtot;
71     real          ix, iy, iz, fix, fiy, fiz;
72     real          dx, dy, dz, rsq, rinv;
73     real          c6[NSTATES], c12[NSTATES];
74     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
75     double        dvdl_coul, dvdl_vdw;
76     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
77     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
78     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
79     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
80     int           do_tab, tab_elemsize;
81     int           n0, n1C, n1V, nnn;
82     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
83     int           icoul, ivdw;
84     int           nri;
85     const int *   iinr;
86     const int *   jindex;
87     const int *   jjnr;
88     const int *   shift;
89     const int *   gid;
90     const int *   typeA;
91     const int *   typeB;
92     int           ntype;
93     const real *  shiftvec;
94     real          dvdl_part;
95     real *        fshift;
96     real          tabscale;
97     const real *  VFtab;
98     const real *  x;
99     real *        f;
100     real          facel, krf, crf;
101     const real *  chargeA;
102     const real *  chargeB;
103     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
104     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
105     const real *  nbfp;
106     real *        dvdl;
107     real *        Vv;
108     real *        Vc;
109     gmx_bool      bDoForces;
110     real          rcoulomb, rvdw, sh_invrc6;
111     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
112     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
113     const real *  tab_ewald_F;
114     const real *  tab_ewald_V;
115     real          tab_ewald_scale, tab_ewald_halfsp;
116
117     x                   = xx[0];
118     f                   = ff[0];
119
120     fshift              = fr->fshift[0];
121
122     nri                 = nlist->nri;
123     iinr                = nlist->iinr;
124     jindex              = nlist->jindex;
125     jjnr                = nlist->jjnr;
126     icoul               = nlist->ielec;
127     ivdw                = nlist->ivdw;
128     shift               = nlist->shift;
129     gid                 = nlist->gid;
130
131     shiftvec            = fr->shift_vec[0];
132     chargeA             = mdatoms->chargeA;
133     chargeB             = mdatoms->chargeB;
134     facel               = fr->epsfac;
135     krf                 = fr->k_rf;
136     crf                 = fr->c_rf;
137     ewc                 = fr->ewaldcoeff;
138     Vc                  = kernel_data->energygrp_elec;
139     typeA               = mdatoms->typeA;
140     typeB               = mdatoms->typeB;
141     ntype               = fr->ntype;
142     nbfp                = fr->nbfp;
143     Vv                  = kernel_data->energygrp_vdw;
144     tabscale            = kernel_data->table_elec_vdw->scale;
145     VFtab               = kernel_data->table_elec_vdw->data;
146     lambda_coul         = kernel_data->lambda[efptCOUL];
147     lambda_vdw          = kernel_data->lambda[efptVDW];
148     dvdl                = kernel_data->dvdl;
149     alpha_coul          = fr->sc_alphacoul;
150     alpha_vdw           = fr->sc_alphavdw;
151     lam_power           = fr->sc_power;
152     sc_r_power          = fr->sc_r_power;
153     sigma6_def          = fr->sc_sigma6_def;
154     sigma6_min          = fr->sc_sigma6_min;
155     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
156
157     rcoulomb            = fr->rcoulomb;
158     rvdw                = fr->rvdw;
159     sh_invrc6           = fr->ic->sh_invrc6;
160
161     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
162     tab_ewald_F         = fr->ic->tabq_coul_F;
163     tab_ewald_V         = fr->ic->tabq_coul_V;
164     tab_ewald_scale     = fr->ic->tabq_scale;
165     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
166
167     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
168     {
169         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
170         rcutoff2        = rcutoff*rcutoff;
171         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
172         d               = rcutoff-rswitch;
173         swV3            = -10.0/(d*d*d);
174         swV4            =  15.0/(d*d*d*d);
175         swV5            =  -6.0/(d*d*d*d*d);
176         swF2            = -30.0/(d*d*d);
177         swF3            =  60.0/(d*d*d*d);
178         swF4            = -30.0/(d*d*d*d*d);
179     }
180     else
181     {
182         /* Stupid compilers dont realize these variables will not be used */
183         rswitch         = 0.0;
184         swV3            = 0.0;
185         swV4            = 0.0;
186         swV5            = 0.0;
187         swF2            = 0.0;
188         swF3            = 0.0;
189         swF4            = 0.0;
190     }
191
192     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
193     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
194
195     /* fix compiler warnings */
196     nj1   = 0;
197     n1C   = n1V   = 0;
198     epsC  = epsV  = 0;
199     eps2C = eps2V = 0;
200
201     dvdl_coul  = 0;
202     dvdl_vdw   = 0;
203
204     /* Lambda factor for state A, 1-lambda*/
205     LFC[STATE_A] = 1.0 - lambda_coul;
206     LFV[STATE_A] = 1.0 - lambda_vdw;
207
208     /* Lambda factor for state B, lambda*/
209     LFC[STATE_B] = lambda_coul;
210     LFV[STATE_B] = lambda_vdw;
211
212     /*derivative of the lambda factor for state A and B */
213     DLF[STATE_A] = -1;
214     DLF[STATE_B] = 1;
215
216     for (i = 0; i < NSTATES; i++)
217     {
218         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
219         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
220         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
221         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
222     }
223     /* precalculate */
224     sigma2_def = pow(sigma6_def, 1.0/3.0);
225     sigma2_min = pow(sigma6_min, 1.0/3.0);
226
227     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
228
229     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
230               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
231
232     /* we always use the combined table here */
233     tab_elemsize = 12;
234
235     for (n = 0; (n < nri); n++)
236     {
237         is3              = 3*shift[n];
238         shX              = shiftvec[is3];
239         shY              = shiftvec[is3+1];
240         shZ              = shiftvec[is3+2];
241         nj0              = jindex[n];
242         nj1              = jindex[n+1];
243         ii               = iinr[n];
244         ii3              = 3*ii;
245         ix               = shX + x[ii3+0];
246         iy               = shY + x[ii3+1];
247         iz               = shZ + x[ii3+2];
248         iqA              = facel*chargeA[ii];
249         iqB              = facel*chargeB[ii];
250         ntiA             = 2*ntype*typeA[ii];
251         ntiB             = 2*ntype*typeB[ii];
252         vctot            = 0;
253         vvtot            = 0;
254         fix              = 0;
255         fiy              = 0;
256         fiz              = 0;
257
258         for (k = nj0; (k < nj1); k++)
259         {
260             jnr              = jjnr[k];
261             j3               = 3*jnr;
262             dx               = ix - x[j3];
263             dy               = iy - x[j3+1];
264             dz               = iz - x[j3+2];
265             rsq              = dx*dx+dy*dy+dz*dz;
266             rinv             = gmx_invsqrt(rsq);
267             r                = rsq*rinv;
268             if (sc_r_power == 6.0)
269             {
270                 rpm2             = rsq*rsq;  /* r4 */
271                 rp               = rpm2*rsq; /* r6 */
272             }
273             else if (sc_r_power == 48.0)
274             {
275                 rp               = rsq*rsq*rsq; /* r6 */
276                 rp               = rp*rp;       /* r12 */
277                 rp               = rp*rp;       /* r24 */
278                 rp               = rp*rp;       /* r48 */
279                 rpm2             = rp/rsq;      /* r46 */
280             }
281             else
282             {
283                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
284                 rpm2           = rp/rsq;
285             }
286
287             tj[STATE_A]      = ntiA+2*typeA[jnr];
288             tj[STATE_B]      = ntiB+2*typeB[jnr];
289             qq[STATE_A]      = iqA*chargeA[jnr];
290             qq[STATE_B]      = iqB*chargeB[jnr];
291
292             for (i = 0; i < NSTATES; i++)
293             {
294
295                 c6[i]              = nbfp[tj[i]];
296                 c12[i]             = nbfp[tj[i]+1];
297                 if ((c6[i] > 0) && (c12[i] > 0))
298                 {
299                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
300                     sigma6[i]       = 0.5*c12[i]/c6[i];
301                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
302                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
303                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
304                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
305                     {
306                         sigma6[i] = sigma6_min;
307                         sigma2[i] = sigma2_min;
308                     }
309                 }
310                 else
311                 {
312                     sigma6[i]       = sigma6_def;
313                     sigma2[i]       = sigma2_def;
314                 }
315                 if (sc_r_power == 6.0)
316                 {
317                     sigma_pow[i]    = sigma6[i];
318                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
319                 }
320                 else if (sc_r_power == 48.0)
321                 {
322                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
323                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
324                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
325                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
326                 }
327                 else
328                 {    /* not really supported as input, but in here for testing the general case*/
329                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
330                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
331                 }
332             }
333
334             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
335             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
336             {
337                 alpha_vdw_eff    = 0;
338                 alpha_coul_eff   = 0;
339             }
340             else
341             {
342                 alpha_vdw_eff    = alpha_vdw;
343                 alpha_coul_eff   = alpha_coul;
344             }
345
346             for (i = 0; i < NSTATES; i++)
347             {
348                 FscalC[i]    = 0;
349                 FscalV[i]    = 0;
350                 Vcoul[i]     = 0;
351                 Vvdw[i]      = 0;
352
353                 /* Only spend time on A or B state if it is non-zero */
354                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
355                 {
356
357                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
358                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
359                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
360                     rC             = 1.0/rinvC;
361
362                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
363                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
364                     rV             = 1.0/rinvV;
365
366                     if (do_tab)
367                     {
368                         rtC        = rC*tabscale;
369                         n0         = rtC;
370                         epsC       = rtC-n0;
371                         eps2C      = epsC*epsC;
372                         n1C        = tab_elemsize*n0;
373
374                         rtV        = rV*tabscale;
375                         n0         = rtV;
376                         epsV       = rtV-n0;
377                         eps2V      = epsV*epsV;
378                         n1V        = tab_elemsize*n0;
379                     }
380
381                     /* With Ewald and soft-core we should put the cut-off on r,
382                      * not on the soft-cored rC, as the real-space and
383                      * reciprocal space contributions should (almost) cancel.
384                      */
385                     if (qq[i] != 0 &&
386                         !(bExactElecCutoff &&
387                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
388                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
389                     {
390                         switch (icoul)
391                         {
392                             case GMX_NBKERNEL_ELEC_COULOMB:
393                             case GMX_NBKERNEL_ELEC_EWALD:
394                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
395                                 Vcoul[i]   = qq[i]*rinvC;
396                                 FscalC[i]  = Vcoul[i];
397                                 break;
398
399                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
400                                 /* reaction-field */
401                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
402                                 FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
403                                 break;
404
405                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
406                                 /* non-Ewald tabulated coulomb */
407                                 nnn        = n1C;
408                                 Y          = VFtab[nnn];
409                                 F          = VFtab[nnn+1];
410                                 Geps       = epsC*VFtab[nnn+2];
411                                 Heps2      = eps2C*VFtab[nnn+3];
412                                 Fp         = F+Geps+Heps2;
413                                 VV         = Y+epsC*Fp;
414                                 FF         = Fp+Geps+2.0*Heps2;
415                                 Vcoul[i]   = qq[i]*VV;
416                                 FscalC[i]  = -qq[i]*tabscale*FF*rC;
417                                 break;
418
419                             case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
420                                 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
421                                 break;
422
423                             case GMX_NBKERNEL_ELEC_NONE:
424                                 FscalC[i]  = 0.0;
425                                 Vcoul[i]   = 0.0;
426                                 break;
427
428                             default:
429                                 gmx_incons("Invalid icoul in free energy kernel");
430                                 break;
431                         }
432
433                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
434                         {
435                             d                = rC-rswitch;
436                             d                = (d > 0.0) ? d : 0.0;
437                             d2               = d*d;
438                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
439                             dsw              = d2*(swF2+d*(swF3+d*swF4));
440
441                             Vcoul[i]        *= sw;
442                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
443                         }
444                     }
445
446                     if ((c6[i] != 0 || c12[i] != 0) &&
447                         !(bExactVdwCutoff && rV >= rvdw))
448                     {
449                         switch (ivdw)
450                         {
451                             case GMX_NBKERNEL_VDW_LENNARDJONES:
452                                 /* cutoff LJ */
453                                 if (sc_r_power == 6.0)
454                                 {
455                                     rinv6            = rpinvV;
456                                 }
457                                 else
458                                 {
459                                     rinv6            = pow(rinvV, 6.0);
460                                 }
461                                 Vvdw6            = c6[i]*rinv6;
462                                 Vvdw12           = c12[i]*rinv6*rinv6;
463                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
464                                 {
465                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
466                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
467                                 }
468                                 else
469                                 {
470                                     Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
471                                 }
472                                 FscalV[i]        = Vvdw12 - Vvdw6;
473                                 break;
474
475                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
476                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
477                                 break;
478
479                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
480                                 /* Table LJ */
481                                 nnn = n1V+4;
482                                 /* dispersion */
483                                 Y          = VFtab[nnn];
484                                 F          = VFtab[nnn+1];
485                                 Geps       = epsV*VFtab[nnn+2];
486                                 Heps2      = eps2V*VFtab[nnn+3];
487                                 Fp         = F+Geps+Heps2;
488                                 VV         = Y+epsV*Fp;
489                                 FF         = Fp+Geps+2.0*Heps2;
490                                 Vvdw[i]   += c6[i]*VV;
491                                 FscalV[i] -= c6[i]*tabscale*FF*rV;
492
493                                 /* repulsion */
494                                 Y          = VFtab[nnn+4];
495                                 F          = VFtab[nnn+5];
496                                 Geps       = epsV*VFtab[nnn+6];
497                                 Heps2      = eps2V*VFtab[nnn+7];
498                                 Fp         = F+Geps+Heps2;
499                                 VV         = Y+epsV*Fp;
500                                 FF         = Fp+Geps+2.0*Heps2;
501                                 Vvdw[i]   += c12[i]*VV;
502                                 FscalV[i] -= c12[i]*tabscale*FF*rV;
503                                 break;
504
505                             case GMX_NBKERNEL_VDW_NONE:
506                                 Vvdw[i]    = 0.0;
507                                 FscalV[i]  = 0.0;
508                                 break;
509
510                             default:
511                                 gmx_incons("Invalid ivdw in free energy kernel");
512                                 break;
513                         }
514
515                         if (fr->vdw_modifier == eintmodPOTSWITCH)
516                         {
517                             d                = rV-rswitch;
518                             d                = (d > 0.0) ? d : 0.0;
519                             d2               = d*d;
520                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
521                             dsw              = d2*(swF2+d*(swF3+d*swF4));
522
523                             Vvdw[i]         *= sw;
524                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
525
526                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
527                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
528                         }
529                     }
530
531                     /* FscalC (and FscalV) now contain: dV/drC * rC
532                      * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
533                      * Further down we first multiply by r^p-2 and then by
534                      * the vector r, which in total gives: dV/drC * (r/rC)^1-p
535                      */
536                     FscalC[i] *= rpinvC;
537                     FscalV[i] *= rpinvV;
538                 }
539             }
540
541             Fscal = 0;
542
543             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
544                 !(bExactElecCutoff && r >= rcoulomb))
545             {
546                 /* Because we compute the soft-core normally,
547                  * we have to remove the Ewald short range portion.
548                  * Done outside of the states loop because this part
549                  * doesn't depend on the scaled R.
550                  */
551                 real rs, frac, f_lr;
552                 int  ri;
553
554                 rs     = rsq*rinv*tab_ewald_scale;
555                 ri     = (int)rs;
556                 frac   = rs - ri;
557                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
558                 FF     = f_lr*rinv;
559                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
560
561                 for (i = 0; i < NSTATES; i++)
562                 {
563                     vctot      -= LFC[i]*qq[i]*VV;
564                     Fscal      -= LFC[i]*qq[i]*FF;
565                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
566                 }
567             }
568
569             /* Assemble A and B states */
570             for (i = 0; i < NSTATES; i++)
571             {
572                 vctot         += LFC[i]*Vcoul[i];
573                 vvtot         += LFV[i]*Vvdw[i];
574
575                 Fscal         += LFC[i]*FscalC[i]*rpm2;
576                 Fscal         += LFV[i]*FscalV[i]*rpm2;
577
578                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
579                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
580             }
581
582             if (bDoForces)
583             {
584                 tx         = Fscal*dx;
585                 ty         = Fscal*dy;
586                 tz         = Fscal*dz;
587                 fix        = fix + tx;
588                 fiy        = fiy + ty;
589                 fiz        = fiz + tz;
590                 f[j3]      = f[j3]   - tx;
591                 f[j3+1]    = f[j3+1] - ty;
592                 f[j3+2]    = f[j3+2] - tz;
593             }
594         }
595
596         if (bDoForces)
597         {
598             f[ii3]         = f[ii3]        + fix;
599             f[ii3+1]       = f[ii3+1]      + fiy;
600             f[ii3+2]       = f[ii3+2]      + fiz;
601             fshift[is3]    = fshift[is3]   + fix;
602             fshift[is3+1]  = fshift[is3+1] + fiy;
603             fshift[is3+2]  = fshift[is3+2] + fiz;
604         }
605         ggid               = gid[n];
606         Vc[ggid]           = Vc[ggid] + vctot;
607         Vv[ggid]           = Vv[ggid] + vvtot;
608     }
609
610     dvdl[efptCOUL]     += dvdl_coul;
611     dvdl[efptVDW]      += dvdl_vdw;
612
613     /* Estimate flops, average for free energy stuff:
614      * 12  flops per outer iteration
615      * 150 flops per inner iteration
616      */
617     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
618 }
619
620 real
621 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
622                                real tabscale, real *vftab,
623                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
624                                real LFC[2], real LFV[2], real DLF[2],
625                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
626                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
627                                real *velectot, real *vvdwtot, real *dvdl)
628 {
629     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
630     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
631     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
632     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
633     real       fscal_vdw[2], fscal_elec[2];
634     real       velec[2], vvdw[2];
635     int        i, ntab;
636
637     qq[0]    = qqA;
638     qq[1]    = qqB;
639     c6[0]    = c6A;
640     c6[1]    = c6B;
641     c12[0]   = c12A;
642     c12[1]   = c12B;
643
644     if (sc_r_power == 6.0)
645     {
646         rpm2             = r2*r2;   /* r4 */
647         rp               = rpm2*r2; /* r6 */
648     }
649     else if (sc_r_power == 48.0)
650     {
651         rp               = r2*r2*r2; /* r6 */
652         rp               = rp*rp;    /* r12 */
653         rp               = rp*rp;    /* r24 */
654         rp               = rp*rp;    /* r48 */
655         rpm2             = rp/r2;    /* r46 */
656     }
657     else
658     {
659         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
660         rpm2           = rp/r2;
661     }
662
663     /* Loop over state A(0) and B(1) */
664     for (i = 0; i < 2; i++)
665     {
666         if ((c6[i] > 0) && (c12[i] > 0))
667         {
668             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
669              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
670              */
671             sigma6[i]       = 0.5*c12[i]/c6[i];
672             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
673             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
674                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
675             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
676             {
677                 sigma6[i] = sigma6_min;
678                 sigma2[i] = sigma2_min;
679             }
680         }
681         else
682         {
683             sigma6[i]       = sigma6_def;
684             sigma2[i]       = sigma2_def;
685         }
686         if (sc_r_power == 6.0)
687         {
688             sigma_pow[i]    = sigma6[i];
689             sigma_powm2[i]  = sigma6[i]/sigma2[i];
690         }
691         else if (sc_r_power == 48.0)
692         {
693             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
694             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
695             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
696             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
697         }
698         else
699         {    /* not really supported as input, but in here for testing the general case*/
700             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
701             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
702         }
703     }
704
705     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
706     if ((c12[0] > 0) && (c12[1] > 0))
707     {
708         alpha_vdw_eff    = 0;
709         alpha_coul_eff   = 0;
710     }
711     else
712     {
713         alpha_vdw_eff    = alpha_vdw;
714         alpha_coul_eff   = alpha_coul;
715     }
716
717     /* Loop over A and B states again */
718     for (i = 0; i < 2; i++)
719     {
720         fscal_elec[i] = 0;
721         fscal_vdw[i]  = 0;
722         velec[i]      = 0;
723         vvdw[i]       = 0;
724
725         /* Only spend time on A or B state if it is non-zero */
726         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
727         {
728             /* Coulomb */
729             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
730             r_coul           = pow(rpinv, -1.0/sc_r_power);
731
732             /* Electrostatics table lookup data */
733             rtab             = r_coul*tabscale;
734             ntab             = rtab;
735             eps              = rtab-ntab;
736             eps2             = eps*eps;
737             ntab             = 12*ntab;
738             /* Electrostatics */
739             Y                = vftab[ntab];
740             F                = vftab[ntab+1];
741             Geps             = eps*vftab[ntab+2];
742             Heps2            = eps2*vftab[ntab+3];
743             Fp               = F+Geps+Heps2;
744             VV               = Y+eps*Fp;
745             FF               = Fp+Geps+2.0*Heps2;
746             velec[i]         = qq[i]*VV;
747             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
748
749             /* Vdw */
750             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
751             r_vdw            = pow(rpinv, -1.0/sc_r_power);
752             /* Vdw table lookup data */
753             rtab             = r_vdw*tabscale;
754             ntab             = rtab;
755             eps              = rtab-ntab;
756             eps2             = eps*eps;
757             ntab             = 12*ntab;
758             /* Dispersion */
759             Y                = vftab[ntab+4];
760             F                = vftab[ntab+5];
761             Geps             = eps*vftab[ntab+6];
762             Heps2            = eps2*vftab[ntab+7];
763             Fp               = F+Geps+Heps2;
764             VV               = Y+eps*Fp;
765             FF               = Fp+Geps+2.0*Heps2;
766             vvdw[i]          = c6[i]*VV;
767             fscal_vdw[i]     = -c6[i]*FF;
768
769             /* Repulsion */
770             Y                = vftab[ntab+8];
771             F                = vftab[ntab+9];
772             Geps             = eps*vftab[ntab+10];
773             Heps2            = eps2*vftab[ntab+11];
774             Fp               = F+Geps+Heps2;
775             VV               = Y+eps*Fp;
776             FF               = Fp+Geps+2.0*Heps2;
777             vvdw[i]         += c12[i]*VV;
778             fscal_vdw[i]    -= c12[i]*FF;
779             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
780         }
781     }
782     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
783     /* Assemble A and B states */
784     velecsum  = 0;
785     vvdwsum   = 0;
786     dvdl_coul = 0;
787     dvdl_vdw  = 0;
788     fscal     = 0;
789     for (i = 0; i < 2; i++)
790     {
791         velecsum      += LFC[i]*velec[i];
792         vvdwsum       += LFV[i]*vvdw[i];
793
794         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
795
796         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
797         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
798     }
799
800     dvdl[efptCOUL]     += dvdl_coul;
801     dvdl[efptVDW]      += dvdl_vdw;
802
803     *velectot           = velecsum;
804     *vvdwtot            = vvdwsum;
805
806     return fscal;
807 }