Merge branch 'release-4-6'
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47
48 void
49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
50                           rvec *                    xx,
51                           rvec *                    ff,
52                           t_forcerec *              fr,
53                           t_mdatoms *               mdatoms,
54                           nb_kernel_data_t *        kernel_data,
55                           t_nrnb *                  nrnb)
56 {
57
58 #define  STATE_A  0
59 #define  STATE_B  1
60 #define  NSTATES  2
61     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
62     real          shX, shY, shZ;
63     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
64     real          Vcoul[NSTATES], Vvdw[NSTATES];
65     real          rinv6, r, rt, rtC, rtV;
66     real          iqA, iqB;
67     real          qq[NSTATES], vctot, krsq;
68     int           ntiA, ntiB, tj[NSTATES];
69     real          Vvdw6, Vvdw12, vvtot;
70     real          ix, iy, iz, fix, fiy, fiz;
71     real          dx, dy, dz, rsq, rinv;
72     real          c6[NSTATES], c12[NSTATES];
73     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
74     double        dvdl_coul, dvdl_vdw;
75     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
76     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
77     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
78     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
79     int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
80     int           n0, n1C, n1V, nnn;
81     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
82     int           icoul, ivdw;
83     int           nri;
84     int *         iinr;
85     int *         jindex;
86     int *         jjnr;
87     int *         shift;
88     int *         gid;
89     int *         typeA;
90     int *         typeB;
91     int           ntype;
92     real *        shiftvec;
93     real          dvdl_part;
94     real *        fshift;
95     real          tabscale;
96     real *        VFtab;
97     real *        x;
98     real *        f;
99     real          facel, krf, crf;
100     real *        chargeA;
101     real *        chargeB;
102     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
103     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
104     real *        nbfp;
105     real *        dvdl;
106     real *        Vv;
107     real *        Vc;
108     gmx_bool      bDoForces;
109     real          rcoulomb, rvdw, sh_invrc6;
110     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
111     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
112
113     x                   = xx[0];
114     f                   = ff[0];
115
116     fshift              = fr->fshift[0];
117     Vc                  = kernel_data->energygrp_elec;
118     Vv                  = kernel_data->energygrp_vdw;
119     tabscale            = kernel_data->table_elec_vdw->scale;
120     VFtab               = kernel_data->table_elec_vdw->data;
121
122     nri                 = nlist->nri;
123     iinr                = nlist->iinr;
124     jindex              = nlist->jindex;
125     jjnr                = nlist->jjnr;
126     icoul               = nlist->ielec;
127     ivdw                = nlist->ivdw;
128     shift               = nlist->shift;
129     gid                 = nlist->gid;
130
131     shiftvec            = fr->shift_vec[0];
132     chargeA             = mdatoms->chargeA;
133     chargeB             = mdatoms->chargeB;
134     facel               = fr->epsfac;
135     krf                 = fr->k_rf;
136     crf                 = fr->c_rf;
137     ewc                 = fr->ewaldcoeff;
138     Vc                  = kernel_data->energygrp_elec;
139     typeA               = mdatoms->typeA;
140     typeB               = mdatoms->typeB;
141     ntype               = fr->ntype;
142     nbfp                = fr->nbfp;
143     Vv                  = kernel_data->energygrp_vdw;
144     tabscale            = kernel_data->table_elec_vdw->scale;
145     VFtab               = kernel_data->table_elec_vdw->data;
146     lambda_coul         = kernel_data->lambda[efptCOUL];
147     lambda_vdw          = kernel_data->lambda[efptVDW];
148     dvdl                = kernel_data->dvdl;
149     alpha_coul          = fr->sc_alphacoul;
150     alpha_vdw           = fr->sc_alphavdw;
151     lam_power           = fr->sc_power;
152     sc_r_power          = fr->sc_r_power;
153     sigma6_def          = fr->sc_sigma6_def;
154     sigma6_min          = fr->sc_sigma6_min;
155     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
156
157     rcoulomb            = fr->rcoulomb;
158     rvdw                = fr->rvdw;
159     sh_invrc6           = fr->ic->sh_invrc6;
160
161     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
162     {
163         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
164         rcutoff2        = rcutoff*rcutoff;
165         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
166         d               = rcutoff-rswitch;
167         swV3            = -10.0/(d*d*d);
168         swV4            =  15.0/(d*d*d*d);
169         swV5            =  -6.0/(d*d*d*d*d);
170         swF2            = -30.0/(d*d*d);
171         swF3            =  60.0/(d*d*d*d);
172         swF4            = -30.0/(d*d*d*d*d);
173     }
174     else
175     {
176         /* Stupid compilers dont realize these variables will not be used */
177         rswitch         = 0.0;
178         swV3            = 0.0;
179         swV4            = 0.0;
180         swV5            = 0.0;
181         swF2            = 0.0;
182         swF3            = 0.0;
183         swF4            = 0.0;
184     }
185
186     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
187     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
188
189     /* fix compiler warnings */
190     nj1   = 0;
191     n1C   = n1V   = 0;
192     epsC  = epsV  = 0;
193     eps2C = eps2V = 0;
194
195     dvdl_coul  = 0;
196     dvdl_vdw   = 0;
197
198     /* Lambda factor for state A, 1-lambda*/
199     LFC[STATE_A] = 1.0 - lambda_coul;
200     LFV[STATE_A] = 1.0 - lambda_vdw;
201
202     /* Lambda factor for state B, lambda*/
203     LFC[STATE_B] = lambda_coul;
204     LFV[STATE_B] = lambda_vdw;
205
206     /*derivative of the lambda factor for state A and B */
207     DLF[STATE_A] = -1;
208     DLF[STATE_B] = 1;
209
210     for (i = 0; i < NSTATES; i++)
211     {
212         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
213         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
214         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
215         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
216     }
217     /* precalculate */
218     sigma2_def = pow(sigma6_def, 1.0/3.0);
219     sigma2_min = pow(sigma6_min, 1.0/3.0);
220
221     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
222
223     do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
224     do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
225
226     do_tab = do_coultab || do_vdwtab;
227
228     /* we always use the combined table here */
229     tab_elemsize = 12;
230
231     for (n = 0; (n < nri); n++)
232     {
233         is3              = 3*shift[n];
234         shX              = shiftvec[is3];
235         shY              = shiftvec[is3+1];
236         shZ              = shiftvec[is3+2];
237         nj0              = jindex[n];
238         nj1              = jindex[n+1];
239         ii               = iinr[n];
240         ii3              = 3*ii;
241         ix               = shX + x[ii3+0];
242         iy               = shY + x[ii3+1];
243         iz               = shZ + x[ii3+2];
244         iqA              = facel*chargeA[ii];
245         iqB              = facel*chargeB[ii];
246         ntiA             = 2*ntype*typeA[ii];
247         ntiB             = 2*ntype*typeB[ii];
248         vctot            = 0;
249         vvtot            = 0;
250         fix              = 0;
251         fiy              = 0;
252         fiz              = 0;
253
254         for (k = nj0; (k < nj1); k++)
255         {
256             jnr              = jjnr[k];
257             j3               = 3*jnr;
258             dx               = ix - x[j3];
259             dy               = iy - x[j3+1];
260             dz               = iz - x[j3+2];
261             rsq              = dx*dx+dy*dy+dz*dz;
262             rinv             = gmx_invsqrt(rsq);
263             r                = rsq*rinv;
264             if (sc_r_power == 6.0)
265             {
266                 rpm2             = rsq*rsq;  /* r4 */
267                 rp               = rpm2*rsq; /* r6 */
268             }
269             else if (sc_r_power == 48.0)
270             {
271                 rp               = rsq*rsq*rsq; /* r6 */
272                 rp               = rp*rp;       /* r12 */
273                 rp               = rp*rp;       /* r24 */
274                 rp               = rp*rp;       /* r48 */
275                 rpm2             = rp/rsq;      /* r46 */
276             }
277             else
278             {
279                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
280                 rpm2           = rp/rsq;
281             }
282
283             tj[STATE_A]      = ntiA+2*typeA[jnr];
284             tj[STATE_B]      = ntiB+2*typeB[jnr];
285             qq[STATE_A]      = iqA*chargeA[jnr];
286             qq[STATE_B]      = iqB*chargeB[jnr];
287
288             for (i = 0; i < NSTATES; i++)
289             {
290
291                 c6[i]              = nbfp[tj[i]];
292                 c12[i]             = nbfp[tj[i]+1];
293                 if ((c6[i] > 0) && (c12[i] > 0))
294                 {
295                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
296                     sigma6[i]       = 0.5*c12[i]/c6[i];
297                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
298                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
299                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
300                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
301                     {
302                         sigma6[i] = sigma6_min;
303                         sigma2[i] = sigma2_min;
304                     }
305                 }
306                 else
307                 {
308                     sigma6[i]       = sigma6_def;
309                     sigma2[i]       = sigma2_def;
310                 }
311                 if (sc_r_power == 6.0)
312                 {
313                     sigma_pow[i]    = sigma6[i];
314                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
315                 }
316                 else if (sc_r_power == 48.0)
317                 {
318                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
319                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
320                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
321                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
322                 }
323                 else
324                 {    /* not really supported as input, but in here for testing the general case*/
325                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
326                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
327                 }
328             }
329
330             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
331             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
332             {
333                 alpha_vdw_eff    = 0;
334                 alpha_coul_eff   = 0;
335             }
336             else
337             {
338                 alpha_vdw_eff    = alpha_vdw;
339                 alpha_coul_eff   = alpha_coul;
340             }
341
342             for (i = 0; i < NSTATES; i++)
343             {
344                 FscalC[i]    = 0;
345                 FscalV[i]    = 0;
346                 Vcoul[i]     = 0;
347                 Vvdw[i]      = 0;
348
349                 /* Only spend time on A or B state if it is non-zero */
350                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
351                 {
352
353                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
354                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
355                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
356                     rC             = 1.0/rinvC;
357
358                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
359                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
360                     rV             = 1.0/rinvV;
361
362                     if (do_tab)
363                     {
364                         rtC        = rC*tabscale;
365                         n0         = rtC;
366                         epsC       = rtC-n0;
367                         eps2C      = epsC*epsC;
368                         n1C        = tab_elemsize*n0;
369
370                         rtV        = rV*tabscale;
371                         n0         = rtV;
372                         epsV       = rtV-n0;
373                         eps2V      = epsV*epsV;
374                         n1V        = tab_elemsize*n0;
375                     }
376
377                     /* With Ewald and soft-core we should put the cut-off on r,
378                      * not on the soft-cored rC, as the real-space and
379                      * reciprocal space contributions should (almost) cancel.
380                      */
381                     if (qq[i] != 0 &&
382                         !(bExactElecCutoff &&
383                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
384                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
385                     {
386                         switch (icoul)
387                         {
388                             case GMX_NBKERNEL_ELEC_COULOMB:
389                             case GMX_NBKERNEL_ELEC_EWALD:
390                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
391                                 Vcoul[i]   = qq[i]*rinvC;
392                                 FscalC[i]  = Vcoul[i]*rpinvC;
393                                 break;
394
395                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
396                                 /* reaction-field */
397                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
398                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
399                                 break;
400
401                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
402                                 /* non-Ewald tabulated coulomb */
403                                 nnn        = n1C;
404                                 Y          = VFtab[nnn];
405                                 F          = VFtab[nnn+1];
406                                 Geps       = epsC*VFtab[nnn+2];
407                                 Heps2      = eps2C*VFtab[nnn+3];
408                                 Fp         = F+Geps+Heps2;
409                                 VV         = Y+epsC*Fp;
410                                 FF         = Fp+Geps+2.0*Heps2;
411                                 Vcoul[i]   = qq[i]*VV;
412                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
413                                 break;
414
415                             case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
416                                 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
417                                 break;
418
419                             case GMX_NBKERNEL_ELEC_NONE:
420                                 FscalC[i]  = 0.0;
421                                 Vcoul[i]   = 0.0;
422                                 break;
423
424                             default:
425                                 gmx_incons("Invalid icoul in free energy kernel");
426                                 break;
427                         }
428
429                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
430                         {
431                             d                = rC-rswitch;
432                             d                = (d > 0.0) ? d : 0.0;
433                             d2               = d*d;
434                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
435                             dsw              = d2*(swF2+d*(swF3+d*swF4));
436
437                             Vcoul[i]        *= sw;
438                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
439                         }
440                     }
441
442                     if ((c6[i] != 0 || c12[i] != 0) &&
443                         !(bExactVdwCutoff && rV >= rvdw))
444                     {
445                         switch (ivdw)
446                         {
447                             case GMX_NBKERNEL_VDW_LENNARDJONES:
448                                 /* cutoff LJ */
449                                 if (sc_r_power == 6.0)
450                                 {
451                                     rinv6            = rpinvV;
452                                 }
453                                 else
454                                 {
455                                     rinv6            = pow(rinvV, 6.0);
456                                 }
457                                 Vvdw6            = c6[i]*rinv6;
458                                 Vvdw12           = c12[i]*rinv6*rinv6;
459                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
460                                 {
461                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
462                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
463                                 }
464                                 else
465                                 {
466                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
467                                 }
468                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
469                                 break;
470
471                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
472                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
473                                 break;
474
475                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
476                                 /* Table LJ */
477                                 nnn = n1V+4;
478                                 /* dispersion */
479                                 Y          = VFtab[nnn];
480                                 F          = VFtab[nnn+1];
481                                 Geps       = epsV*VFtab[nnn+2];
482                                 Heps2      = eps2V*VFtab[nnn+3];
483                                 Fp         = F+Geps+Heps2;
484                                 VV         = Y+epsV*Fp;
485                                 FF         = Fp+Geps+2.0*Heps2;
486                                 Vvdw[i]   += c6[i]*VV;
487                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
488
489                                 /* repulsion */
490                                 Y          = VFtab[nnn+4];
491                                 F          = VFtab[nnn+5];
492                                 Geps       = epsV*VFtab[nnn+6];
493                                 Heps2      = eps2V*VFtab[nnn+7];
494                                 Fp         = F+Geps+Heps2;
495                                 VV         = Y+epsV*Fp;
496                                 FF         = Fp+Geps+2.0*Heps2;
497                                 Vvdw[i]   += c12[i]*VV;
498                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
499                                 break;
500
501                             case GMX_NBKERNEL_VDW_NONE:
502                                 Vvdw[i]    = 0.0;
503                                 FscalV[i]  = 0.0;
504                                 break;
505
506                             default:
507                                 gmx_incons("Invalid ivdw in free energy kernel");
508                                 break;
509                         }
510
511                         if (fr->vdw_modifier == eintmodPOTSWITCH)
512                         {
513                             d                = rV-rswitch;
514                             d                = (d > 0.0) ? d : 0.0;
515                             d2               = d*d;
516                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
517                             dsw              = d2*(swF2+d*(swF3+d*swF4));
518
519                             Vvdw[i]         *= sw;
520                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
521
522                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
523                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
524                         }
525                     }
526                 }
527             }
528
529             Fscal = 0;
530
531             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
532                 !(bExactElecCutoff && r >= rcoulomb))
533             {
534                 /* because we compute the softcore normally,
535                    we have to remove the ewald short range portion. Done outside of
536                    the states loop because this part doesn't depend on the scaled R */
537
538 #ifdef GMX_DOUBLE
539                 /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
540 #define         R_ERF_R_INACC 0.006
541 #else
542                 /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
543 #define         R_ERF_R_INACC 0.1
544 #endif
545                 if (ewc*r > R_ERF_R_INACC)
546                 {
547                     VV    = gmx_erf(ewc*r)*rinv;
548                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
549                 }
550                 else
551                 {
552                     VV    = ewc*M_2_SQRTPI;
553                     FF    = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
554                 }
555
556                 for (i = 0; i < NSTATES; i++)
557                 {
558                     vctot      -= LFC[i]*qq[i]*VV;
559                     Fscal      -= LFC[i]*qq[i]*FF;
560                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
561                 }
562             }
563
564             /* Assemble A and B states */
565             for (i = 0; i < NSTATES; i++)
566             {
567                 vctot         += LFC[i]*Vcoul[i];
568                 vvtot         += LFV[i]*Vvdw[i];
569
570                 Fscal         += LFC[i]*FscalC[i]*rpm2;
571                 Fscal         += LFV[i]*FscalV[i]*rpm2;
572
573                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
574                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
575             }
576
577             if (bDoForces)
578             {
579                 tx         = Fscal*dx;
580                 ty         = Fscal*dy;
581                 tz         = Fscal*dz;
582                 fix        = fix + tx;
583                 fiy        = fiy + ty;
584                 fiz        = fiz + tz;
585                 f[j3]      = f[j3]   - tx;
586                 f[j3+1]    = f[j3+1] - ty;
587                 f[j3+2]    = f[j3+2] - tz;
588             }
589         }
590
591         if (bDoForces)
592         {
593             f[ii3]         = f[ii3]        + fix;
594             f[ii3+1]       = f[ii3+1]      + fiy;
595             f[ii3+2]       = f[ii3+2]      + fiz;
596             fshift[is3]    = fshift[is3]   + fix;
597             fshift[is3+1]  = fshift[is3+1] + fiy;
598             fshift[is3+2]  = fshift[is3+2] + fiz;
599         }
600         ggid               = gid[n];
601         Vc[ggid]           = Vc[ggid] + vctot;
602         Vv[ggid]           = Vv[ggid] + vvtot;
603     }
604
605     dvdl[efptCOUL]     += dvdl_coul;
606     dvdl[efptVDW]      += dvdl_vdw;
607
608     /* Estimate flops, average for free energy stuff:
609      * 12  flops per outer iteration
610      * 150 flops per inner iteration
611      */
612     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
613 }
614
615 real
616 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
617                                real tabscale, real *vftab,
618                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
619                                real LFC[2], real LFV[2], real DLF[2],
620                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
621                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
622                                real *velectot, real *vvdwtot, real *dvdl)
623 {
624     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
625     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
626     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
627     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
628     real       fscal_vdw[2], fscal_elec[2];
629     real       velec[2], vvdw[2];
630     int        i, ntab;
631
632     qq[0]    = qqA;
633     qq[1]    = qqB;
634     c6[0]    = c6A;
635     c6[1]    = c6B;
636     c12[0]   = c12A;
637     c12[1]   = c12B;
638
639     if (sc_r_power == 6.0)
640     {
641         rpm2             = r2*r2;   /* r4 */
642         rp               = rpm2*r2; /* r6 */
643     }
644     else if (sc_r_power == 48.0)
645     {
646         rp               = r2*r2*r2; /* r6 */
647         rp               = rp*rp;    /* r12 */
648         rp               = rp*rp;    /* r24 */
649         rp               = rp*rp;    /* r48 */
650         rpm2             = rp/r2;    /* r46 */
651     }
652     else
653     {
654         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
655         rpm2           = rp/r2;
656     }
657
658     /* Loop over state A(0) and B(1) */
659     for (i = 0; i < 2; i++)
660     {
661         if ((c6[i] > 0) && (c12[i] > 0))
662         {
663             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
664              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
665              */
666             sigma6[i]       = 0.5*c12[i]/c6[i];
667             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
668             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
669                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
670             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
671             {
672                 sigma6[i] = sigma6_min;
673                 sigma2[i] = sigma2_min;
674             }
675         }
676         else
677         {
678             sigma6[i]       = sigma6_def;
679             sigma2[i]       = sigma2_def;
680         }
681         if (sc_r_power == 6.0)
682         {
683             sigma_pow[i]    = sigma6[i];
684             sigma_powm2[i]  = sigma6[i]/sigma2[i];
685         }
686         else if (sc_r_power == 48.0)
687         {
688             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
689             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
690             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
691             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
692         }
693         else
694         {    /* not really supported as input, but in here for testing the general case*/
695             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
696             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
697         }
698     }
699
700     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
701     if ((c12[0] > 0) && (c12[1] > 0))
702     {
703         alpha_vdw_eff    = 0;
704         alpha_coul_eff   = 0;
705     }
706     else
707     {
708         alpha_vdw_eff    = alpha_vdw;
709         alpha_coul_eff   = alpha_coul;
710     }
711
712     /* Loop over A and B states again */
713     for (i = 0; i < 2; i++)
714     {
715         fscal_elec[i] = 0;
716         fscal_vdw[i]  = 0;
717         velec[i]      = 0;
718         vvdw[i]       = 0;
719
720         /* Only spend time on A or B state if it is non-zero */
721         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
722         {
723             /* Coulomb */
724             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
725             r_coul           = pow(rpinv, -1.0/sc_r_power);
726
727             /* Electrostatics table lookup data */
728             rtab             = r_coul*tabscale;
729             ntab             = rtab;
730             eps              = rtab-ntab;
731             eps2             = eps*eps;
732             ntab             = 12*ntab;
733             /* Electrostatics */
734             Y                = vftab[ntab];
735             F                = vftab[ntab+1];
736             Geps             = eps*vftab[ntab+2];
737             Heps2            = eps2*vftab[ntab+3];
738             Fp               = F+Geps+Heps2;
739             VV               = Y+eps*Fp;
740             FF               = Fp+Geps+2.0*Heps2;
741             velec[i]         = qq[i]*VV;
742             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
743
744             /* Vdw */
745             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
746             r_vdw            = pow(rpinv, -1.0/sc_r_power);
747             /* Vdw table lookup data */
748             rtab             = r_vdw*tabscale;
749             ntab             = rtab;
750             eps              = rtab-ntab;
751             eps2             = eps*eps;
752             ntab             = 12*ntab;
753             /* Dispersion */
754             Y                = vftab[ntab+4];
755             F                = vftab[ntab+5];
756             Geps             = eps*vftab[ntab+6];
757             Heps2            = eps2*vftab[ntab+7];
758             Fp               = F+Geps+Heps2;
759             VV               = Y+eps*Fp;
760             FF               = Fp+Geps+2.0*Heps2;
761             vvdw[i]          = c6[i]*VV;
762             fscal_vdw[i]     = -c6[i]*FF;
763
764             /* Repulsion */
765             Y                = vftab[ntab+8];
766             F                = vftab[ntab+9];
767             Geps             = eps*vftab[ntab+10];
768             Heps2            = eps2*vftab[ntab+11];
769             Fp               = F+Geps+Heps2;
770             VV               = Y+eps*Fp;
771             FF               = Fp+Geps+2.0*Heps2;
772             vvdw[i]         += c12[i]*VV;
773             fscal_vdw[i]    -= c12[i]*FF;
774             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
775         }
776     }
777     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
778     /* Assemble A and B states */
779     velecsum  = 0;
780     vvdwsum   = 0;
781     dvdl_coul = 0;
782     dvdl_vdw  = 0;
783     fscal     = 0;
784     for (i = 0; i < 2; i++)
785     {
786         velecsum      += LFC[i]*velec[i];
787         vvdwsum       += LFV[i]*vvdw[i];
788
789         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
790
791         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
792         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
793     }
794
795     dvdl[efptCOUL]     += dvdl_coul;
796     dvdl[efptVDW]      += dvdl_vdw;
797
798     *velectot           = velecsum;
799     *vvdwtot            = vvdwsum;
800
801     return fscal;
802 }