Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47
48 void
49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
50                           rvec *                    xx,
51                           rvec *                    ff,
52                           t_forcerec *              fr,
53                           t_mdatoms *               mdatoms,
54                           nb_kernel_data_t *        kernel_data,
55                           t_nrnb *                  nrnb)
56 {
57
58 #define  STATE_A  0
59 #define  STATE_B  1
60 #define  NSTATES  2
61     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
62     real          shX, shY, shZ;
63     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
64     real          Vcoul[NSTATES], Vvdw[NSTATES];
65     real          rinv6, r, rt, rtC, rtV;
66     real          iqA, iqB;
67     real          qq[NSTATES], vctot, krsq;
68     int           ntiA, ntiB, tj[NSTATES];
69     real          Vvdw6, Vvdw12, vvtot;
70     real          ix, iy, iz, fix, fiy, fiz;
71     real          dx, dy, dz, rsq, rinv;
72     real          c6[NSTATES], c12[NSTATES];
73     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
74     double        dvdl_coul, dvdl_vdw;
75     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
76     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
77     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
78     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
79     int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
80     int           n0, n1C, n1V, nnn;
81     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
82     int           icoul, ivdw;
83     int           nri;
84     int *         iinr;
85     int *         jindex;
86     int *         jjnr;
87     int *         shift;
88     int *         gid;
89     int *         typeA;
90     int *         typeB;
91     int           ntype;
92     real *        shiftvec;
93     real          dvdl_part;
94     real *        fshift;
95     real          tabscale;
96     real *        VFtab;
97     real *        x;
98     real *        f;
99     real          facel, krf, crf;
100     real *        chargeA;
101     real *        chargeB;
102     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
103     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
104     real *        nbfp;
105     real *        dvdl;
106     real *        Vv;
107     real *        Vc;
108     gmx_bool      bDoForces;
109     real          rcoulomb, rvdw, sh_invrc6;
110     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
111     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
112
113     x                   = xx[0];
114     f                   = ff[0];
115
116     fshift              = fr->fshift[0];
117     Vc                  = kernel_data->energygrp_elec;
118     Vv                  = kernel_data->energygrp_vdw;
119     tabscale            = kernel_data->table_elec_vdw->scale;
120     VFtab               = kernel_data->table_elec_vdw->data;
121
122     nri                 = nlist->nri;
123     iinr                = nlist->iinr;
124     jindex              = nlist->jindex;
125     jjnr                = nlist->jjnr;
126     icoul               = nlist->ielec;
127     ivdw                = nlist->ivdw;
128     shift               = nlist->shift;
129     gid                 = nlist->gid;
130
131     shiftvec            = fr->shift_vec[0];
132     chargeA             = mdatoms->chargeA;
133     chargeB             = mdatoms->chargeB;
134     facel               = fr->epsfac;
135     krf                 = fr->k_rf;
136     crf                 = fr->c_rf;
137     ewc                 = fr->ewaldcoeff;
138     Vc                  = kernel_data->energygrp_elec;
139     typeA               = mdatoms->typeA;
140     typeB               = mdatoms->typeB;
141     ntype               = fr->ntype;
142     nbfp                = fr->nbfp;
143     Vv                  = kernel_data->energygrp_vdw;
144     tabscale            = kernel_data->table_elec_vdw->scale;
145     VFtab               = kernel_data->table_elec_vdw->data;
146     lambda_coul         = kernel_data->lambda[efptCOUL];
147     lambda_vdw          = kernel_data->lambda[efptVDW];
148     dvdl                = kernel_data->dvdl;
149     alpha_coul          = fr->sc_alphacoul;
150     alpha_vdw           = fr->sc_alphavdw;
151     lam_power           = fr->sc_power;
152     sc_r_power          = fr->sc_r_power;
153     sigma6_def          = fr->sc_sigma6_def;
154     sigma6_min          = fr->sc_sigma6_min;
155     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
156
157     rcoulomb            = fr->rcoulomb;
158     rvdw                = fr->rvdw;
159     sh_invrc6           = fr->ic->sh_invrc6;
160
161     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
162     {
163         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
164         rcutoff2        = rcutoff*rcutoff;
165         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
166         d               = rcutoff-rswitch;
167         swV3            = -10.0/(d*d*d);
168         swV4            =  15.0/(d*d*d*d);
169         swV5            =  -6.0/(d*d*d*d*d);
170         swF2            = -30.0/(d*d*d);
171         swF3            =  60.0/(d*d*d*d);
172         swF4            = -30.0/(d*d*d*d*d);
173     }
174     else
175     {
176         /* Stupid compilers dont realize these variables will not be used */
177         rswitch         = 0.0;
178         swV3            = 0.0;
179         swV4            = 0.0;
180         swV5            = 0.0;
181         swF2            = 0.0;
182         swF3            = 0.0;
183         swF4            = 0.0;
184     }
185
186     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
187     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
188
189     /* fix compiler warnings */
190     nj1   = 0;
191     n1C   = n1V   = 0;
192     epsC  = epsV  = 0;
193     eps2C = eps2V = 0;
194
195     dvdl_coul  = 0;
196     dvdl_vdw   = 0;
197
198     /* Lambda factor for state A, 1-lambda*/
199     LFC[STATE_A] = 1.0 - lambda_coul;
200     LFV[STATE_A] = 1.0 - lambda_vdw;
201
202     /* Lambda factor for state B, lambda*/
203     LFC[STATE_B] = lambda_coul;
204     LFV[STATE_B] = lambda_vdw;
205
206     /*derivative of the lambda factor for state A and B */
207     DLF[STATE_A] = -1;
208     DLF[STATE_B] = 1;
209
210     for (i = 0; i < NSTATES; i++)
211     {
212         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
213         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
214         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
215         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
216     }
217     /* precalculate */
218     sigma2_def = pow(sigma6_def, 1.0/3.0);
219     sigma2_min = pow(sigma6_min, 1.0/3.0);
220
221     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
222
223     do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
224     do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
225
226     do_tab = do_coultab || do_vdwtab;
227
228     /* we always use the combined table here */
229     tab_elemsize = 12;
230
231     for (n = 0; (n < nri); n++)
232     {
233         is3              = 3*shift[n];
234         shX              = shiftvec[is3];
235         shY              = shiftvec[is3+1];
236         shZ              = shiftvec[is3+2];
237         nj0              = jindex[n];
238         nj1              = jindex[n+1];
239         ii               = iinr[n];
240         ii3              = 3*ii;
241         ix               = shX + x[ii3+0];
242         iy               = shY + x[ii3+1];
243         iz               = shZ + x[ii3+2];
244         iqA              = facel*chargeA[ii];
245         iqB              = facel*chargeB[ii];
246         ntiA             = 2*ntype*typeA[ii];
247         ntiB             = 2*ntype*typeB[ii];
248         vctot            = 0;
249         vvtot            = 0;
250         fix              = 0;
251         fiy              = 0;
252         fiz              = 0;
253
254         for (k = nj0; (k < nj1); k++)
255         {
256             jnr              = jjnr[k];
257             j3               = 3*jnr;
258             dx               = ix - x[j3];
259             dy               = iy - x[j3+1];
260             dz               = iz - x[j3+2];
261             rsq              = dx*dx+dy*dy+dz*dz;
262             rinv             = gmx_invsqrt(rsq);
263             r                = rsq*rinv;
264             if (sc_r_power == 6.0)
265             {
266                 rpm2             = rsq*rsq;  /* r4 */
267                 rp               = rpm2*rsq; /* r6 */
268             }
269             else if (sc_r_power == 48.0)
270             {
271                 rp               = rsq*rsq*rsq; /* r6 */
272                 rp               = rp*rp;       /* r12 */
273                 rp               = rp*rp;       /* r24 */
274                 rp               = rp*rp;       /* r48 */
275                 rpm2             = rp/rsq;      /* r46 */
276             }
277             else
278             {
279                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
280                 rpm2           = rp/rsq;
281             }
282
283             tj[STATE_A]      = ntiA+2*typeA[jnr];
284             tj[STATE_B]      = ntiB+2*typeB[jnr];
285             qq[STATE_A]      = iqA*chargeA[jnr];
286             qq[STATE_B]      = iqB*chargeB[jnr];
287
288             for (i = 0; i < NSTATES; i++)
289             {
290
291                 c6[i]              = nbfp[tj[i]];
292                 c12[i]             = nbfp[tj[i]+1];
293                 if ((c6[i] > 0) && (c12[i] > 0))
294                 {
295                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
296                     sigma6[i]       = 0.5*c12[i]/c6[i];
297                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
298                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
299                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
300                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
301                     {
302                         sigma6[i] = sigma6_min;
303                         sigma2[i] = sigma2_min;
304                     }
305                 }
306                 else
307                 {
308                     sigma6[i]       = sigma6_def;
309                     sigma2[i]       = sigma2_def;
310                 }
311                 if (sc_r_power == 6.0)
312                 {
313                     sigma_pow[i]    = sigma6[i];
314                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
315                 }
316                 else if (sc_r_power == 48.0)
317                 {
318                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
319                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
320                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
321                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
322                 }
323                 else
324                 {    /* not really supported as input, but in here for testing the general case*/
325                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
326                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
327                 }
328             }
329
330             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
331             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
332             {
333                 alpha_vdw_eff    = 0;
334                 alpha_coul_eff   = 0;
335             }
336             else
337             {
338                 alpha_vdw_eff    = alpha_vdw;
339                 alpha_coul_eff   = alpha_coul;
340             }
341
342             for (i = 0; i < NSTATES; i++)
343             {
344                 FscalC[i]    = 0;
345                 FscalV[i]    = 0;
346                 Vcoul[i]     = 0;
347                 Vvdw[i]      = 0;
348
349                 /* Only spend time on A or B state if it is non-zero */
350                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
351                 {
352
353                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
354                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
355                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
356                     rC             = 1.0/rinvC;
357
358                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
359                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
360                     rV             = 1.0/rinvV;
361
362                     if (do_tab)
363                     {
364                         rtC        = rC*tabscale;
365                         n0         = rtC;
366                         epsC       = rtC-n0;
367                         eps2C      = epsC*epsC;
368                         n1C        = tab_elemsize*n0;
369
370                         rtV        = rV*tabscale;
371                         n0         = rtV;
372                         epsV       = rtV-n0;
373                         eps2V      = epsV*epsV;
374                         n1V        = tab_elemsize*n0;
375                     }
376
377                     /* With Ewald and soft-core we should put the cut-off on r,
378                      * not on the soft-cored rC, as the real-space and
379                      * reciprocal space contributions should (almost) cancel.
380                      */
381                     if (qq[i] != 0 &&
382                         !(bExactElecCutoff &&
383                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
384                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
385                     {
386                         switch (icoul)
387                         {
388                             case GMX_NBKERNEL_ELEC_COULOMB:
389                             case GMX_NBKERNEL_ELEC_EWALD:
390                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
391                                 Vcoul[i]   = qq[i]*rinvC;
392                                 FscalC[i]  = Vcoul[i]*rpinvC;
393                                 break;
394
395                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
396                                 /* reaction-field */
397                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
398                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
399                                 break;
400
401                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
402                                 /* non-Ewald tabulated coulomb */
403                                 nnn        = n1C;
404                                 Y          = VFtab[nnn];
405                                 F          = VFtab[nnn+1];
406                                 Geps       = epsC*VFtab[nnn+2];
407                                 Heps2      = eps2C*VFtab[nnn+3];
408                                 Fp         = F+Geps+Heps2;
409                                 VV         = Y+epsC*Fp;
410                                 FF         = Fp+Geps+2.0*Heps2;
411                                 Vcoul[i]   = qq[i]*VV;
412                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
413                                 break;
414
415                             default:
416                                 FscalC[i]  = 0.0;
417                                 Vcoul[i]   = 0.0;
418                                 break;
419                         }
420
421                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
422                         {
423                             d                = rC-rswitch;
424                             d                = (d > 0.0) ? d : 0.0;
425                             d2               = d*d;
426                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
427                             dsw              = d2*(swF2+d*(swF3+d*swF4));
428
429                             Vcoul[i]        *= sw;
430                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
431                         }
432                     }
433
434                     if ((c6[i] != 0 || c12[i] != 0) &&
435                         !(bExactVdwCutoff && rV >= rvdw))
436                     {
437                         switch (ivdw)
438                         {
439                             case GMX_NBKERNEL_VDW_LENNARDJONES:
440                                 /* cutoff LJ */
441                                 if (sc_r_power == 6.0)
442                                 {
443                                     rinv6            = rpinvV;
444                                 }
445                                 else
446                                 {
447                                     rinv6            = pow(rinvV, 6.0);
448                                 }
449                                 Vvdw6            = c6[i]*rinv6;
450                                 Vvdw12           = c12[i]*rinv6*rinv6;
451                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
452                                 {
453                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
454                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
455                                 }
456                                 else
457                                 {
458                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
459                                 }
460                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
461                                 break;
462
463                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
464                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
465                                 break;
466
467                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
468                                 /* Table LJ */
469                                 nnn = n1V+4;
470                                 /* dispersion */
471                                 Y          = VFtab[nnn];
472                                 F          = VFtab[nnn+1];
473                                 Geps       = epsV*VFtab[nnn+2];
474                                 Heps2      = eps2V*VFtab[nnn+3];
475                                 Fp         = F+Geps+Heps2;
476                                 VV         = Y+epsV*Fp;
477                                 FF         = Fp+Geps+2.0*Heps2;
478                                 Vvdw[i]   += c6[i]*VV;
479                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
480
481                                 /* repulsion */
482                                 Y          = VFtab[nnn+4];
483                                 F          = VFtab[nnn+5];
484                                 Geps       = epsV*VFtab[nnn+6];
485                                 Heps2      = eps2V*VFtab[nnn+7];
486                                 Fp         = F+Geps+Heps2;
487                                 VV         = Y+epsV*Fp;
488                                 FF         = Fp+Geps+2.0*Heps2;
489                                 Vvdw[i]   += c12[i]*VV;
490                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
491                                 break;
492
493                             default:
494                                 Vvdw[i]    = 0.0;
495                                 FscalV[i]  = 0.0;
496                                 break;
497                         }
498
499                         if (fr->vdw_modifier == eintmodPOTSWITCH)
500                         {
501                             d                = rV-rswitch;
502                             d                = (d > 0.0) ? d : 0.0;
503                             d2               = d*d;
504                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
505                             dsw              = d2*(swF2+d*(swF3+d*swF4));
506
507                             Vvdw[i]         *= sw;
508                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
509
510                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
511                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
512                         }
513                     }
514                 }
515             }
516
517             Fscal = 0;
518
519             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
520                 !(bExactElecCutoff && r >= rcoulomb))
521             {
522                 /* because we compute the softcore normally,
523                    we have to remove the ewald short range portion. Done outside of
524                    the states loop because this part doesn't depend on the scaled R */
525
526 #ifdef GMX_DOUBLE
527                 /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
528 #define         R_ERF_R_INACC 0.006
529 #else
530                 /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
531 #define         R_ERF_R_INACC 0.1
532 #endif
533                 if (ewc*r > R_ERF_R_INACC)
534                 {
535                     VV    = gmx_erf(ewc*r)*rinv;
536                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
537                 }
538                 else
539                 {
540                     VV    = ewc*M_2_SQRTPI;
541                     FF    = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
542                 }
543
544                 for (i = 0; i < NSTATES; i++)
545                 {
546                     vctot      -= LFC[i]*qq[i]*VV;
547                     Fscal      -= LFC[i]*qq[i]*FF;
548                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
549                 }
550             }
551
552             /* Assemble A and B states */
553             for (i = 0; i < NSTATES; i++)
554             {
555                 vctot         += LFC[i]*Vcoul[i];
556                 vvtot         += LFV[i]*Vvdw[i];
557
558                 Fscal         += LFC[i]*FscalC[i]*rpm2;
559                 Fscal         += LFV[i]*FscalV[i]*rpm2;
560
561                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
562                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
563             }
564
565             if (bDoForces)
566             {
567                 tx         = Fscal*dx;
568                 ty         = Fscal*dy;
569                 tz         = Fscal*dz;
570                 fix        = fix + tx;
571                 fiy        = fiy + ty;
572                 fiz        = fiz + tz;
573                 f[j3]      = f[j3]   - tx;
574                 f[j3+1]    = f[j3+1] - ty;
575                 f[j3+2]    = f[j3+2] - tz;
576             }
577         }
578
579         if (bDoForces)
580         {
581             f[ii3]         = f[ii3]        + fix;
582             f[ii3+1]       = f[ii3+1]      + fiy;
583             f[ii3+2]       = f[ii3+2]      + fiz;
584             fshift[is3]    = fshift[is3]   + fix;
585             fshift[is3+1]  = fshift[is3+1] + fiy;
586             fshift[is3+2]  = fshift[is3+2] + fiz;
587         }
588         ggid               = gid[n];
589         Vc[ggid]           = Vc[ggid] + vctot;
590         Vv[ggid]           = Vv[ggid] + vvtot;
591     }
592
593     dvdl[efptCOUL]     += dvdl_coul;
594     dvdl[efptVDW]      += dvdl_vdw;
595
596     /* Estimate flops, average for free energy stuff:
597      * 12  flops per outer iteration
598      * 150 flops per inner iteration
599      */
600     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
601 }
602
603 real
604 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
605                                real tabscale, real *vftab,
606                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
607                                real LFC[2], real LFV[2], real DLF[2],
608                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
609                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
610                                real *velectot, real *vvdwtot, real *dvdl)
611 {
612     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
613     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
614     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
615     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
616     real       fscal_vdw[2], fscal_elec[2];
617     real       velec[2], vvdw[2];
618     int        i, ntab;
619
620     qq[0]    = qqA;
621     qq[1]    = qqB;
622     c6[0]    = c6A;
623     c6[1]    = c6B;
624     c12[0]   = c12A;
625     c12[1]   = c12B;
626
627     if (sc_r_power == 6.0)
628     {
629         rpm2             = r2*r2;   /* r4 */
630         rp               = rpm2*r2; /* r6 */
631     }
632     else if (sc_r_power == 48.0)
633     {
634         rp               = r2*r2*r2; /* r6 */
635         rp               = rp*rp;    /* r12 */
636         rp               = rp*rp;    /* r24 */
637         rp               = rp*rp;    /* r48 */
638         rpm2             = rp/r2;    /* r46 */
639     }
640     else
641     {
642         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
643         rpm2           = rp/r2;
644     }
645
646     /* Loop over state A(0) and B(1) */
647     for (i = 0; i < 2; i++)
648     {
649         if ((c6[i] > 0) && (c12[i] > 0))
650         {
651             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
652              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
653              */
654             sigma6[i]       = 0.5*c12[i]/c6[i];
655             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
656             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
657                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
658             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
659             {
660                 sigma6[i] = sigma6_min;
661                 sigma2[i] = sigma2_min;
662             }
663         }
664         else
665         {
666             sigma6[i]       = sigma6_def;
667             sigma2[i]       = sigma2_def;
668         }
669         if (sc_r_power == 6.0)
670         {
671             sigma_pow[i]    = sigma6[i];
672             sigma_powm2[i]  = sigma6[i]/sigma2[i];
673         }
674         else if (sc_r_power == 48.0)
675         {
676             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
677             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
678             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
679             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
680         }
681         else
682         {    /* not really supported as input, but in here for testing the general case*/
683             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
684             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
685         }
686     }
687
688     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
689     if ((c12[0] > 0) && (c12[1] > 0))
690     {
691         alpha_vdw_eff    = 0;
692         alpha_coul_eff   = 0;
693     }
694     else
695     {
696         alpha_vdw_eff    = alpha_vdw;
697         alpha_coul_eff   = alpha_coul;
698     }
699
700     /* Loop over A and B states again */
701     for (i = 0; i < 2; i++)
702     {
703         fscal_elec[i] = 0;
704         fscal_vdw[i]  = 0;
705         velec[i]      = 0;
706         vvdw[i]       = 0;
707
708         /* Only spend time on A or B state if it is non-zero */
709         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
710         {
711             /* Coulomb */
712             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
713             r_coul           = pow(rpinv, -1.0/sc_r_power);
714
715             /* Electrostatics table lookup data */
716             rtab             = r_coul*tabscale;
717             ntab             = rtab;
718             eps              = rtab-ntab;
719             eps2             = eps*eps;
720             ntab             = 12*ntab;
721             /* Electrostatics */
722             Y                = vftab[ntab];
723             F                = vftab[ntab+1];
724             Geps             = eps*vftab[ntab+2];
725             Heps2            = eps2*vftab[ntab+3];
726             Fp               = F+Geps+Heps2;
727             VV               = Y+eps*Fp;
728             FF               = Fp+Geps+2.0*Heps2;
729             velec[i]         = qq[i]*VV;
730             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
731
732             /* Vdw */
733             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
734             r_vdw            = pow(rpinv, -1.0/sc_r_power);
735             /* Vdw table lookup data */
736             rtab             = r_vdw*tabscale;
737             ntab             = rtab;
738             eps              = rtab-ntab;
739             eps2             = eps*eps;
740             ntab             = 12*ntab;
741             /* Dispersion */
742             Y                = vftab[ntab+4];
743             F                = vftab[ntab+5];
744             Geps             = eps*vftab[ntab+6];
745             Heps2            = eps2*vftab[ntab+7];
746             Fp               = F+Geps+Heps2;
747             VV               = Y+eps*Fp;
748             FF               = Fp+Geps+2.0*Heps2;
749             vvdw[i]          = c6[i]*VV;
750             fscal_vdw[i]     = -c6[i]*FF;
751
752             /* Repulsion */
753             Y                = vftab[ntab+8];
754             F                = vftab[ntab+9];
755             Geps             = eps*vftab[ntab+10];
756             Heps2            = eps2*vftab[ntab+11];
757             Fp               = F+Geps+Heps2;
758             VV               = Y+eps*Fp;
759             FF               = Fp+Geps+2.0*Heps2;
760             vvdw[i]         += c12[i]*VV;
761             fscal_vdw[i]    -= c12[i]*FF;
762             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
763         }
764     }
765     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
766     /* Assemble A and B states */
767     velecsum  = 0;
768     vvdwsum   = 0;
769     dvdl_coul = 0;
770     dvdl_vdw  = 0;
771     fscal     = 0;
772     for (i = 0; i < 2; i++)
773     {
774         velecsum      += LFC[i]*velec[i];
775         vvdwsum       += LFV[i]*vvdw[i];
776
777         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
778
779         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
780         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
781     }
782
783     dvdl[efptCOUL]     += dvdl_coul;
784     dvdl[efptVDW]      += dvdl_vdw;
785
786     *velectot           = velecsum;
787     *vvdwtot            = vvdwsum;
788
789     return fscal;
790 }