082fd28d43815a4fc40838acca1e8f8dd39f0ee6
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "vec.h"
45 #include "typedefs.h"
46 #include "nonbonded.h"
47 #include "nb_kernel.h"
48 #include "nrnb.h"
49
50 void
51 gmx_nb_free_energy_kernel(t_nblist *                nlist,
52                           rvec *                    xx,
53                           rvec *                    ff,
54                           t_forcerec *              fr,
55                           t_mdatoms *               mdatoms,
56                           nb_kernel_data_t *        kernel_data,
57                           t_nrnb *                  nrnb)
58 {
59
60 #define  STATE_A  0
61 #define  STATE_B  1
62 #define  NSTATES  2
63     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
64     real          shX, shY, shZ;
65     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
66     real          Vcoul[NSTATES], Vvdw[NSTATES];
67     real          rinv6, r, rt, rtC, rtV;
68     real          iqA, iqB;
69     real          qq[NSTATES], vctot, krsq;
70     int           ntiA, ntiB, tj[NSTATES];
71     real          Vvdw6, Vvdw12, vvtot;
72     real          ix, iy, iz, fix, fiy, fiz;
73     real          dx, dy, dz, rsq, rinv;
74     real          c6[NSTATES], c12[NSTATES];
75     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
76     double        dvdl_coul, dvdl_vdw;
77     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
78     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
79     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
80     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
81     int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
82     int           n0, n1C, n1V, nnn;
83     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
84     int           icoul, ivdw;
85     int           nri;
86     int *         iinr;
87     int *         jindex;
88     int *         jjnr;
89     int *         shift;
90     int *         gid;
91     int *         typeA;
92     int *         typeB;
93     int           ntype;
94     real *        shiftvec;
95     real          dvdl_part;
96     real *        fshift;
97     real          tabscale;
98     real *        VFtab;
99     real *        x;
100     real *        f;
101     real          facel, krf, crf;
102     real *        chargeA;
103     real *        chargeB;
104     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
105     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
106     real *        nbfp;
107     real *        dvdl;
108     real *        Vv;
109     real *        Vc;
110     gmx_bool      bDoForces;
111     real          rcoulomb, rvdw, sh_invrc6;
112     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
113     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
114
115     x                   = xx[0];
116     f                   = ff[0];
117
118     fshift              = fr->fshift[0];
119     Vc                  = kernel_data->energygrp_elec;
120     Vv                  = kernel_data->energygrp_vdw;
121     tabscale            = kernel_data->table_elec_vdw->scale;
122     VFtab               = kernel_data->table_elec_vdw->data;
123
124     nri                 = nlist->nri;
125     iinr                = nlist->iinr;
126     jindex              = nlist->jindex;
127     jjnr                = nlist->jjnr;
128     icoul               = nlist->ielec;
129     ivdw                = nlist->ivdw;
130     shift               = nlist->shift;
131     gid                 = nlist->gid;
132
133     shiftvec            = fr->shift_vec[0];
134     chargeA             = mdatoms->chargeA;
135     chargeB             = mdatoms->chargeB;
136     facel               = fr->epsfac;
137     krf                 = fr->k_rf;
138     crf                 = fr->c_rf;
139     ewc                 = fr->ewaldcoeff;
140     Vc                  = kernel_data->energygrp_elec;
141     typeA               = mdatoms->typeA;
142     typeB               = mdatoms->typeB;
143     ntype               = fr->ntype;
144     nbfp                = fr->nbfp;
145     Vv                  = kernel_data->energygrp_vdw;
146     tabscale            = kernel_data->table_elec_vdw->scale;
147     VFtab               = kernel_data->table_elec_vdw->data;
148     lambda_coul         = kernel_data->lambda[efptCOUL];
149     lambda_vdw          = kernel_data->lambda[efptVDW];
150     dvdl                = kernel_data->dvdl;
151     alpha_coul          = fr->sc_alphacoul;
152     alpha_vdw           = fr->sc_alphavdw;
153     lam_power           = fr->sc_power;
154     sc_r_power          = fr->sc_r_power;
155     sigma6_def          = fr->sc_sigma6_def;
156     sigma6_min          = fr->sc_sigma6_min;
157     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
158
159     rcoulomb            = fr->rcoulomb;
160     rvdw                = fr->rvdw;
161     sh_invrc6           = fr->ic->sh_invrc6;
162
163     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
164     {
165         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
166         rcutoff2        = rcutoff*rcutoff;
167         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
168         d               = rcutoff-rswitch;
169         swV3            = -10.0/(d*d*d);
170         swV4            =  15.0/(d*d*d*d);
171         swV5            =  -6.0/(d*d*d*d*d);
172         swF2            = -30.0/(d*d*d);
173         swF3            =  60.0/(d*d*d*d);
174         swF4            = -30.0/(d*d*d*d*d);
175     }
176     else
177     {
178         /* Stupid compilers dont realize these variables will not be used */
179         rswitch         = 0.0;
180         swV3            = 0.0;
181         swV4            = 0.0;
182         swV5            = 0.0;
183         swF2            = 0.0;
184         swF3            = 0.0;
185         swF4            = 0.0;
186     }
187
188     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
189     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
190
191     /* fix compiler warnings */
192     nj1   = 0;
193     n1C   = n1V   = 0;
194     epsC  = epsV  = 0;
195     eps2C = eps2V = 0;
196
197     dvdl_coul  = 0;
198     dvdl_vdw   = 0;
199
200     /* Lambda factor for state A, 1-lambda*/
201     LFC[STATE_A] = 1.0 - lambda_coul;
202     LFV[STATE_A] = 1.0 - lambda_vdw;
203
204     /* Lambda factor for state B, lambda*/
205     LFC[STATE_B] = lambda_coul;
206     LFV[STATE_B] = lambda_vdw;
207
208     /*derivative of the lambda factor for state A and B */
209     DLF[STATE_A] = -1;
210     DLF[STATE_B] = 1;
211
212     for (i = 0; i < NSTATES; i++)
213     {
214         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
215         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
216         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
217         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
218     }
219     /* precalculate */
220     sigma2_def = pow(sigma6_def, 1.0/3.0);
221     sigma2_min = pow(sigma6_min, 1.0/3.0);
222
223     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
224
225     do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
226     do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
227
228     do_tab = do_coultab || do_vdwtab;
229
230     /* we always use the combined table here */
231     tab_elemsize = 12;
232
233     for (n = 0; (n < nri); n++)
234     {
235         is3              = 3*shift[n];
236         shX              = shiftvec[is3];
237         shY              = shiftvec[is3+1];
238         shZ              = shiftvec[is3+2];
239         nj0              = jindex[n];
240         nj1              = jindex[n+1];
241         ii               = iinr[n];
242         ii3              = 3*ii;
243         ix               = shX + x[ii3+0];
244         iy               = shY + x[ii3+1];
245         iz               = shZ + x[ii3+2];
246         iqA              = facel*chargeA[ii];
247         iqB              = facel*chargeB[ii];
248         ntiA             = 2*ntype*typeA[ii];
249         ntiB             = 2*ntype*typeB[ii];
250         vctot            = 0;
251         vvtot            = 0;
252         fix              = 0;
253         fiy              = 0;
254         fiz              = 0;
255
256         for (k = nj0; (k < nj1); k++)
257         {
258             jnr              = jjnr[k];
259             j3               = 3*jnr;
260             dx               = ix - x[j3];
261             dy               = iy - x[j3+1];
262             dz               = iz - x[j3+2];
263             rsq              = dx*dx+dy*dy+dz*dz;
264             rinv             = gmx_invsqrt(rsq);
265             r                = rsq*rinv;
266             if (sc_r_power == 6.0)
267             {
268                 rpm2             = rsq*rsq;  /* r4 */
269                 rp               = rpm2*rsq; /* r6 */
270             }
271             else if (sc_r_power == 48.0)
272             {
273                 rp               = rsq*rsq*rsq; /* r6 */
274                 rp               = rp*rp;       /* r12 */
275                 rp               = rp*rp;       /* r24 */
276                 rp               = rp*rp;       /* r48 */
277                 rpm2             = rp/rsq;      /* r46 */
278             }
279             else
280             {
281                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
282                 rpm2           = rp/rsq;
283             }
284
285             tj[STATE_A]      = ntiA+2*typeA[jnr];
286             tj[STATE_B]      = ntiB+2*typeB[jnr];
287             qq[STATE_A]      = iqA*chargeA[jnr];
288             qq[STATE_B]      = iqB*chargeB[jnr];
289
290             for (i = 0; i < NSTATES; i++)
291             {
292
293                 c6[i]              = nbfp[tj[i]];
294                 c12[i]             = nbfp[tj[i]+1];
295                 if ((c6[i] > 0) && (c12[i] > 0))
296                 {
297                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
298                     sigma6[i]       = 0.5*c12[i]/c6[i];
299                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
300                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
301                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
302                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
303                     {
304                         sigma6[i] = sigma6_min;
305                         sigma2[i] = sigma2_min;
306                     }
307                 }
308                 else
309                 {
310                     sigma6[i]       = sigma6_def;
311                     sigma2[i]       = sigma2_def;
312                 }
313                 if (sc_r_power == 6.0)
314                 {
315                     sigma_pow[i]    = sigma6[i];
316                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
317                 }
318                 else if (sc_r_power == 48.0)
319                 {
320                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
321                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
322                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
323                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
324                 }
325                 else
326                 {    /* not really supported as input, but in here for testing the general case*/
327                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
328                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
329                 }
330             }
331
332             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
333             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
334             {
335                 alpha_vdw_eff    = 0;
336                 alpha_coul_eff   = 0;
337             }
338             else
339             {
340                 alpha_vdw_eff    = alpha_vdw;
341                 alpha_coul_eff   = alpha_coul;
342             }
343
344             for (i = 0; i < NSTATES; i++)
345             {
346                 FscalC[i]    = 0;
347                 FscalV[i]    = 0;
348                 Vcoul[i]     = 0;
349                 Vvdw[i]      = 0;
350
351                 /* Only spend time on A or B state if it is non-zero */
352                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
353                 {
354
355                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
356                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
357                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
358                     rC             = 1.0/rinvC;
359
360                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
361                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
362                     rV             = 1.0/rinvV;
363
364                     if (do_tab)
365                     {
366                         rtC        = rC*tabscale;
367                         n0         = rtC;
368                         epsC       = rtC-n0;
369                         eps2C      = epsC*epsC;
370                         n1C        = tab_elemsize*n0;
371
372                         rtV        = rV*tabscale;
373                         n0         = rtV;
374                         epsV       = rtV-n0;
375                         eps2V      = epsV*epsV;
376                         n1V        = tab_elemsize*n0;
377                     }
378
379                     /* With Ewald and soft-core we should put the cut-off on r,
380                      * not on the soft-cored rC, as the real-space and
381                      * reciprocal space contributions should (almost) cancel.
382                      */
383                     if (qq[i] != 0 &&
384                         !(bExactElecCutoff &&
385                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
386                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
387                     {
388                         switch (icoul)
389                         {
390                             case GMX_NBKERNEL_ELEC_COULOMB:
391                             case GMX_NBKERNEL_ELEC_EWALD:
392                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
393                                 Vcoul[i]   = qq[i]*rinvC;
394                                 FscalC[i]  = Vcoul[i]*rpinvC;
395                                 break;
396
397                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
398                                 /* reaction-field */
399                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
400                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
401                                 break;
402
403                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
404                                 /* non-Ewald tabulated coulomb */
405                                 nnn        = n1C;
406                                 Y          = VFtab[nnn];
407                                 F          = VFtab[nnn+1];
408                                 Geps       = epsC*VFtab[nnn+2];
409                                 Heps2      = eps2C*VFtab[nnn+3];
410                                 Fp         = F+Geps+Heps2;
411                                 VV         = Y+epsC*Fp;
412                                 FF         = Fp+Geps+2.0*Heps2;
413                                 Vcoul[i]   = qq[i]*VV;
414                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
415                                 break;
416
417                             case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
418                                 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
419                                 break;
420
421                             case GMX_NBKERNEL_ELEC_NONE:
422                                 FscalC[i]  = 0.0;
423                                 Vcoul[i]   = 0.0;
424                                 break;
425
426                             default:
427                                 gmx_incons("Invalid icoul in free energy kernel");
428                                 break;
429                         }
430
431                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
432                         {
433                             d                = rC-rswitch;
434                             d                = (d > 0.0) ? d : 0.0;
435                             d2               = d*d;
436                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
437                             dsw              = d2*(swF2+d*(swF3+d*swF4));
438
439                             Vcoul[i]        *= sw;
440                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
441                         }
442                     }
443
444                     if ((c6[i] != 0 || c12[i] != 0) &&
445                         !(bExactVdwCutoff && rV >= rvdw))
446                     {
447                         switch (ivdw)
448                         {
449                             case GMX_NBKERNEL_VDW_LENNARDJONES:
450                                 /* cutoff LJ */
451                                 if (sc_r_power == 6.0)
452                                 {
453                                     rinv6            = rpinvV;
454                                 }
455                                 else
456                                 {
457                                     rinv6            = pow(rinvV, 6.0);
458                                 }
459                                 Vvdw6            = c6[i]*rinv6;
460                                 Vvdw12           = c12[i]*rinv6*rinv6;
461                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
462                                 {
463                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
464                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
465                                 }
466                                 else
467                                 {
468                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
469                                 }
470                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
471                                 break;
472
473                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
474                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
475                                 break;
476
477                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
478                                 /* Table LJ */
479                                 nnn = n1V+4;
480                                 /* dispersion */
481                                 Y          = VFtab[nnn];
482                                 F          = VFtab[nnn+1];
483                                 Geps       = epsV*VFtab[nnn+2];
484                                 Heps2      = eps2V*VFtab[nnn+3];
485                                 Fp         = F+Geps+Heps2;
486                                 VV         = Y+epsV*Fp;
487                                 FF         = Fp+Geps+2.0*Heps2;
488                                 Vvdw[i]   += c6[i]*VV;
489                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
490
491                                 /* repulsion */
492                                 Y          = VFtab[nnn+4];
493                                 F          = VFtab[nnn+5];
494                                 Geps       = epsV*VFtab[nnn+6];
495                                 Heps2      = eps2V*VFtab[nnn+7];
496                                 Fp         = F+Geps+Heps2;
497                                 VV         = Y+epsV*Fp;
498                                 FF         = Fp+Geps+2.0*Heps2;
499                                 Vvdw[i]   += c12[i]*VV;
500                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
501                                 break;
502
503                             case GMX_NBKERNEL_VDW_NONE:
504                                 Vvdw[i]    = 0.0;
505                                 FscalV[i]  = 0.0;
506                                 break;
507
508                             default:
509                                 gmx_incons("Invalid ivdw in free energy kernel");
510                                 break;
511                         }
512
513                         if (fr->vdw_modifier == eintmodPOTSWITCH)
514                         {
515                             d                = rV-rswitch;
516                             d                = (d > 0.0) ? d : 0.0;
517                             d2               = d*d;
518                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
519                             dsw              = d2*(swF2+d*(swF3+d*swF4));
520
521                             Vvdw[i]         *= sw;
522                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
523
524                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
525                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
526                         }
527                     }
528                 }
529             }
530
531             Fscal = 0;
532
533             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
534                 !(bExactElecCutoff && r >= rcoulomb))
535             {
536                 /* because we compute the softcore normally,
537                    we have to remove the ewald short range portion. Done outside of
538                    the states loop because this part doesn't depend on the scaled R */
539
540 #ifdef GMX_DOUBLE
541                 /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
542 #define         R_ERF_R_INACC 0.006
543 #else
544                 /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
545 #define         R_ERF_R_INACC 0.1
546 #endif
547                 if (ewc*r > R_ERF_R_INACC)
548                 {
549                     VV    = gmx_erf(ewc*r)*rinv;
550                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
551                 }
552                 else
553                 {
554                     VV    = ewc*M_2_SQRTPI;
555                     FF    = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
556                 }
557
558                 for (i = 0; i < NSTATES; i++)
559                 {
560                     vctot      -= LFC[i]*qq[i]*VV;
561                     Fscal      -= LFC[i]*qq[i]*FF;
562                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
563                 }
564             }
565
566             /* Assemble A and B states */
567             for (i = 0; i < NSTATES; i++)
568             {
569                 vctot         += LFC[i]*Vcoul[i];
570                 vvtot         += LFV[i]*Vvdw[i];
571
572                 Fscal         += LFC[i]*FscalC[i]*rpm2;
573                 Fscal         += LFV[i]*FscalV[i]*rpm2;
574
575                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
576                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
577             }
578
579             if (bDoForces)
580             {
581                 tx         = Fscal*dx;
582                 ty         = Fscal*dy;
583                 tz         = Fscal*dz;
584                 fix        = fix + tx;
585                 fiy        = fiy + ty;
586                 fiz        = fiz + tz;
587                 f[j3]      = f[j3]   - tx;
588                 f[j3+1]    = f[j3+1] - ty;
589                 f[j3+2]    = f[j3+2] - tz;
590             }
591         }
592
593         if (bDoForces)
594         {
595             f[ii3]         = f[ii3]        + fix;
596             f[ii3+1]       = f[ii3+1]      + fiy;
597             f[ii3+2]       = f[ii3+2]      + fiz;
598             fshift[is3]    = fshift[is3]   + fix;
599             fshift[is3+1]  = fshift[is3+1] + fiy;
600             fshift[is3+2]  = fshift[is3+2] + fiz;
601         }
602         ggid               = gid[n];
603         Vc[ggid]           = Vc[ggid] + vctot;
604         Vv[ggid]           = Vv[ggid] + vvtot;
605     }
606
607     dvdl[efptCOUL]     += dvdl_coul;
608     dvdl[efptVDW]      += dvdl_vdw;
609
610     /* Estimate flops, average for free energy stuff:
611      * 12  flops per outer iteration
612      * 150 flops per inner iteration
613      */
614     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
615 }
616
617 real
618 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
619                                real tabscale, real *vftab,
620                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
621                                real LFC[2], real LFV[2], real DLF[2],
622                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
623                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
624                                real *velectot, real *vvdwtot, real *dvdl)
625 {
626     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
627     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
628     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
629     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
630     real       fscal_vdw[2], fscal_elec[2];
631     real       velec[2], vvdw[2];
632     int        i, ntab;
633
634     qq[0]    = qqA;
635     qq[1]    = qqB;
636     c6[0]    = c6A;
637     c6[1]    = c6B;
638     c12[0]   = c12A;
639     c12[1]   = c12B;
640
641     if (sc_r_power == 6.0)
642     {
643         rpm2             = r2*r2;   /* r4 */
644         rp               = rpm2*r2; /* r6 */
645     }
646     else if (sc_r_power == 48.0)
647     {
648         rp               = r2*r2*r2; /* r6 */
649         rp               = rp*rp;    /* r12 */
650         rp               = rp*rp;    /* r24 */
651         rp               = rp*rp;    /* r48 */
652         rpm2             = rp/r2;    /* r46 */
653     }
654     else
655     {
656         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
657         rpm2           = rp/r2;
658     }
659
660     /* Loop over state A(0) and B(1) */
661     for (i = 0; i < 2; i++)
662     {
663         if ((c6[i] > 0) && (c12[i] > 0))
664         {
665             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
666              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
667              */
668             sigma6[i]       = 0.5*c12[i]/c6[i];
669             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
670             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
671                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
672             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
673             {
674                 sigma6[i] = sigma6_min;
675                 sigma2[i] = sigma2_min;
676             }
677         }
678         else
679         {
680             sigma6[i]       = sigma6_def;
681             sigma2[i]       = sigma2_def;
682         }
683         if (sc_r_power == 6.0)
684         {
685             sigma_pow[i]    = sigma6[i];
686             sigma_powm2[i]  = sigma6[i]/sigma2[i];
687         }
688         else if (sc_r_power == 48.0)
689         {
690             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
691             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
692             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
693             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
694         }
695         else
696         {    /* not really supported as input, but in here for testing the general case*/
697             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
698             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
699         }
700     }
701
702     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
703     if ((c12[0] > 0) && (c12[1] > 0))
704     {
705         alpha_vdw_eff    = 0;
706         alpha_coul_eff   = 0;
707     }
708     else
709     {
710         alpha_vdw_eff    = alpha_vdw;
711         alpha_coul_eff   = alpha_coul;
712     }
713
714     /* Loop over A and B states again */
715     for (i = 0; i < 2; i++)
716     {
717         fscal_elec[i] = 0;
718         fscal_vdw[i]  = 0;
719         velec[i]      = 0;
720         vvdw[i]       = 0;
721
722         /* Only spend time on A or B state if it is non-zero */
723         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
724         {
725             /* Coulomb */
726             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
727             r_coul           = pow(rpinv, -1.0/sc_r_power);
728
729             /* Electrostatics table lookup data */
730             rtab             = r_coul*tabscale;
731             ntab             = rtab;
732             eps              = rtab-ntab;
733             eps2             = eps*eps;
734             ntab             = 12*ntab;
735             /* Electrostatics */
736             Y                = vftab[ntab];
737             F                = vftab[ntab+1];
738             Geps             = eps*vftab[ntab+2];
739             Heps2            = eps2*vftab[ntab+3];
740             Fp               = F+Geps+Heps2;
741             VV               = Y+eps*Fp;
742             FF               = Fp+Geps+2.0*Heps2;
743             velec[i]         = qq[i]*VV;
744             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
745
746             /* Vdw */
747             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
748             r_vdw            = pow(rpinv, -1.0/sc_r_power);
749             /* Vdw table lookup data */
750             rtab             = r_vdw*tabscale;
751             ntab             = rtab;
752             eps              = rtab-ntab;
753             eps2             = eps*eps;
754             ntab             = 12*ntab;
755             /* Dispersion */
756             Y                = vftab[ntab+4];
757             F                = vftab[ntab+5];
758             Geps             = eps*vftab[ntab+6];
759             Heps2            = eps2*vftab[ntab+7];
760             Fp               = F+Geps+Heps2;
761             VV               = Y+eps*Fp;
762             FF               = Fp+Geps+2.0*Heps2;
763             vvdw[i]          = c6[i]*VV;
764             fscal_vdw[i]     = -c6[i]*FF;
765
766             /* Repulsion */
767             Y                = vftab[ntab+8];
768             F                = vftab[ntab+9];
769             Geps             = eps*vftab[ntab+10];
770             Heps2            = eps2*vftab[ntab+11];
771             Fp               = F+Geps+Heps2;
772             VV               = Y+eps*Fp;
773             FF               = Fp+Geps+2.0*Heps2;
774             vvdw[i]         += c12[i]*VV;
775             fscal_vdw[i]    -= c12[i]*FF;
776             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
777         }
778     }
779     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
780     /* Assemble A and B states */
781     velecsum  = 0;
782     vvdwsum   = 0;
783     dvdl_coul = 0;
784     dvdl_vdw  = 0;
785     fscal     = 0;
786     for (i = 0; i < 2; i++)
787     {
788         velecsum      += LFC[i]*velec[i];
789         vvdwsum       += LFV[i]*vvdw[i];
790
791         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
792
793         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
794         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
795     }
796
797     dvdl[efptCOUL]     += dvdl_coul;
798     dvdl[efptVDW]      += dvdl_vdw;
799
800     *velectot           = velecsum;
801     *vvdwtot            = vvdwsum;
802
803     return fscal;
804 }