Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47
48 void
49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
50                           rvec *                    xx,
51                           rvec *                    ff,
52                           t_forcerec *              fr,
53                           t_mdatoms *               mdatoms,
54                           nb_kernel_data_t *        kernel_data,
55                           t_nrnb *                  nrnb)
56 {
57
58 #define  STATE_A  0
59 #define  STATE_B  1
60 #define  NSTATES  2
61     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
62     real          shX, shY, shZ;
63     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
64     real          Vcoul[NSTATES], Vvdw[NSTATES];
65     real          rinv6, r, rt, rtC, rtV;
66     real          iqA, iqB;
67     real          qq[NSTATES], vctot, krsq;
68     int           ntiA, ntiB, tj[NSTATES];
69     real          Vvdw6, Vvdw12, vvtot;
70     real          ix, iy, iz, fix, fiy, fiz;
71     real          dx, dy, dz, rsq, rinv;
72     real          c6[NSTATES], c12[NSTATES];
73     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
74     double        dvdl_coul, dvdl_vdw;
75     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
76     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
77     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
78     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
79     int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
80     int           n0, n1C, n1V, nnn;
81     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
82     int           icoul, ivdw;
83     int           nri;
84     int *         iinr;
85     int *         jindex;
86     int *         jjnr;
87     int *         shift;
88     int *         gid;
89     int *         typeA;
90     int *         typeB;
91     int           ntype;
92     real *        shiftvec;
93     real          dvdl_part;
94     real *        fshift;
95     real          tabscale;
96     real *        VFtab;
97     real *        x;
98     real *        f;
99     real          facel, krf, crf;
100     real *        chargeA;
101     real *        chargeB;
102     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
103     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
104     real *        nbfp;
105     real *        dvdl;
106     real *        Vv;
107     real *        Vc;
108     gmx_bool      bDoForces;
109     real          rcoulomb, rvdw, factor_coul, factor_vdw, sh_invrc6;
110     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
111     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
112
113     x                   = xx[0];
114     f                   = ff[0];
115
116     fshift              = fr->fshift[0];
117     Vc                  = kernel_data->energygrp_elec;
118     Vv                  = kernel_data->energygrp_vdw;
119     tabscale            = kernel_data->table_elec_vdw->scale;
120     VFtab               = kernel_data->table_elec_vdw->data;
121
122     nri                 = nlist->nri;
123     iinr                = nlist->iinr;
124     jindex              = nlist->jindex;
125     jjnr                = nlist->jjnr;
126     icoul               = nlist->ielec;
127     ivdw                = nlist->ivdw;
128     shift               = nlist->shift;
129     gid                 = nlist->gid;
130
131     shiftvec            = fr->shift_vec[0];
132     chargeA             = mdatoms->chargeA;
133     chargeB             = mdatoms->chargeB;
134     facel               = fr->epsfac;
135     krf                 = fr->k_rf;
136     crf                 = fr->c_rf;
137     ewc                 = fr->ewaldcoeff;
138     Vc                  = kernel_data->energygrp_elec;
139     typeA               = mdatoms->typeA;
140     typeB               = mdatoms->typeB;
141     ntype               = fr->ntype;
142     nbfp                = fr->nbfp;
143     Vv                  = kernel_data->energygrp_vdw;
144     tabscale            = kernel_data->table_elec_vdw->scale;
145     VFtab               = kernel_data->table_elec_vdw->data;
146     lambda_coul         = kernel_data->lambda[efptCOUL];
147     lambda_vdw          = kernel_data->lambda[efptVDW];
148     dvdl                = kernel_data->dvdl;
149     alpha_coul          = fr->sc_alphacoul;
150     alpha_vdw           = fr->sc_alphavdw;
151     lam_power           = fr->sc_power;
152     sc_r_power          = fr->sc_r_power;
153     sigma6_def          = fr->sc_sigma6_def;
154     sigma6_min          = fr->sc_sigma6_min;
155     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
156
157     rcoulomb            = fr->rcoulomb;
158     rvdw                = fr->rvdw;
159     sh_invrc6           = fr->ic->sh_invrc6;
160
161     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
162     {
163         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
164         rcutoff2        = rcutoff*rcutoff;
165         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
166         d               = rcutoff-rswitch;
167         swV3            = -10.0/(d*d*d);
168         swV4            =  15.0/(d*d*d*d);
169         swV5            =  -6.0/(d*d*d*d*d);
170         swF2            = -30.0/(d*d*d);
171         swF3            =  60.0/(d*d*d*d);
172         swF4            = -30.0/(d*d*d*d*d);
173     }
174     else
175     {
176         /* Stupid compilers dont realize these variables will not be used */
177         rswitch         = 0.0;
178         swV3            = 0.0;
179         swV4            = 0.0;
180         swV5            = 0.0;
181         swF2            = 0.0;
182         swF3            = 0.0;
183         swF4            = 0.0;
184     }
185
186     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
187     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
188
189     /* fix compiler warnings */
190     nj1   = 0;
191     n1C   = n1V   = 0;
192     epsC  = epsV  = 0;
193     eps2C = eps2V = 0;
194
195     dvdl_coul  = 0;
196     dvdl_vdw   = 0;
197
198     /* Lambda factor for state A, 1-lambda*/
199     LFC[STATE_A] = 1.0 - lambda_coul;
200     LFV[STATE_A] = 1.0 - lambda_vdw;
201
202     /* Lambda factor for state B, lambda*/
203     LFC[STATE_B] = lambda_coul;
204     LFV[STATE_B] = lambda_vdw;
205
206     /*derivative of the lambda factor for state A and B */
207     DLF[STATE_A] = -1;
208     DLF[STATE_B] = 1;
209
210     for (i = 0; i < NSTATES; i++)
211     {
212         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
213         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
214         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
215         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
216     }
217     /* precalculate */
218     sigma2_def = pow(sigma6_def, 1.0/3.0);
219     sigma2_min = pow(sigma6_min, 1.0/3.0);
220
221     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
222
223     do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
224     do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
225
226     do_tab = do_coultab || do_vdwtab;
227
228     /* we always use the combined table here */
229     tab_elemsize = 12;
230
231     for (n = 0; (n < nri); n++)
232     {
233         is3              = 3*shift[n];
234         shX              = shiftvec[is3];
235         shY              = shiftvec[is3+1];
236         shZ              = shiftvec[is3+2];
237         nj0              = jindex[n];
238         nj1              = jindex[n+1];
239         ii               = iinr[n];
240         ii3              = 3*ii;
241         ix               = shX + x[ii3+0];
242         iy               = shY + x[ii3+1];
243         iz               = shZ + x[ii3+2];
244         iqA              = facel*chargeA[ii];
245         iqB              = facel*chargeB[ii];
246         ntiA             = 2*ntype*typeA[ii];
247         ntiB             = 2*ntype*typeB[ii];
248         vctot            = 0;
249         vvtot            = 0;
250         fix              = 0;
251         fiy              = 0;
252         fiz              = 0;
253
254         for (k = nj0; (k < nj1); k++)
255         {
256             jnr              = jjnr[k];
257             j3               = 3*jnr;
258             dx               = ix - x[j3];
259             dy               = iy - x[j3+1];
260             dz               = iz - x[j3+2];
261             rsq              = dx*dx+dy*dy+dz*dz;
262             rinv             = gmx_invsqrt(rsq);
263             r                = rsq*rinv;
264             if (sc_r_power == 6.0)
265             {
266                 rpm2             = rsq*rsq;  /* r4 */
267                 rp               = rpm2*rsq; /* r6 */
268             }
269             else if (sc_r_power == 48.0)
270             {
271                 rp               = rsq*rsq*rsq; /* r6 */
272                 rp               = rp*rp;       /* r12 */
273                 rp               = rp*rp;       /* r24 */
274                 rp               = rp*rp;       /* r48 */
275                 rpm2             = rp/rsq;      /* r46 */
276             }
277             else
278             {
279                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
280                 rpm2           = rp/rsq;
281             }
282
283             tj[STATE_A]      = ntiA+2*typeA[jnr];
284             tj[STATE_B]      = ntiB+2*typeB[jnr];
285             qq[STATE_A]      = iqA*chargeA[jnr];
286             qq[STATE_B]      = iqB*chargeB[jnr];
287
288             for (i = 0; i < NSTATES; i++)
289             {
290
291                 c6[i]              = nbfp[tj[i]];
292                 c12[i]             = nbfp[tj[i]+1];
293                 if ((c6[i] > 0) && (c12[i] > 0))
294                 {
295                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
296                     sigma6[i]       = 0.5*c12[i]/c6[i];
297                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
298                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
299                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
300                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
301                     {
302                         sigma6[i] = sigma6_min;
303                         sigma2[i] = sigma2_min;
304                     }
305                 }
306                 else
307                 {
308                     sigma6[i]       = sigma6_def;
309                     sigma2[i]       = sigma2_def;
310                 }
311                 if (sc_r_power == 6.0)
312                 {
313                     sigma_pow[i]    = sigma6[i];
314                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
315                 }
316                 else if (sc_r_power == 48.0)
317                 {
318                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
319                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
320                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
321                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
322                 }
323                 else
324                 {    /* not really supported as input, but in here for testing the general case*/
325                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
326                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
327                 }
328             }
329
330             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
331             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
332             {
333                 alpha_vdw_eff    = 0;
334                 alpha_coul_eff   = 0;
335             }
336             else
337             {
338                 alpha_vdw_eff    = alpha_vdw;
339                 alpha_coul_eff   = alpha_coul;
340             }
341
342             for (i = 0; i < NSTATES; i++)
343             {
344                 FscalC[i]    = 0;
345                 FscalV[i]    = 0;
346                 Vcoul[i]     = 0;
347                 Vvdw[i]      = 0;
348
349                 /* Only spend time on A or B state if it is non-zero */
350                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
351                 {
352
353                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
354                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
355                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
356                     rC             = 1.0/rinvC;
357
358                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
359                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
360                     rV             = 1.0/rinvV;
361
362                     factor_coul    = (rC <= rcoulomb) ? 1.0 : 0.0;
363                     factor_vdw     = (rV <= rvdw)     ? 1.0 : 0.0;
364
365                     if (do_tab)
366                     {
367                         rtC        = rC*tabscale;
368                         n0         = rtC;
369                         epsC       = rtC-n0;
370                         eps2C      = epsC*epsC;
371                         n1C        = tab_elemsize*n0;
372
373                         rtV        = rV*tabscale;
374                         n0         = rtV;
375                         epsV       = rtV-n0;
376                         eps2V      = epsV*epsV;
377                         n1V        = tab_elemsize*n0;
378                     }
379
380                     if (qq[i] != 0)
381                     {
382                         switch (icoul)
383                         {
384                             case GMX_NBKERNEL_ELEC_COULOMB:
385                             case GMX_NBKERNEL_ELEC_EWALD:
386                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
387                                 Vcoul[i]   = qq[i]*rinvC;
388                                 FscalC[i]  = Vcoul[i]*rpinvC;
389                                 break;
390
391                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
392                                 /* reaction-field */
393                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
394                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
395                                 break;
396
397                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
398                                 /* non-Ewald tabulated coulomb */
399                                 nnn        = n1C;
400                                 Y          = VFtab[nnn];
401                                 F          = VFtab[nnn+1];
402                                 Geps       = epsC*VFtab[nnn+2];
403                                 Heps2      = eps2C*VFtab[nnn+3];
404                                 Fp         = F+Geps+Heps2;
405                                 VV         = Y+epsC*Fp;
406                                 FF         = Fp+Geps+2.0*Heps2;
407                                 Vcoul[i]   = qq[i]*VV;
408                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
409                                 break;
410
411                             default:
412                                 FscalC[i]  = 0.0;
413                                 Vcoul[i]   = 0.0;
414                                 break;
415                         }
416
417                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
418                         {
419                             d                = rC-rswitch;
420                             d                = (d > 0.0) ? d : 0.0;
421                             d2               = d*d;
422                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
423                             dsw              = d2*(swF2+d*(swF3+d*swF4));
424
425                             Vcoul[i]        *= sw;
426                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
427                         }
428
429                         if (bExactElecCutoff)
430                         {
431                             FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : 0.0;
432                             Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : 0.0;
433                         }
434                     }
435
436                     if ((c6[i] != 0) || (c12[i] != 0))
437                     {
438                         switch (ivdw)
439                         {
440                             case GMX_NBKERNEL_VDW_LENNARDJONES:
441                                 /* cutoff LJ */
442                                 if (sc_r_power == 6.0)
443                                 {
444                                     rinv6            = rpinvV;
445                                 }
446                                 else
447                                 {
448                                     rinv6            = pow(rinvV, 6.0);
449                                 }
450                                 Vvdw6            = c6[i]*rinv6;
451                                 Vvdw12           = c12[i]*rinv6*rinv6;
452                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
453                                 {
454                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
455                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
456                                 }
457                                 else
458                                 {
459                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
460                                 }
461                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
462                                 break;
463
464                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
465                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
466                                 break;
467
468                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
469                                 /* Table LJ */
470                                 nnn = n1V+4;
471                                 /* dispersion */
472                                 Y          = VFtab[nnn];
473                                 F          = VFtab[nnn+1];
474                                 Geps       = epsV*VFtab[nnn+2];
475                                 Heps2      = eps2V*VFtab[nnn+3];
476                                 Fp         = F+Geps+Heps2;
477                                 VV         = Y+epsV*Fp;
478                                 FF         = Fp+Geps+2.0*Heps2;
479                                 Vvdw[i]   += c6[i]*VV;
480                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
481
482                                 /* repulsion */
483                                 Y          = VFtab[nnn+4];
484                                 F          = VFtab[nnn+5];
485                                 Geps       = epsV*VFtab[nnn+6];
486                                 Heps2      = eps2V*VFtab[nnn+7];
487                                 Fp         = F+Geps+Heps2;
488                                 VV         = Y+epsV*Fp;
489                                 FF         = Fp+Geps+2.0*Heps2;
490                                 Vvdw[i]   += c12[i]*VV;
491                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
492                                 break;
493
494                             default:
495                                 Vvdw[i]    = 0.0;
496                                 FscalV[i]  = 0.0;
497                                 break;
498                         }
499
500                         if (fr->vdw_modifier == eintmodPOTSWITCH)
501                         {
502                             d                = rV-rswitch;
503                             d                = (d > 0.0) ? d : 0.0;
504                             d2               = d*d;
505                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
506                             dsw              = d2*(swF2+d*(swF3+d*swF4));
507
508                             Vvdw[i]         *= sw;
509                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
510
511                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
512                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
513                         }
514                     }
515                 }
516             }
517
518             Fscal = 0;
519
520             if (icoul == GMX_NBKERNEL_ELEC_EWALD)
521             {
522                 /* because we compute the softcore normally,
523                    we have to remove the ewald short range portion. Done outside of
524                    the states loop because this part doesn't depend on the scaled R */
525
526                 if (r != 0)
527                 {
528                     VV    = gmx_erf(ewc*r)*rinv;
529                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
530                 }
531                 else
532                 {
533                     VV    = ewc*M_2_SQRTPI;
534                     FF    = 0;
535                 }
536
537                 for (i = 0; i < NSTATES; i++)
538                 {
539                     vctot      -= LFC[i]*qq[i]*VV;
540                     Fscal      -= LFC[i]*qq[i]*FF;
541                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
542                 }
543             }
544
545             /* Assemble A and B states */
546             for (i = 0; i < NSTATES; i++)
547             {
548                 vctot         += LFC[i]*Vcoul[i];
549                 vvtot         += LFV[i]*Vvdw[i];
550
551                 Fscal         += LFC[i]*FscalC[i]*rpm2;
552                 Fscal         += LFV[i]*FscalV[i]*rpm2;
553
554                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
555                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
556             }
557
558             if (bDoForces)
559             {
560                 tx         = Fscal*dx;
561                 ty         = Fscal*dy;
562                 tz         = Fscal*dz;
563                 fix        = fix + tx;
564                 fiy        = fiy + ty;
565                 fiz        = fiz + tz;
566                 f[j3]      = f[j3]   - tx;
567                 f[j3+1]    = f[j3+1] - ty;
568                 f[j3+2]    = f[j3+2] - tz;
569             }
570         }
571
572         if (bDoForces)
573         {
574             f[ii3]         = f[ii3]        + fix;
575             f[ii3+1]       = f[ii3+1]      + fiy;
576             f[ii3+2]       = f[ii3+2]      + fiz;
577             fshift[is3]    = fshift[is3]   + fix;
578             fshift[is3+1]  = fshift[is3+1] + fiy;
579             fshift[is3+2]  = fshift[is3+2] + fiz;
580         }
581         ggid               = gid[n];
582         Vc[ggid]           = Vc[ggid] + vctot;
583         Vv[ggid]           = Vv[ggid] + vvtot;
584     }
585
586     dvdl[efptCOUL]     += dvdl_coul;
587     dvdl[efptVDW]      += dvdl_vdw;
588
589     /* Estimate flops, average for free energy stuff:
590      * 12  flops per outer iteration
591      * 150 flops per inner iteration
592      */
593     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
594 }
595
596 real
597 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
598                                real tabscale, real *vftab,
599                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
600                                real LFC[2], real LFV[2], real DLF[2],
601                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
602                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
603                                real *velectot, real *vvdwtot, real *dvdl)
604 {
605     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
606     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
607     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
608     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
609     real       fscal_vdw[2], fscal_elec[2];
610     real       velec[2], vvdw[2];
611     int        i, ntab;
612
613     qq[0]    = qqA;
614     qq[1]    = qqB;
615     c6[0]    = c6A;
616     c6[1]    = c6B;
617     c12[0]   = c12A;
618     c12[1]   = c12B;
619
620     if (sc_r_power == 6.0)
621     {
622         rpm2             = r2*r2;   /* r4 */
623         rp               = rpm2*r2; /* r6 */
624     }
625     else if (sc_r_power == 48.0)
626     {
627         rp               = r2*r2*r2; /* r6 */
628         rp               = rp*rp;    /* r12 */
629         rp               = rp*rp;    /* r24 */
630         rp               = rp*rp;    /* r48 */
631         rpm2             = rp/r2;    /* r46 */
632     }
633     else
634     {
635         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
636         rpm2           = rp/r2;
637     }
638
639     /* Loop over state A(0) and B(1) */
640     for (i = 0; i < 2; i++)
641     {
642         if ((c6[i] > 0) && (c12[i] > 0))
643         {
644             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
645              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
646              */
647             sigma6[i]       = 0.5*c12[i]/c6[i];
648             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
649             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
650                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
651             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
652             {
653                 sigma6[i] = sigma6_min;
654                 sigma2[i] = sigma2_min;
655             }
656         }
657         else
658         {
659             sigma6[i]       = sigma6_def;
660             sigma2[i]       = sigma2_def;
661         }
662         if (sc_r_power == 6.0)
663         {
664             sigma_pow[i]    = sigma6[i];
665             sigma_powm2[i]  = sigma6[i]/sigma2[i];
666         }
667         else if (sc_r_power == 48.0)
668         {
669             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
670             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
671             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
672             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
673         }
674         else
675         {    /* not really supported as input, but in here for testing the general case*/
676             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
677             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
678         }
679     }
680
681     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
682     if ((c12[0] > 0) && (c12[1] > 0))
683     {
684         alpha_vdw_eff    = 0;
685         alpha_coul_eff   = 0;
686     }
687     else
688     {
689         alpha_vdw_eff    = alpha_vdw;
690         alpha_coul_eff   = alpha_coul;
691     }
692
693     /* Loop over A and B states again */
694     for (i = 0; i < 2; i++)
695     {
696         fscal_elec[i] = 0;
697         fscal_vdw[i]  = 0;
698         velec[i]      = 0;
699         vvdw[i]       = 0;
700
701         /* Only spend time on A or B state if it is non-zero */
702         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
703         {
704             /* Coulomb */
705             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
706             r_coul           = pow(rpinv, -1.0/sc_r_power);
707
708             /* Electrostatics table lookup data */
709             rtab             = r_coul*tabscale;
710             ntab             = rtab;
711             eps              = rtab-ntab;
712             eps2             = eps*eps;
713             ntab             = 12*ntab;
714             /* Electrostatics */
715             Y                = vftab[ntab];
716             F                = vftab[ntab+1];
717             Geps             = eps*vftab[ntab+2];
718             Heps2            = eps2*vftab[ntab+3];
719             Fp               = F+Geps+Heps2;
720             VV               = Y+eps*Fp;
721             FF               = Fp+Geps+2.0*Heps2;
722             velec[i]         = qq[i]*VV;
723             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
724
725             /* Vdw */
726             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
727             r_vdw            = pow(rpinv, -1.0/sc_r_power);
728             /* Vdw table lookup data */
729             rtab             = r_vdw*tabscale;
730             ntab             = rtab;
731             eps              = rtab-ntab;
732             eps2             = eps*eps;
733             ntab             = 12*ntab;
734             /* Dispersion */
735             Y                = vftab[ntab+4];
736             F                = vftab[ntab+5];
737             Geps             = eps*vftab[ntab+6];
738             Heps2            = eps2*vftab[ntab+7];
739             Fp               = F+Geps+Heps2;
740             VV               = Y+eps*Fp;
741             FF               = Fp+Geps+2.0*Heps2;
742             vvdw[i]          = c6[i]*VV;
743             fscal_vdw[i]     = -c6[i]*FF;
744
745             /* Repulsion */
746             Y                = vftab[ntab+8];
747             F                = vftab[ntab+9];
748             Geps             = eps*vftab[ntab+10];
749             Heps2            = eps2*vftab[ntab+11];
750             Fp               = F+Geps+Heps2;
751             VV               = Y+eps*Fp;
752             FF               = Fp+Geps+2.0*Heps2;
753             vvdw[i]         += c12[i]*VV;
754             fscal_vdw[i]    -= c12[i]*FF;
755             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
756         }
757     }
758     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
759     /* Assemble A and B states */
760     velecsum  = 0;
761     vvdwsum   = 0;
762     dvdl_coul = 0;
763     dvdl_vdw  = 0;
764     fscal     = 0;
765     for (i = 0; i < 2; i++)
766     {
767         velecsum      += LFC[i]*velec[i];
768         vvdwsum       += LFV[i]*vvdw[i];
769
770         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
771
772         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
773         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
774     }
775
776     dvdl[efptCOUL]     += dvdl_coul;
777     dvdl[efptVDW]      += dvdl_vdw;
778
779     *velectot           = velecsum;
780     *vvdwtot            = vvdwsum;
781
782     return fscal;
783 }