Update copyright statements and change license to LGPL
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "vec.h"
45 #include "typedefs.h"
46 #include "nonbonded.h"
47 #include "nb_kernel.h"
48 #include "nrnb.h"
49
50 void
51 gmx_nb_free_energy_kernel(t_nblist *                nlist,
52                           rvec *                    xx,
53                           rvec *                    ff,
54                           t_forcerec *              fr,
55                           t_mdatoms *               mdatoms,
56                           nb_kernel_data_t *        kernel_data,
57                           t_nrnb *                  nrnb)
58 {
59
60 #define  STATE_A  0
61 #define  STATE_B  1
62 #define  NSTATES  2
63     int           i,j,n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
64     real          shX,shY,shZ;
65     real          Fscal,FscalC[NSTATES],FscalV[NSTATES],tx,ty,tz;
66     real          Vcoul[NSTATES],Vvdw[NSTATES];
67     real          rinv6,r,rt,rtC,rtV;
68     real          iqA,iqB;
69     real          qq[NSTATES],vctot,krsq;
70     int           ntiA,ntiB,tj[NSTATES];
71     real          Vvdw6, Vvdw12,vvtot;
72     real          ix,iy,iz,fix,fiy,fiz;
73     real          dx,dy,dz,rsq,rinv;
74     real          c6[NSTATES],c12[NSTATES];
75     real          LFC[NSTATES],LFV[NSTATES],DLF[NSTATES];
76     double        dvdl_coul,dvdl_vdw;
77     real          lfac_coul[NSTATES],dlfac_coul[NSTATES],lfac_vdw[NSTATES],dlfac_vdw[NSTATES];
78     real          sigma6[NSTATES],alpha_vdw_eff,alpha_coul_eff,sigma2_def,sigma2_min;
79     real          rp,rpm2,rC,rV,rinvC,rpinvC,rinvV,rpinvV;
80     real          sigma2[NSTATES],sigma_pow[NSTATES],sigma_powm2[NSTATES],rs,rs2;
81     int           do_coultab,do_vdwtab,do_tab,tab_elemsize;
82     int           n0,n1C,n1V,nnn;
83     real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
84     int           icoul,ivdw;
85     int           nri;
86     int *         iinr;
87     int *         jindex;
88     int *         jjnr;
89     int *         shift;
90     int *         gid;
91     int *         typeA;
92     int *         typeB;
93     int           ntype;
94     real *        shiftvec;
95     real          dvdl_part;
96     real *        fshift;
97     real          tabscale;
98     real *        VFtab;
99     real *        x;
100     real *        f;
101     real          facel,krf,crf;
102     real *        chargeA;
103     real *        chargeB;
104     real          sigma6_min,sigma6_def,lam_power,sc_power,sc_r_power;
105     real          alpha_coul,alpha_vdw,lambda_coul,lambda_vdw,ewc;
106     real *        nbfp;
107     real *        dvdl;
108     real *        Vv;
109     real *        Vc;
110     gmx_bool      bDoForces;
111     real          rcoulomb,rvdw,factor_coul,factor_vdw,sh_invrc6;
112     gmx_bool      bExactElecCutoff,bExactVdwCutoff;
113     real          rcutoff,rcutoff2,rswitch,d,d2,swV3,swV4,swV5,swF2,swF3,swF4,sw,dsw,rinvcorr;
114
115     x                   = xx[0];
116     f                   = ff[0];
117
118     fshift              = fr->fshift[0];
119     Vc                  = kernel_data->energygrp_elec;
120     Vv                  = kernel_data->energygrp_vdw;
121     tabscale            = kernel_data->table_elec_vdw->scale;
122     VFtab               = kernel_data->table_elec_vdw->data;
123
124     nri                 = nlist->nri;
125     iinr                = nlist->iinr;
126     jindex              = nlist->jindex;
127     jjnr                = nlist->jjnr;
128     icoul               = nlist->ielec;
129     ivdw                = nlist->ivdw;
130     shift               = nlist->shift;
131     gid                 = nlist->gid;
132
133     shiftvec            = fr->shift_vec[0];
134     chargeA             = mdatoms->chargeA;
135     chargeB             = mdatoms->chargeB;
136     facel               = fr->epsfac;
137     krf                 = fr->k_rf;
138     crf                 = fr->c_rf;
139     ewc                 = fr->ewaldcoeff;
140     Vc                  = kernel_data->energygrp_elec;
141     typeA               = mdatoms->typeA;
142     typeB               = mdatoms->typeB;
143     ntype               = fr->ntype;
144     nbfp                = fr->nbfp;
145     Vv                  = kernel_data->energygrp_vdw;
146     tabscale            = kernel_data->table_elec_vdw->scale;
147     VFtab               = kernel_data->table_elec_vdw->data;
148     lambda_coul         = kernel_data->lambda[efptCOUL];
149     lambda_vdw          = kernel_data->lambda[efptVDW];
150     dvdl                = kernel_data->dvdl;
151     alpha_coul          = fr->sc_alphacoul;
152     alpha_vdw           = fr->sc_alphavdw;
153     lam_power           = fr->sc_power;
154     sc_r_power          = fr->sc_r_power;
155     sigma6_def          = fr->sc_sigma6_def;
156     sigma6_min          = fr->sc_sigma6_min;
157     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
158
159     rcoulomb            = fr->rcoulomb;
160     rvdw                = fr->rvdw;
161     sh_invrc6           = fr->ic->sh_invrc6;
162
163     if(fr->coulomb_modifier==eintmodPOTSWITCH || fr->vdw_modifier==eintmodPOTSWITCH)
164     {
165         rcutoff         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
166         rcutoff2        = rcutoff*rcutoff;
167         rswitch         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
168         d               = rcutoff-rswitch;
169         swV3            = -10.0/(d*d*d);
170         swV4            =  15.0/(d*d*d*d);
171         swV5            =  -6.0/(d*d*d*d*d);
172         swF2            = -30.0/(d*d*d);
173         swF3            =  60.0/(d*d*d*d);
174         swF4            = -30.0/(d*d*d*d*d);
175     }
176     else
177     {
178         /* Stupid compilers dont realize these variables will not be used */
179         rswitch         = 0.0;
180         swV3            = 0.0;
181         swV4            = 0.0;
182         swV5            = 0.0;
183         swF2            = 0.0;
184         swF3            = 0.0;
185         swF4            = 0.0;
186     }
187
188     bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
189     bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
190
191     /* fix compiler warnings */
192     nj1   = 0;
193     n1C   = n1V   = 0;
194     epsC  = epsV  = 0;
195     eps2C = eps2V = 0;
196
197     dvdl_coul  = 0;
198     dvdl_vdw   = 0;
199
200     /* Lambda factor for state A, 1-lambda*/
201     LFC[STATE_A] = 1.0 - lambda_coul;
202     LFV[STATE_A] = 1.0 - lambda_vdw;
203
204     /* Lambda factor for state B, lambda*/
205     LFC[STATE_B] = lambda_coul;
206     LFV[STATE_B] = lambda_vdw;
207
208     /*derivative of the lambda factor for state A and B */
209     DLF[STATE_A] = -1;
210     DLF[STATE_B] = 1;
211
212     for (i=0;i<NSTATES;i++)
213     {
214         lfac_coul[i]  = (lam_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
215         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFC[i]) : 1);
216         lfac_vdw[i]   = (lam_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
217         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFV[i]) : 1);
218     }
219     /* precalculate */
220     sigma2_def = pow(sigma6_def,1.0/3.0);
221     sigma2_min = pow(sigma6_min,1.0/3.0);
222
223     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
224
225     do_coultab = (icoul==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
226     do_vdwtab  = (ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
227     
228     do_tab = do_coultab || do_vdwtab;
229     
230     /* we always use the combined table here */
231     tab_elemsize = 12;
232     
233     for(n=0; (n<nri); n++)
234     {
235         is3              = 3*shift[n];
236         shX              = shiftvec[is3];  
237         shY              = shiftvec[is3+1];
238         shZ              = shiftvec[is3+2];
239         nj0              = jindex[n];
240         nj1              = jindex[n+1];    
241         ii               = iinr[n];        
242         ii3              = 3*ii;           
243         ix               = shX + x[ii3+0];
244         iy               = shY + x[ii3+1];
245         iz               = shZ + x[ii3+2];
246         iqA              = facel*chargeA[ii];
247         iqB              = facel*chargeB[ii];
248         ntiA             = 2*ntype*typeA[ii];
249         ntiB             = 2*ntype*typeB[ii];
250         vctot            = 0;              
251         vvtot            = 0;
252         fix              = 0;              
253         fiy              = 0;              
254         fiz              = 0;              
255         
256         for(k=nj0; (k<nj1); k++)
257         {
258             jnr              = jjnr[k];        
259             j3               = 3*jnr;          
260             dx               = ix - x[j3];      
261             dy               = iy - x[j3+1];      
262             dz               = iz - x[j3+2];      
263             rsq              = dx*dx+dy*dy+dz*dz;
264             rinv             = gmx_invsqrt(rsq);
265             r                = rsq*rinv;
266             if (sc_r_power == 6.0)
267             {
268                 rpm2             = rsq*rsq; /* r4 */
269                 rp               = rpm2*rsq; /* r6 */
270             }
271             else if (sc_r_power == 48.0)
272             {
273                 rp               = rsq*rsq*rsq;  /* r6 */
274                 rp               = rp*rp; /* r12 */
275                 rp               = rp*rp; /* r24 */
276                 rp               = rp*rp; /* r48 */
277                 rpm2             = rp/rsq; /* r46 */
278             }
279             else
280             {
281                 rp             = pow(r,sc_r_power);  /* not currently supported as input, but can handle it */
282                 rpm2           = rp/rsq;
283             }
284
285             tj[STATE_A]      = ntiA+2*typeA[jnr];
286             tj[STATE_B]      = ntiB+2*typeB[jnr];
287             qq[STATE_A]      = iqA*chargeA[jnr];
288             qq[STATE_B]      = iqB*chargeB[jnr];
289
290             for (i=0;i<NSTATES;i++)
291             {
292
293                 c6[i]              = nbfp[tj[i]];
294                 c12[i]             = nbfp[tj[i]+1];
295                 if((c6[i] > 0) && (c12[i] > 0))
296                 {
297                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
298                     sigma6[i]       = 0.5*c12[i]/c6[i];
299                     sigma2[i]       = pow(sigma6[i],1.0/3.0);
300                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
301                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
302                     if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
303                         sigma6[i] = sigma6_min;
304                         sigma2[i] = sigma2_min;
305                     }
306                 }
307                 else
308                 {
309                     sigma6[i]       = sigma6_def;
310                     sigma2[i]       = sigma2_def;
311                 }
312                 if (sc_r_power == 6.0)
313                 {
314                     sigma_pow[i]    = sigma6[i];
315                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
316                 }
317                 else if (sc_r_power == 48.0) 
318                 {
319                     sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
320                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
321                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
322                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];                    
323                 }
324                 else 
325                 {    /* not really supported as input, but in here for testing the general case*/
326                     sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
327                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
328                 }
329             }
330
331             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
332             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0)) {
333                 alpha_vdw_eff    = 0;
334                 alpha_coul_eff   = 0;
335             } else {
336                 alpha_vdw_eff    = alpha_vdw;
337                 alpha_coul_eff   = alpha_coul;
338             }
339
340             for (i=0;i<NSTATES;i++) 
341             {
342                 FscalC[i]    = 0;
343                 FscalV[i]    = 0;
344                 Vcoul[i]     = 0;
345                 Vvdw[i]      = 0;
346
347                 /* Only spend time on A or B state if it is non-zero */
348                 if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
349                 {
350
351                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
352                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
353                     rinvC          = pow(rpinvC,1.0/sc_r_power);
354                     rC             = 1.0/rinvC;
355                     
356                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
357                     rinvV          = pow(rpinvV,1.0/sc_r_power);
358                     rV             = 1.0/rinvV;
359
360                     factor_coul    = (rC<=rcoulomb) ? 1.0 : 0.0;
361                     factor_vdw     = (rV<=rvdw)     ? 1.0 : 0.0;
362
363                     if (do_tab)
364                     {
365                         rtC        = rC*tabscale;
366                         n0         = rtC;
367                         epsC       = rtC-n0;
368                         eps2C      = epsC*epsC;
369                         n1C        = tab_elemsize*n0;
370
371                         rtV        = rV*tabscale;
372                         n0         = rtV;
373                         epsV       = rtV-n0;
374                         eps2V      = epsV*epsV;
375                         n1V        = tab_elemsize*n0;
376                     }
377
378                     if(qq[i] != 0)
379                     {
380                         switch(icoul)
381                         {
382                             case GMX_NBKERNEL_ELEC_COULOMB:
383                             case GMX_NBKERNEL_ELEC_EWALD:
384                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
385                                 Vcoul[i]   = qq[i]*rinvC;
386                                 FscalC[i]  = Vcoul[i]*rpinvC;
387                                 break;
388                                 
389                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
390                                 /* reaction-field */
391                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
392                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
393                                 break;
394
395                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
396                                 /* non-Ewald tabulated coulomb */
397                                 nnn        = n1C;
398                                 Y          = VFtab[nnn];
399                                 F          = VFtab[nnn+1];
400                                 Geps       = epsC*VFtab[nnn+2];
401                                 Heps2      = eps2C*VFtab[nnn+3];
402                                 Fp         = F+Geps+Heps2;
403                                 VV         = Y+epsC*Fp;
404                                 FF         = Fp+Geps+2.0*Heps2;
405                                 Vcoul[i]   = qq[i]*VV;
406                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
407                                 break;
408
409                             default:
410                                 FscalC[i]  = 0.0;
411                                 Vcoul[i]   = 0.0;
412                                 break;
413                         }
414
415                         if(fr->coulomb_modifier==eintmodPOTSWITCH)
416                         {
417                             d                = rC-rswitch;
418                             d                = (d>0.0) ? d : 0.0;
419                             d2               = d*d;
420                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
421                             dsw              = d2*(swF2+d*(swF3+d*swF4));
422
423                             Vcoul[i]        *= sw;
424                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
425                         }
426
427                         if(bExactElecCutoff)
428                         {
429                             FscalC[i]        = (rC<rcoulomb) ? FscalC[i] : 0.0;
430                             Vcoul[i]         = (rC<rcoulomb) ? Vcoul[i] : 0.0;
431                         }
432                     }
433
434                     if((c6[i] != 0) || (c12[i] != 0))
435                     {
436                         switch(ivdw)
437                         {
438                             case GMX_NBKERNEL_VDW_LENNARDJONES:
439                                 /* cutoff LJ */
440                                 if (sc_r_power == 6.0)
441                                 {
442                                     rinv6            = rpinvV;
443                                 }
444                                 else
445                                 {
446                                     rinv6            = pow(rinvV,6.0);
447                                 }
448                                 Vvdw6            = c6[i]*rinv6;
449                                 Vvdw12           = c12[i]*rinv6*rinv6;
450                                 if(fr->vdw_modifier==eintmodPOTSHIFT)
451                                 {
452                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
453                                                         -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
454                                 }
455                                 else
456                                 {
457                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
458                                 }
459                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
460                                 break;
461
462                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
463                                 gmx_fatal(FARGS,"Buckingham free energy not supported.");
464                                 break;
465
466                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
467                                 /* Table LJ */
468                                 nnn = n1V+4;
469                                 /* dispersion */
470                                 Y          = VFtab[nnn];
471                                 F          = VFtab[nnn+1];
472                                 Geps       = epsV*VFtab[nnn+2];
473                                 Heps2      = eps2V*VFtab[nnn+3];
474                                 Fp         = F+Geps+Heps2;
475                                 VV         = Y+epsV*Fp;
476                                 FF         = Fp+Geps+2.0*Heps2;
477                                 Vvdw[i]   += c6[i]*VV;
478                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
479
480                                 /* repulsion */
481                                 Y          = VFtab[nnn+4];
482                                 F          = VFtab[nnn+5];
483                                 Geps       = epsV*VFtab[nnn+6];
484                                 Heps2      = eps2V*VFtab[nnn+7];
485                                 Fp         = F+Geps+Heps2;
486                                 VV         = Y+epsV*Fp;
487                                 FF         = Fp+Geps+2.0*Heps2;
488                                 Vvdw[i]   += c12[i]*VV;
489                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
490                                 break;
491
492                             default:
493                                 Vvdw[i]    = 0.0;
494                                 FscalV[i]  = 0.0;
495                                 break;
496                         }
497
498                         if(fr->vdw_modifier==eintmodPOTSWITCH)
499                         {
500                             d                = rV-rswitch;
501                             d                = (d>0.0) ? d : 0.0;
502                             d2               = d*d;
503                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
504                             dsw              = d2*(swF2+d*(swF3+d*swF4));
505
506                             Vvdw[i]         *= sw;
507                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
508
509                             FscalV[i]        = (rV<rvdw) ? FscalV[i] : 0.0;
510                             Vvdw[i]          = (rV<rvdw) ? Vvdw[i] : 0.0;
511                         }
512                     }
513                 }
514             }
515
516             Fscal = 0;
517
518             if (icoul==GMX_NBKERNEL_ELEC_EWALD)
519             {
520                 /* because we compute the softcore normally,
521                  we have to remove the ewald short range portion. Done outside of
522                  the states loop because this part doesn't depend on the scaled R */
523
524                 if (r != 0)
525                 {
526                     VV    = gmx_erf(ewc*r)*rinv;
527                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
528                 }
529                 else
530                 {
531                     VV    = ewc*M_2_SQRTPI;
532                     FF    = 0;
533                 }
534
535                 for (i=0;i<NSTATES;i++)
536                 {
537                     vctot      -= LFC[i]*qq[i]*VV;
538                     Fscal      -= LFC[i]*qq[i]*FF;
539                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
540                 }
541             }
542
543             /* Assemble A and B states */
544             for (i=0;i<NSTATES;i++)
545             {
546                 vctot         += LFC[i]*Vcoul[i];
547                 vvtot         += LFV[i]*Vvdw[i];
548
549                 Fscal         += LFC[i]*FscalC[i]*rpm2;
550                 Fscal         += LFV[i]*FscalV[i]*rpm2;
551
552                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
553                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
554             }
555
556             if (bDoForces)
557             {
558                 tx         = Fscal*dx;     
559                 ty         = Fscal*dy;     
560                 tz         = Fscal*dz;     
561                 fix        = fix + tx;      
562                 fiy        = fiy + ty;      
563                 fiz        = fiz + tz;      
564                 f[j3]      = f[j3]   - tx;
565                 f[j3+1]    = f[j3+1] - ty;
566                 f[j3+2]    = f[j3+2] - tz;
567             }
568         }
569
570         if (bDoForces)
571         {
572             f[ii3]         = f[ii3]        + fix;
573             f[ii3+1]       = f[ii3+1]      + fiy;
574             f[ii3+2]       = f[ii3+2]      + fiz;
575             fshift[is3]    = fshift[is3]   + fix;
576             fshift[is3+1]  = fshift[is3+1] + fiy;
577             fshift[is3+2]  = fshift[is3+2] + fiz;
578         }
579         ggid               = gid[n];
580         Vc[ggid]           = Vc[ggid] + vctot;
581         Vv[ggid]           = Vv[ggid] + vvtot;
582     }
583
584     dvdl[efptCOUL]     += dvdl_coul;
585     dvdl[efptVDW]      += dvdl_vdw;
586
587     /* Estimate flops, average for free energy stuff:
588      * 12  flops per outer iteration
589      * 150 flops per inner iteration
590      */
591     inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*150);
592 }
593
594 real
595 nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,real alpha_vdw,
596                                real tabscale,real *vftab,
597                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
598                                real LFC[2], real LFV[2],real DLF[2],
599                                real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2], real dlfac_vdw[2],
600                                real sigma6_def, real sigma6_min,real sigma2_def, real sigma2_min,
601                                real *velectot, real *vvdwtot, real *dvdl)
602 {
603     real       r,rp,rpm2,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VV,FF,fscal;
604     real       qq[2],c6[2],c12[2],sigma6[2],sigma2[2],sigma_pow[2],sigma_powm2[2];
605     real       alpha_coul_eff,alpha_vdw_eff,dvdl_coul,dvdl_vdw;
606     real       rpinv,r_coul,r_vdw,velecsum,vvdwsum;
607     real       fscal_vdw[2],fscal_elec[2];
608     real       velec[2],vvdw[2];
609     int        i,ntab;
610
611     qq[0]    = qqA;
612     qq[1]    = qqB;
613     c6[0]    = c6A;
614     c6[1]    = c6B;
615     c12[0]   = c12A;
616     c12[1]   = c12B;
617
618     if (sc_r_power == 6.0)
619     {
620         rpm2             = r2*r2; /* r4 */
621         rp               = rpm2*r2; /* r6 */
622     }
623     else if (sc_r_power == 48.0)
624     {
625         rp               = r2*r2*r2;  /* r6 */
626         rp               = rp*rp; /* r12 */
627         rp               = rp*rp; /* r24 */
628         rp               = rp*rp; /* r48 */
629         rpm2             = rp/r2; /* r46 */
630     }
631     else
632     {
633         rp             = pow(r2,0.5*sc_r_power);  /* not currently supported as input, but can handle it */
634         rpm2           = rp/r2;
635     }
636
637     /* Loop over state A(0) and B(1) */
638     for (i=0;i<2;i++)
639     {
640         if((c6[i] > 0) && (c12[i] > 0))
641         {
642             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
643              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
644              */
645             sigma6[i]       = 0.5*c12[i]/c6[i];
646             sigma2[i]       = pow(0.5*c12[i]/c6[i],1.0/3.0);
647             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
648              what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
649             if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
650                 sigma6[i] = sigma6_min;
651                 sigma2[i] = sigma2_min;
652             }
653         }
654         else
655         {
656             sigma6[i]       = sigma6_def;
657             sigma2[i]       = sigma2_def;
658         }
659         if (sc_r_power == 6.0)
660         {
661             sigma_pow[i]    = sigma6[i];
662             sigma_powm2[i]  = sigma6[i]/sigma2[i];
663         }
664         else if (sc_r_power == 48.0)
665         {
666             sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
667             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
668             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
669             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
670         }
671         else
672         {    /* not really supported as input, but in here for testing the general case*/
673             sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
674             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
675         }
676     }
677
678     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
679     if ((c12[0] > 0) && (c12[1] > 0)) {
680         alpha_vdw_eff    = 0;
681         alpha_coul_eff   = 0;
682     } else {
683         alpha_vdw_eff    = alpha_vdw;
684         alpha_coul_eff   = alpha_coul;
685     }
686
687     /* Loop over A and B states again */
688     for (i=0;i<2;i++)
689     {
690         fscal_elec[i] = 0;
691         fscal_vdw[i]  = 0;
692         velec[i]      = 0;
693         vvdw[i]       = 0;
694
695         /* Only spend time on A or B state if it is non-zero */
696         if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
697         {
698             /* Coulomb */
699             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
700             r_coul           = pow(rpinv,-1.0/sc_r_power);
701
702             /* Electrostatics table lookup data */
703             rtab             = r_coul*tabscale;
704             ntab             = rtab;
705             eps              = rtab-ntab;
706             eps2             = eps*eps;
707             ntab             = 12*ntab;
708             /* Electrostatics */
709             Y                = vftab[ntab];
710             F                = vftab[ntab+1];
711             Geps             = eps*vftab[ntab+2];
712             Heps2            = eps2*vftab[ntab+3];
713             Fp               = F+Geps+Heps2;
714             VV               = Y+eps*Fp;
715             FF               = Fp+Geps+2.0*Heps2;
716             velec[i]         = qq[i]*VV;
717             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
718
719             /* Vdw */
720             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
721             r_vdw            = pow(rpinv,-1.0/sc_r_power);
722             /* Vdw table lookup data */
723             rtab             = r_vdw*tabscale;
724             ntab             = rtab;
725             eps              = rtab-ntab;
726             eps2             = eps*eps;
727             ntab             = 12*ntab;
728             /* Dispersion */
729             Y                = vftab[ntab+4];
730             F                = vftab[ntab+5];
731             Geps             = eps*vftab[ntab+6];
732             Heps2            = eps2*vftab[ntab+7];
733             Fp               = F+Geps+Heps2;
734             VV               = Y+eps*Fp;
735             FF               = Fp+Geps+2.0*Heps2;
736             vvdw[i]          = c6[i]*VV;
737             fscal_vdw[i]     = -c6[i]*FF;
738
739             /* Repulsion */
740             Y                = vftab[ntab+8];
741             F                = vftab[ntab+9];
742             Geps             = eps*vftab[ntab+10];
743             Heps2            = eps2*vftab[ntab+11];
744             Fp               = F+Geps+Heps2;
745             VV               = Y+eps*Fp;
746             FF               = Fp+Geps+2.0*Heps2;
747             vvdw[i]         += c12[i]*VV;
748             fscal_vdw[i]    -= c12[i]*FF;
749             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
750         }
751     }
752     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
753     /* Assemble A and B states */
754     velecsum  = 0;
755     vvdwsum   = 0;
756     dvdl_coul = 0;
757     dvdl_vdw  = 0;
758     fscal     = 0;
759     for (i=0;i<2;i++)
760     {
761         velecsum      += LFC[i]*velec[i];
762         vvdwsum       += LFV[i]*vvdw[i];
763
764         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
765
766         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
767         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
768     }
769
770     dvdl[efptCOUL]     += dvdl_coul;
771     dvdl[efptVDW]      += dvdl_vdw;
772
773     *velectot           = velecsum;
774     *vvdwtot            = vvdwsum;
775
776     return fscal;
777 }
778