Merge 'release-4-6' into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47
48 void
49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
50                           rvec *                    xx,
51                           rvec *                    ff,
52                           t_forcerec *              fr,
53                           t_mdatoms *               mdatoms,
54                           nb_kernel_data_t *        kernel_data,
55                           t_nrnb *                  nrnb)
56 {
57
58 #define  STATE_A  0
59 #define  STATE_B  1
60 #define  NSTATES  2
61     int           i,j,n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
62     real          shX,shY,shZ;
63     real          Fscal,FscalC[NSTATES],FscalV[NSTATES],tx,ty,tz;
64     real          Vcoul[NSTATES],Vvdw[NSTATES];
65     real          rinv6,r,rt,rtC,rtV;
66     real          iqA,iqB;
67     real          qq[NSTATES],vctot,krsq;
68     int           ntiA,ntiB,tj[NSTATES];
69     real          Vvdw6, Vvdw12,vvtot;
70     real          ix,iy,iz,fix,fiy,fiz;
71     real          dx,dy,dz,rsq,rinv;
72     real          c6[NSTATES],c12[NSTATES];
73     real          LFC[NSTATES],LFV[NSTATES],DLF[NSTATES];
74     double        dvdl_coul,dvdl_vdw;
75     real          lfac_coul[NSTATES],dlfac_coul[NSTATES],lfac_vdw[NSTATES],dlfac_vdw[NSTATES];
76     real          sigma6[NSTATES],alpha_vdw_eff,alpha_coul_eff,sigma2_def,sigma2_min;
77     real          rp,rpm2,rC,rV,rinvC,rpinvC,rinvV,rpinvV;
78     real          sigma2[NSTATES],sigma_pow[NSTATES],sigma_powm2[NSTATES],rs,rs2;
79     int           do_coultab,do_vdwtab,do_tab,tab_elemsize;
80     int           n0,n1C,n1V,nnn;
81     real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
82     double        isp=0.564189583547756;
83     int           icoul,ivdw;
84     int           nri;
85     int *         iinr;
86     int *         jindex;
87     int *         jjnr;
88     int *         shift;
89     int *         gid;
90     int *         typeA;
91     int *         typeB;
92     int           ntype;
93     real *        shiftvec;
94     real          dvdl_part;
95     real *        fshift;
96     real          tabscale;
97     real *        VFtab;
98     real *        x;
99     real *        f;
100     real          facel,krf,crf;
101     real *        chargeA;
102     real *        chargeB;
103     real          sigma6_min,sigma6_def,lam_power,sc_power,sc_r_power;
104     real          alpha_coul,alpha_vdw,lambda_coul,lambda_vdw,ewc;
105     real *        nbfp;
106     real *        dvdl;
107     real *        Vv;
108     real *        Vc;
109     gmx_bool      bDoForces;
110     real          rcoulomb,rvdw,factor_coul,factor_vdw,sh_invrc6;
111     gmx_bool      bExactElecCutoff,bExactVdwCutoff;
112     real          rcutoff,rcutoff2,rswitch,d,d2,swV3,swV4,swV5,swF2,swF3,swF4,sw,dsw,rinvcorr;
113
114     x                   = xx[0];
115     f                   = ff[0];
116
117     fshift              = fr->fshift[0];
118     Vc                  = kernel_data->energygrp_elec;
119     Vv                  = kernel_data->energygrp_vdw;
120     tabscale            = kernel_data->table_elec_vdw->scale;
121     VFtab               = kernel_data->table_elec_vdw->data;
122
123     nri                 = nlist->nri;
124     iinr                = nlist->iinr;
125     jindex              = nlist->jindex;
126     jjnr                = nlist->jjnr;
127     icoul               = nlist->ielec;
128     ivdw                = nlist->ivdw;
129     shift               = nlist->shift;
130     gid                 = nlist->gid;
131
132     shiftvec            = fr->shift_vec[0];
133     chargeA             = mdatoms->chargeA;
134     chargeB             = mdatoms->chargeB;
135     facel               = fr->epsfac;
136     krf                 = fr->k_rf;
137     crf                 = fr->c_rf;
138     ewc                 = fr->ewaldcoeff;
139     Vc                  = kernel_data->energygrp_elec;
140     typeA               = mdatoms->typeA;
141     typeB               = mdatoms->typeB;
142     ntype               = fr->ntype;
143     nbfp                = fr->nbfp;
144     Vv                  = kernel_data->energygrp_vdw;
145     tabscale            = kernel_data->table_elec_vdw->scale;
146     VFtab               = kernel_data->table_elec_vdw->data;
147     lambda_coul         = kernel_data->lambda[efptCOUL];
148     lambda_vdw          = kernel_data->lambda[efptVDW];
149     dvdl                = kernel_data->dvdl;
150     alpha_coul          = fr->sc_alphacoul;
151     alpha_vdw           = fr->sc_alphavdw;
152     lam_power           = fr->sc_power;
153     sc_r_power          = fr->sc_r_power;
154     sigma6_def          = fr->sc_sigma6_def;
155     sigma6_min          = fr->sc_sigma6_min;
156     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
157
158     rcoulomb            = fr->rcoulomb;
159     rvdw                = fr->rvdw;
160     sh_invrc6           = fr->ic->sh_invrc6;
161
162     if(fr->coulomb_modifier==eintmodPOTSWITCH || fr->vdw_modifier==eintmodPOTSWITCH)
163     {
164         rcutoff         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
165         rcutoff2        = rcutoff*rcutoff;
166         rswitch         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
167         d               = rcutoff-rswitch;
168         swV3            = -10.0/(d*d*d);
169         swV4            =  15.0/(d*d*d*d);
170         swV5            =  -6.0/(d*d*d*d*d);
171         swF2            = -30.0/(d*d*d);
172         swF3            =  60.0/(d*d*d*d);
173         swF4            = -30.0/(d*d*d*d*d);
174     }
175     else
176     {
177         /* Stupid compilers dont realize these variables will not be used */
178         rswitch         = 0.0;
179         swV3            = 0.0;
180         swV4            = 0.0;
181         swV5            = 0.0;
182         swF2            = 0.0;
183         swF3            = 0.0;
184         swF4            = 0.0;
185     }
186
187     bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
188     bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
189
190     /* fix compiler warnings */
191     nj1   = 0;
192     n1C   = n1V   = 0;
193     epsC  = epsV  = 0;
194     eps2C = eps2V = 0;
195
196     dvdl_coul  = 0;
197     dvdl_vdw   = 0;
198
199     /* Lambda factor for state A, 1-lambda*/
200     LFC[STATE_A] = 1.0 - lambda_coul;
201     LFV[STATE_A] = 1.0 - lambda_vdw;
202
203     /* Lambda factor for state B, lambda*/
204     LFC[STATE_B] = lambda_coul;
205     LFV[STATE_B] = lambda_vdw;
206
207     /*derivative of the lambda factor for state A and B */
208     DLF[STATE_A] = -1;
209     DLF[STATE_B] = 1;
210
211     for (i=0;i<NSTATES;i++)
212     {
213         lfac_coul[i]  = (lam_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
214         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFC[i]) : 1);
215         lfac_vdw[i]   = (lam_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
216         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFV[i]) : 1);
217     }
218     /* precalculate */
219     sigma2_def = pow(sigma6_def,1.0/3.0);
220     sigma2_min = pow(sigma6_min,1.0/3.0);
221
222     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
223
224     do_coultab = (icoul==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
225     do_vdwtab  = (ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
226     
227     do_tab = do_coultab || do_vdwtab;
228     
229     /* we always use the combined table here */
230     tab_elemsize = 12;
231     
232     for(n=0; (n<nri); n++)
233     {
234         is3              = 3*shift[n];
235         shX              = shiftvec[is3];  
236         shY              = shiftvec[is3+1];
237         shZ              = shiftvec[is3+2];
238         nj0              = jindex[n];
239         nj1              = jindex[n+1];    
240         ii               = iinr[n];        
241         ii3              = 3*ii;           
242         ix               = shX + x[ii3+0];
243         iy               = shY + x[ii3+1];
244         iz               = shZ + x[ii3+2];
245         iqA              = facel*chargeA[ii];
246         iqB              = facel*chargeB[ii];
247         ntiA             = 2*ntype*typeA[ii];
248         ntiB             = 2*ntype*typeB[ii];
249         vctot            = 0;              
250         vvtot            = 0;
251         fix              = 0;              
252         fiy              = 0;              
253         fiz              = 0;              
254         
255         for(k=nj0; (k<nj1); k++)
256         {
257             jnr              = jjnr[k];        
258             j3               = 3*jnr;          
259             dx               = ix - x[j3];      
260             dy               = iy - x[j3+1];      
261             dz               = iz - x[j3+2];      
262             rsq              = dx*dx+dy*dy+dz*dz;
263             rinv             = gmx_invsqrt(rsq);
264             r                = rsq*rinv;
265             if (sc_r_power == 6.0)
266             {
267                 rpm2             = rsq*rsq; /* r4 */
268                 rp               = rpm2*rsq; /* r6 */
269             }
270             else if (sc_r_power == 48.0)
271             {
272                 rp               = rsq*rsq*rsq;  /* r6 */
273                 rp               = rp*rp; /* r12 */
274                 rp               = rp*rp; /* r24 */
275                 rp               = rp*rp; /* r48 */
276                 rpm2             = rp/rsq; /* r46 */
277             }
278             else
279             {
280                 rp             = pow(r,sc_r_power);  /* not currently supported as input, but can handle it */
281                 rpm2           = rp/rsq;
282             }
283
284             tj[STATE_A]      = ntiA+2*typeA[jnr];
285             tj[STATE_B]      = ntiB+2*typeB[jnr];
286             qq[STATE_A]      = iqA*chargeA[jnr];
287             qq[STATE_B]      = iqB*chargeB[jnr];
288
289             for (i=0;i<NSTATES;i++)
290             {
291
292                 c6[i]              = nbfp[tj[i]];
293                 c12[i]             = nbfp[tj[i]+1];
294                 if((c6[i] > 0) && (c12[i] > 0))
295                 {
296                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
297                     sigma6[i]       = 0.5*c12[i]/c6[i];
298                     sigma2[i]       = pow(sigma6[i],1.0/3.0);
299                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
300                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
301                     if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
302                         sigma6[i] = sigma6_min;
303                         sigma2[i] = sigma2_min;
304                     }
305                 }
306                 else
307                 {
308                     sigma6[i]       = sigma6_def;
309                     sigma2[i]       = sigma2_def;
310                 }
311                 if (sc_r_power == 6.0)
312                 {
313                     sigma_pow[i]    = sigma6[i];
314                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
315                 }
316                 else if (sc_r_power == 48.0) 
317                 {
318                     sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
319                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
320                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
321                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];                    
322                 }
323                 else 
324                 {    /* not really supported as input, but in here for testing the general case*/
325                     sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
326                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
327                 }
328             }
329
330             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
331             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0)) {
332                 alpha_vdw_eff    = 0;
333                 alpha_coul_eff   = 0;
334             } else {
335                 alpha_vdw_eff    = alpha_vdw;
336                 alpha_coul_eff   = alpha_coul;
337             }
338
339             for (i=0;i<NSTATES;i++) 
340             {
341                 FscalC[i]    = 0;
342                 FscalV[i]    = 0;
343                 Vcoul[i]     = 0;
344                 Vvdw[i]      = 0;
345
346                 /* Only spend time on A or B state if it is non-zero */
347                 if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
348                 {
349
350                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
351                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
352                     rinvC          = pow(rpinvC,1.0/sc_r_power);
353                     rC             = 1.0/rinvC;
354                     
355                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
356                     rinvV          = pow(rpinvV,1.0/sc_r_power);
357                     rV             = 1.0/rinvV;
358
359                     factor_coul    = (rC<=rcoulomb) ? 1.0 : 0.0;
360                     factor_vdw     = (rV<=rvdw)     ? 1.0 : 0.0;
361
362                     if (do_tab)
363                     {
364                         rtC        = rC*tabscale;
365                         n0         = rtC;
366                         epsC       = rtC-n0;
367                         eps2C      = epsC*epsC;
368                         n1C        = tab_elemsize*n0;
369
370                         rtV        = rV*tabscale;
371                         n0         = rtV;
372                         epsV       = rtV-n0;
373                         eps2V      = epsV*epsV;
374                         n1V        = tab_elemsize*n0;
375                     }
376
377                     if(qq[i] != 0)
378                     {
379                         switch(icoul)
380                         {
381                             case GMX_NBKERNEL_ELEC_COULOMB:
382                             case GMX_NBKERNEL_ELEC_EWALD:
383                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
384                                 Vcoul[i]   = qq[i]*rinvC;
385                                 FscalC[i]  = Vcoul[i]*rpinvC;
386                                 break;
387                                 
388                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
389                                 /* reaction-field */
390                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
391                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
392                                 break;
393
394                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
395                                 /* non-Ewald tabulated coulomb */
396                                 nnn        = n1C;
397                                 Y          = VFtab[nnn];
398                                 F          = VFtab[nnn+1];
399                                 Geps       = epsC*VFtab[nnn+2];
400                                 Heps2      = eps2C*VFtab[nnn+3];
401                                 Fp         = F+Geps+Heps2;
402                                 VV         = Y+epsC*Fp;
403                                 FF         = Fp+Geps+2.0*Heps2;
404                                 Vcoul[i]   = qq[i]*VV;
405                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
406                                 break;
407
408                             default:
409                                 FscalC[i]  = 0.0;
410                                 Vcoul[i]   = 0.0;
411                                 break;
412                         }
413
414                         if(fr->coulomb_modifier==eintmodPOTSWITCH)
415                         {
416                             d                = rC-rswitch;
417                             d                = (d>0.0) ? d : 0.0;
418                             d2               = d*d;
419                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
420                             dsw              = d2*(swF2+d*(swF3+d*swF4));
421
422                             Vcoul[i]        *= sw;
423                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
424                         }
425
426                         if(bExactElecCutoff)
427                         {
428                             FscalC[i]        = (rC<rcoulomb) ? FscalC[i] : 0.0;
429                             Vcoul[i]         = (rC<rcoulomb) ? Vcoul[i] : 0.0;
430                         }
431                     }
432
433                     if((c6[i] != 0) || (c12[i] != 0))
434                     {
435                         switch(ivdw)
436                         {
437                             case GMX_NBKERNEL_VDW_LENNARDJONES:
438                                 /* cutoff LJ */
439                                 if (sc_r_power == 6.0)
440                                 {
441                                     rinv6            = rpinvV;
442                                 }
443                                 else
444                                 {
445                                     rinv6            = pow(rinvV,6.0);
446                                 }
447                                 Vvdw6            = c6[i]*rinv6;
448                                 Vvdw12           = c12[i]*rinv6*rinv6;
449                                 if(fr->vdw_modifier==eintmodPOTSHIFT)
450                                 {
451                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
452                                                         -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
453                                 }
454                                 else
455                                 {
456                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
457                                 }
458                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
459                                 break;
460
461                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
462                                 gmx_fatal(FARGS,"Buckingham free energy not supported.");
463                                 break;
464
465                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
466                                 /* Table LJ */
467                                 nnn = n1V+4;
468                                 /* dispersion */
469                                 Y          = VFtab[nnn];
470                                 F          = VFtab[nnn+1];
471                                 Geps       = epsV*VFtab[nnn+2];
472                                 Heps2      = eps2V*VFtab[nnn+3];
473                                 Fp         = F+Geps+Heps2;
474                                 VV         = Y+epsV*Fp;
475                                 FF         = Fp+Geps+2.0*Heps2;
476                                 Vvdw[i]   += c6[i]*VV;
477                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
478
479                                 /* repulsion */
480                                 Y          = VFtab[nnn+4];
481                                 F          = VFtab[nnn+5];
482                                 Geps       = epsV*VFtab[nnn+6];
483                                 Heps2      = eps2V*VFtab[nnn+7];
484                                 Fp         = F+Geps+Heps2;
485                                 VV         = Y+epsV*Fp;
486                                 FF         = Fp+Geps+2.0*Heps2;
487                                 Vvdw[i]   += c12[i]*VV;
488                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
489                                 break;
490
491                             default:
492                                 Vvdw[i]    = 0.0;
493                                 FscalV[i]  = 0.0;
494                                 break;
495                         }
496
497                         if(fr->vdw_modifier==eintmodPOTSWITCH)
498                         {
499                             d                = rV-rswitch;
500                             d                = (d>0.0) ? d : 0.0;
501                             d2               = d*d;
502                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
503                             dsw              = d2*(swF2+d*(swF3+d*swF4));
504
505                             Vvdw[i]         *= sw;
506                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
507
508                             FscalV[i]        = (rV<rvdw) ? FscalV[i] : 0.0;
509                             Vvdw[i]          = (rV<rvdw) ? Vvdw[i] : 0.0;
510                         }
511                     }
512                 }
513             }
514
515             Fscal = 0;
516
517             if (icoul==GMX_NBKERNEL_ELEC_EWALD)
518             {
519                 /* because we compute the softcore normally,
520                  we have to remove the ewald short range portion. Done outside of
521                  the states loop because this part doesn't depend on the scaled R */
522
523                 if (r != 0)
524                 {
525                     VV    = gmx_erf(ewc*r)*rinv;
526                     FF    = rinv*rinv*(VV - 2.0*ewc*isp*exp(-ewc*ewc*rsq));
527                 }
528                 else
529                 {
530                     VV    = ewc*2.0/sqrt(M_PI);
531                     FF    = 0;
532                 }
533
534                 for (i=0;i<NSTATES;i++)
535                 {
536                     vctot      -= LFC[i]*qq[i]*VV;
537                     Fscal      -= LFC[i]*qq[i]*FF;
538                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
539                 }
540             }
541
542             /* Assemble A and B states */
543             for (i=0;i<NSTATES;i++)
544             {
545                 vctot         += LFC[i]*Vcoul[i];
546                 vvtot         += LFV[i]*Vvdw[i];
547
548                 Fscal         += LFC[i]*FscalC[i]*rpm2;
549                 Fscal         += LFV[i]*FscalV[i]*rpm2;
550
551                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
552                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
553             }
554
555             if (bDoForces)
556             {
557                 tx         = Fscal*dx;     
558                 ty         = Fscal*dy;     
559                 tz         = Fscal*dz;     
560                 fix        = fix + tx;      
561                 fiy        = fiy + ty;      
562                 fiz        = fiz + tz;      
563                 f[j3]      = f[j3]   - tx;
564                 f[j3+1]    = f[j3+1] - ty;
565                 f[j3+2]    = f[j3+2] - tz;
566             }
567         }
568
569         if (bDoForces)
570         {
571             f[ii3]         = f[ii3]        + fix;
572             f[ii3+1]       = f[ii3+1]      + fiy;
573             f[ii3+2]       = f[ii3+2]      + fiz;
574             fshift[is3]    = fshift[is3]   + fix;
575             fshift[is3+1]  = fshift[is3+1] + fiy;
576             fshift[is3+2]  = fshift[is3+2] + fiz;
577         }
578         ggid               = gid[n];
579         Vc[ggid]           = Vc[ggid] + vctot;
580         Vv[ggid]           = Vv[ggid] + vvtot;
581     }
582
583     dvdl[efptCOUL]     += dvdl_coul;
584     dvdl[efptVDW]      += dvdl_vdw;
585
586     /* Estimate flops, average for free energy stuff:
587      * 12  flops per outer iteration
588      * 150 flops per inner iteration
589      */
590     inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*150);
591 }
592
593 real
594 nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,real alpha_vdw,
595                                real tabscale,real *vftab,
596                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
597                                real LFC[2], real LFV[2],real DLF[2],
598                                real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2], real dlfac_vdw[2],
599                                real sigma6_def, real sigma6_min,real sigma2_def, real sigma2_min,
600                                real *velectot, real *vvdwtot, real *dvdl)
601 {
602     real       r,rp,rpm2,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VV,FF,fscal;
603     real       qq[2],c6[2],c12[2],sigma6[2],sigma2[2],sigma_pow[2],sigma_powm2[2];
604     real       alpha_coul_eff,alpha_vdw_eff,dvdl_coul,dvdl_vdw;
605     real       rpinv,r_coul,r_vdw,velecsum,vvdwsum;
606     real       fscal_vdw[2],fscal_elec[2];
607     real       velec[2],vvdw[2];
608     int        i,ntab;
609
610     qq[0]    = qqA;
611     qq[1]    = qqB;
612     c6[0]    = c6A;
613     c6[1]    = c6B;
614     c12[0]   = c12A;
615     c12[1]   = c12B;
616
617     if (sc_r_power == 6.0)
618     {
619         rpm2             = r2*r2; /* r4 */
620         rp               = rpm2*r2; /* r6 */
621     }
622     else if (sc_r_power == 48.0)
623     {
624         rp               = r2*r2*r2;  /* r6 */
625         rp               = rp*rp; /* r12 */
626         rp               = rp*rp; /* r24 */
627         rp               = rp*rp; /* r48 */
628         rpm2             = rp/r2; /* r46 */
629     }
630     else
631     {
632         rp             = pow(r2,0.5*sc_r_power);  /* not currently supported as input, but can handle it */
633         rpm2           = rp/r2;
634     }
635
636     /* Loop over state A(0) and B(1) */
637     for (i=0;i<2;i++)
638     {
639         if((c6[i] > 0) && (c12[i] > 0))
640         {
641             sigma6[i]       = c12[i]/c6[i];
642             sigma2[i]       = pow(c12[i]/c6[i],1.0/3.0);
643             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
644              what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
645             if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
646                 sigma6[i] = sigma6_min;
647                 sigma2[i] = sigma2_min;
648             }
649         }
650         else
651         {
652             sigma6[i]       = sigma6_def;
653             sigma2[i]       = sigma2_def;
654         }
655         if (sc_r_power == 6.0)
656         {
657             sigma_pow[i]    = sigma6[i];
658             sigma_powm2[i]  = sigma6[i]/sigma2[i];
659         }
660         else if (sc_r_power == 48.0)
661         {
662             sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
663             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
664             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
665             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
666         }
667         else
668         {    /* not really supported as input, but in here for testing the general case*/
669             sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
670             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
671         }
672     }
673
674     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
675     if ((c12[0] > 0) && (c12[1] > 0)) {
676         alpha_vdw_eff    = 0;
677         alpha_coul_eff   = 0;
678     } else {
679         alpha_vdw_eff    = alpha_vdw;
680         alpha_coul_eff   = alpha_coul;
681     }
682
683     /* Loop over A and B states again */
684     for (i=0;i<2;i++)
685     {
686         fscal_elec[i] = 0;
687         fscal_vdw[i]  = 0;
688         velec[i]      = 0;
689         vvdw[i]       = 0;
690
691         /* Only spend time on A or B state if it is non-zero */
692         if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
693         {
694             /* Coulomb */
695             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
696             r_coul           = pow(rpinv,-1.0/sc_r_power);
697
698             /* Electrostatics table lookup data */
699             rtab             = r_coul*tabscale;
700             ntab             = rtab;
701             eps              = rtab-ntab;
702             eps2             = eps*eps;
703             ntab             = 12*ntab;
704             /* Electrostatics */
705             Y                = vftab[ntab];
706             F                = vftab[ntab+1];
707             Geps             = eps*vftab[ntab+2];
708             Heps2            = eps2*vftab[ntab+3];
709             Fp               = F+Geps+Heps2;
710             VV               = Y+eps*Fp;
711             FF               = Fp+Geps+2.0*Heps2;
712             velec[i]         = qq[i]*VV;
713             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
714
715             /* Vdw */
716             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
717             r_vdw            = pow(rpinv,-1.0/sc_r_power);
718             /* Vdw table lookup data */
719             rtab             = r_vdw*tabscale;
720             ntab             = rtab;
721             eps              = rtab-ntab;
722             eps2             = eps*eps;
723             ntab             = 12*ntab;
724             /* Dispersion */
725             Y                = vftab[ntab+4];
726             F                = vftab[ntab+5];
727             Geps             = eps*vftab[ntab+6];
728             Heps2            = eps2*vftab[ntab+7];
729             Fp               = F+Geps+Heps2;
730             VV               = Y+eps*Fp;
731             FF               = Fp+Geps+2.0*Heps2;
732             vvdw[i]          = c6[i]*VV;
733             fscal_vdw[i]     = -c6[i]*FF;
734
735             /* Repulsion */
736             Y                = vftab[ntab+8];
737             F                = vftab[ntab+9];
738             Geps             = eps*vftab[ntab+10];
739             Heps2            = eps2*vftab[ntab+11];
740             Fp               = F+Geps+Heps2;
741             VV               = Y+eps*Fp;
742             FF               = Fp+Geps+2.0*Heps2;
743             vvdw[i]         += c12[i]*VV;
744             fscal_vdw[i]    -= c12[i]*FF;
745             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
746         }
747     }
748     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
749     /* Assemble A and B states */
750     velecsum  = 0;
751     vvdwsum   = 0;
752     dvdl_coul = 0;
753     dvdl_vdw  = 0;
754     fscal     = 0;
755     for (i=0;i<2;i++)
756     {
757         velecsum      += LFC[i]*velec[i];
758         vvdwsum       += LFV[i]*vvdw[i];
759
760         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
761
762         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
763         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
764     }
765
766     dvdl[efptCOUL]     += dvdl_coul;
767     dvdl[efptVDW]      += dvdl_vdw;
768
769     *velectot           = velecsum;
770     *vvdwtot            = vvdwsum;
771
772     return fscal;
773 }
774