Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47
48 void
49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
50                           rvec *                    xx,
51                           rvec *                    ff,
52                           t_forcerec *              fr,
53                           t_mdatoms *               mdatoms,
54                           nb_kernel_data_t *        kernel_data,
55                           t_nrnb *                  nrnb)
56 {
57
58 #define  STATE_A  0
59 #define  STATE_B  1
60 #define  NSTATES  2
61     int           i,j,n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
62     real          shX,shY,shZ;
63     real          Fscal,FscalC[NSTATES],FscalV[NSTATES],tx,ty,tz;
64     real          Vcoul[NSTATES],Vvdw[NSTATES];
65     real          rinv6,r,rt,rtC,rtV;
66     real          iqA,iqB;
67     real          qq[NSTATES],vctot,krsq;
68     int           ntiA,ntiB,tj[NSTATES];
69     real          Vvdw6, Vvdw12,vvtot;
70     real          ix,iy,iz,fix,fiy,fiz;
71     real          dx,dy,dz,rsq,rinv;
72     real          c6[NSTATES],c12[NSTATES];
73     real          LFC[NSTATES],LFV[NSTATES],DLF[NSTATES];
74     double        dvdl_coul,dvdl_vdw;
75     real          lfac_coul[NSTATES],dlfac_coul[NSTATES],lfac_vdw[NSTATES],dlfac_vdw[NSTATES];
76     real          sigma6[NSTATES],alpha_vdw_eff,alpha_coul_eff,sigma2_def,sigma2_min;
77     real          rp,rpm2,rC,rV,rinvC,rpinvC,rinvV,rpinvV;
78     real          sigma2[NSTATES],sigma_pow[NSTATES],sigma_powm2[NSTATES],rs,rs2;
79     int           do_coultab,do_vdwtab,do_tab,tab_elemsize;
80     int           n0,n1C,n1V,nnn;
81     real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
82     double        isp=0.564189583547756;
83     int           icoul,ivdw;
84     int           nri;
85     int *         iinr;
86     int *         jindex;
87     int *         jjnr;
88     int *         shift;
89     int *         gid;
90     int *         typeA;
91     int *         typeB;
92     int           ntype;
93     real *        shiftvec;
94     real          dvdl_part;
95     real *        fshift;
96     real          tabscale;
97     real *        VFtab;
98     real *        x;
99     real *        f;
100     real          facel,krf,crf;
101     real *        chargeA;
102     real *        chargeB;
103     real          sigma6_min,sigma6_def,lam_power,sc_power,sc_r_power;
104     real          alpha_coul,alpha_vdw,lambda_coul,lambda_vdw,ewc;
105     real *        nbfp;
106     real *        dvdl;
107     real *        Vv;
108     real *        Vc;
109     gmx_bool      bDoForces;
110     real          rcoulomb,rvdw,factor_coul,factor_vdw,sh_invrc6;
111     gmx_bool      bExactElecCutoff,bExactVdwCutoff;
112     real          rcutoff,rcutoff2,rswitch,d,d2,swV3,swV4,swV5,swF2,swF3,swF4,sw,dsw,rinvcorr;
113
114     x                   = xx[0];
115     f                   = ff[0];
116
117     fshift              = fr->fshift[0];
118     Vc                  = kernel_data->energygrp_elec;
119     Vv                  = kernel_data->energygrp_vdw;
120     tabscale            = kernel_data->table_elec_vdw->scale;
121     VFtab               = kernel_data->table_elec_vdw->data;
122
123     nri                 = nlist->nri;
124     iinr                = nlist->iinr;
125     jindex              = nlist->jindex;
126     jjnr                = nlist->jjnr;
127     icoul               = nlist->ielec;
128     ivdw                = nlist->ivdw;
129     shift               = nlist->shift;
130     gid                 = nlist->gid;
131
132     shiftvec            = fr->shift_vec[0];
133     chargeA             = mdatoms->chargeA;
134     chargeB             = mdatoms->chargeB;
135     facel               = fr->epsfac;
136     krf                 = fr->k_rf;
137     crf                 = fr->c_rf;
138     ewc                 = fr->ewaldcoeff;
139     Vc                  = kernel_data->energygrp_elec;
140     typeA               = mdatoms->typeA;
141     typeB               = mdatoms->typeB;
142     ntype               = fr->ntype;
143     nbfp                = fr->nbfp;
144     Vv                  = kernel_data->energygrp_vdw;
145     tabscale            = kernel_data->table_elec_vdw->scale;
146     VFtab               = kernel_data->table_elec_vdw->data;
147     lambda_coul         = kernel_data->lambda[efptCOUL];
148     lambda_vdw          = kernel_data->lambda[efptVDW];
149     dvdl                = kernel_data->dvdl;
150     alpha_coul          = fr->sc_alphacoul;
151     alpha_vdw           = fr->sc_alphavdw;
152     lam_power           = fr->sc_power;
153     sc_r_power          = fr->sc_r_power;
154     sigma6_def          = fr->sc_sigma6_def;
155     sigma6_min          = fr->sc_sigma6_min;
156     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
157
158     rcoulomb            = fr->rcoulomb;
159     rvdw                = fr->rvdw;
160     sh_invrc6           = fr->ic->sh_invrc6;
161
162     if(fr->coulomb_modifier==eintmodPOTSWITCH || fr->vdw_modifier==eintmodPOTSWITCH)
163     {
164         rcutoff         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
165         rcutoff2        = rcutoff*rcutoff;
166         rswitch         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
167         d               = rcutoff-rswitch;
168         swV3            = -10.0/(d*d*d);
169         swV4            =  15.0/(d*d*d*d);
170         swV5            =  -6.0/(d*d*d*d*d);
171         swF2            = -30.0/(d*d*d);
172         swF3            =  60.0/(d*d*d*d);
173         swF4            = -30.0/(d*d*d*d*d);
174     }
175     else
176     {
177         /* Stupid compilers dont realize these variables will not be used */
178         rswitch         = 0.0;
179         swV3            = 0.0;
180         swV4            = 0.0;
181         swV5            = 0.0;
182         swF2            = 0.0;
183         swF3            = 0.0;
184         swF4            = 0.0;
185     }
186
187     bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
188     bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
189
190     /* fix compiler warnings */
191     nj1   = 0;
192     n1C   = n1V   = 0;
193     epsC  = epsV  = 0;
194     eps2C = eps2V = 0;
195
196     dvdl_coul  = 0;
197     dvdl_vdw   = 0;
198
199     /* Lambda factor for state A, 1-lambda*/
200     LFC[STATE_A] = 1.0 - lambda_coul;
201     LFV[STATE_A] = 1.0 - lambda_vdw;
202
203     /* Lambda factor for state B, lambda*/
204     LFC[STATE_B] = lambda_coul;
205     LFV[STATE_B] = lambda_vdw;
206
207     /*derivative of the lambda factor for state A and B */
208     DLF[STATE_A] = -1;
209     DLF[STATE_B] = 1;
210
211     for (i=0;i<NSTATES;i++)
212     {
213         lfac_coul[i]  = (lam_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
214         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFC[i]) : 1);
215         lfac_vdw[i]   = (lam_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
216         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFV[i]) : 1);
217     }
218     /* precalculate */
219     sigma2_def = pow(sigma6_def,1.0/3.0);
220     sigma2_min = pow(sigma6_min,1.0/3.0);
221
222     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
223
224     do_coultab = (icoul==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
225     do_vdwtab  = (ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
226     
227     do_tab = do_coultab || do_vdwtab;
228     
229     /* we always use the combined table here */
230     tab_elemsize = 12;
231     
232     for(n=0; (n<nri); n++)
233     {
234         is3              = 3*shift[n];
235         shX              = shiftvec[is3];  
236         shY              = shiftvec[is3+1];
237         shZ              = shiftvec[is3+2];
238         nj0              = jindex[n];
239         nj1              = jindex[n+1];    
240         ii               = iinr[n];        
241         ii3              = 3*ii;           
242         ix               = shX + x[ii3+0];
243         iy               = shY + x[ii3+1];
244         iz               = shZ + x[ii3+2];
245         iqA              = facel*chargeA[ii];
246         iqB              = facel*chargeB[ii];
247         ntiA             = 2*ntype*typeA[ii];
248         ntiB             = 2*ntype*typeB[ii];
249         vctot            = 0;              
250         vvtot            = 0;
251         fix              = 0;              
252         fiy              = 0;              
253         fiz              = 0;              
254         
255         for(k=nj0; (k<nj1); k++)
256         {
257             jnr              = jjnr[k];        
258             j3               = 3*jnr;          
259             dx               = ix - x[j3];      
260             dy               = iy - x[j3+1];      
261             dz               = iz - x[j3+2];      
262             rsq              = dx*dx+dy*dy+dz*dz;
263             rinv             = gmx_invsqrt(rsq);
264             r                = rsq*rinv;
265             if (sc_r_power == 6.0)
266             {
267                 rpm2             = rsq*rsq; /* r4 */
268                 rp               = rpm2*rsq; /* r6 */
269             }
270             else if (sc_r_power == 48.0)
271             {
272                 rp               = rsq*rsq*rsq;  /* r6 */
273                 rp               = rp*rp; /* r12 */
274                 rp               = rp*rp; /* r24 */
275                 rp               = rp*rp; /* r48 */
276                 rpm2             = rp/rsq; /* r46 */
277             }
278             else
279             {
280                 rp             = pow(r,sc_r_power);  /* not currently supported as input, but can handle it */
281                 rpm2           = rp/rsq;
282             }
283
284             tj[STATE_A]      = ntiA+2*typeA[jnr];
285             tj[STATE_B]      = ntiB+2*typeB[jnr];
286             qq[STATE_A]      = iqA*chargeA[jnr];
287             qq[STATE_B]      = iqB*chargeB[jnr];
288
289             for (i=0;i<NSTATES;i++)
290             {
291
292                 c6[i]              = nbfp[tj[i]];
293                 c12[i]             = nbfp[tj[i]+1];
294                 if((c6[i] > 0) && (c12[i] > 0))
295                 {
296                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
297                     sigma6[i]       = 0.5*c12[i]/c6[i];
298                     sigma2[i]       = pow(sigma6[i],1.0/3.0);
299                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
300                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
301                     if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
302                         sigma6[i] = sigma6_min;
303                         sigma2[i] = sigma2_min;
304                     }
305                 }
306                 else
307                 {
308                     sigma6[i]       = sigma6_def;
309                     sigma2[i]       = sigma2_def;
310                 }
311                 if (sc_r_power == 6.0)
312                 {
313                     sigma_pow[i]    = sigma6[i];
314                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
315                 }
316                 else if (sc_r_power == 48.0) 
317                 {
318                     sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
319                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
320                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
321                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];                    
322                 }
323                 else 
324                 {    /* not really supported as input, but in here for testing the general case*/
325                     sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
326                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
327                 }
328             }
329
330             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
331             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0)) {
332                 alpha_vdw_eff    = 0;
333                 alpha_coul_eff   = 0;
334             } else {
335                 alpha_vdw_eff    = alpha_vdw;
336                 alpha_coul_eff   = alpha_coul;
337             }
338
339             for (i=0;i<NSTATES;i++) 
340             {
341                 FscalC[i]    = 0;
342                 FscalV[i]    = 0;
343                 Vcoul[i]     = 0;
344                 Vvdw[i]      = 0;
345
346                 /* Only spend time on A or B state if it is non-zero */
347                 if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
348                 {
349
350                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
351                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
352                     rinvC          = pow(rpinvC,1.0/sc_r_power);
353                     rC             = 1.0/rinvC;
354                     
355                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
356                     rinvV          = pow(rpinvV,1.0/sc_r_power);
357                     rV             = 1.0/rinvV;
358
359                     factor_coul    = (rC<=rcoulomb) ? 1.0 : 0.0;
360                     factor_vdw     = (rV<=rvdw)     ? 1.0 : 0.0;
361
362                     if (do_tab)
363                     {
364                         rtC        = rC*tabscale;
365                         n0         = rtC;
366                         epsC       = rtC-n0;
367                         eps2C      = epsC*epsC;
368                         n1C        = tab_elemsize*n0;
369
370                         rtV        = rV*tabscale;
371                         n0         = rtV;
372                         epsV       = rtV-n0;
373                         eps2V      = epsV*epsV;
374                         n1V        = tab_elemsize*n0;
375                     }
376
377                     if(qq[i] != 0)
378                     {
379                         switch(icoul)
380                         {
381                             case GMX_NBKERNEL_ELEC_COULOMB:
382                             case GMX_NBKERNEL_ELEC_EWALD:
383                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
384                                 Vcoul[i]   = qq[i]*rinvC;
385                                 FscalC[i]  = Vcoul[i]*rpinvC;
386                                 break;
387                                 
388                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
389                                 /* reaction-field */
390                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
391                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
392                                 break;
393
394                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
395                                 /* non-Ewald tabulated coulomb */
396                                 nnn        = n1C;
397                                 Y          = VFtab[nnn];
398                                 F          = VFtab[nnn+1];
399                                 Geps       = epsC*VFtab[nnn+2];
400                                 Heps2      = eps2C*VFtab[nnn+3];
401                                 Fp         = F+Geps+Heps2;
402                                 VV         = Y+epsC*Fp;
403                                 FF         = Fp+Geps+2.0*Heps2;
404                                 Vcoul[i]   = qq[i]*VV;
405                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
406                                 break;
407
408                             default:
409                                 FscalC[i]  = 0.0;
410                                 Vcoul[i]   = 0.0;
411                                 break;
412                         }
413
414                         if(fr->coulomb_modifier==eintmodPOTSWITCH)
415                         {
416                             d                = rC-rswitch;
417                             d                = (d>0.0) ? d : 0.0;
418                             d2               = d*d;
419                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
420                             dsw              = d2*(swF2+d*(swF3+d*swF4));
421
422                             Vcoul[i]        *= sw;
423                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
424                         }
425
426                         if(bExactElecCutoff)
427                         {
428                             FscalC[i]        = (rC<rcoulomb) ? FscalC[i] : 0.0;
429                             Vcoul[i]         = (rC<rcoulomb) ? Vcoul[i] : 0.0;
430                         }
431                     }
432
433                     if((c6[i] != 0) || (c12[i] != 0))
434                     {
435                         switch(ivdw)
436                         {
437                             case GMX_NBKERNEL_VDW_LENNARDJONES:
438                                 /* cutoff LJ */
439                                 if (sc_r_power == 6.0)
440                                 {
441                                     rinv6            = rpinvV;
442                                 }
443                                 else
444                                 {
445                                     rinv6            = pow(rinvV,6.0);
446                                 }
447                                 Vvdw6            = c6[i]*rinv6;
448                                 Vvdw12           = c12[i]*rinv6*rinv6;
449                                 if(fr->vdw_modifier==eintmodPOTSHIFT)
450                                 {
451                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
452                                                         -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
453                                 }
454                                 else
455                                 {
456                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
457                                 }
458                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
459                                 break;
460
461                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
462                                 gmx_fatal(FARGS,"Buckingham free energy not supported.");
463                                 break;
464
465                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
466                                 /* Table LJ */
467                                 nnn = n1V+4;
468                                 /* dispersion */
469                                 Y          = VFtab[nnn];
470                                 F          = VFtab[nnn+1];
471                                 Geps       = epsV*VFtab[nnn+2];
472                                 Heps2      = eps2V*VFtab[nnn+3];
473                                 Fp         = F+Geps+Heps2;
474                                 VV         = Y+epsV*Fp;
475                                 FF         = Fp+Geps+2.0*Heps2;
476                                 Vvdw[i]   += c6[i]*VV;
477                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
478
479                                 /* repulsion */
480                                 Y          = VFtab[nnn+4];
481                                 F          = VFtab[nnn+5];
482                                 Geps       = epsV*VFtab[nnn+6];
483                                 Heps2      = eps2V*VFtab[nnn+7];
484                                 Fp         = F+Geps+Heps2;
485                                 VV         = Y+epsV*Fp;
486                                 FF         = Fp+Geps+2.0*Heps2;
487                                 Vvdw[i]   += c12[i]*VV;
488                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
489                                 break;
490
491                             default:
492                                 Vvdw[i]    = 0.0;
493                                 FscalV[i]  = 0.0;
494                                 break;
495                         }
496
497                         if(fr->vdw_modifier==eintmodPOTSWITCH)
498                         {
499                             d                = rV-rswitch;
500                             d                = (d>0.0) ? d : 0.0;
501                             d2               = d*d;
502                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
503                             dsw              = d2*(swF2+d*(swF3+d*swF4));
504
505                             Vvdw[i]         *= sw;
506                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
507
508                             FscalV[i]        = (rV<rvdw) ? FscalV[i] : 0.0;
509                             Vvdw[i]          = (rV<rvdw) ? Vvdw[i] : 0.0;
510                         }
511                     }
512                 }
513             }
514
515             Fscal = 0;
516
517             if (icoul==GMX_NBKERNEL_ELEC_EWALD)
518             {
519                 /* because we compute the softcore normally,
520                  we have to remove the ewald short range portion. Done outside of
521                  the states loop because this part doesn't depend on the scaled R */
522
523                 if (r != 0)
524                 {
525                     VV    = gmx_erf(ewc*r)*rinv;
526                     FF    = rinv*rinv*(VV - 2.0*ewc*isp*exp(-ewc*ewc*rsq));
527                 }
528                 else
529                 {
530                     VV    = ewc*2.0/sqrt(M_PI);
531                     FF    = 0;
532                 }
533
534                 for (i=0;i<NSTATES;i++)
535                 {
536                     vctot      -= LFC[i]*qq[i]*VV;
537                     Fscal      -= LFC[i]*qq[i]*FF;
538                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
539                 }
540             }
541
542             /* Assemble A and B states */
543             for (i=0;i<NSTATES;i++)
544             {
545                 vctot         += LFC[i]*Vcoul[i];
546                 vvtot         += LFV[i]*Vvdw[i];
547
548                 Fscal         += LFC[i]*FscalC[i]*rpm2;
549                 Fscal         += LFV[i]*FscalV[i]*rpm2;
550
551                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
552                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
553             }
554
555             if (bDoForces)
556             {
557                 tx         = Fscal*dx;     
558                 ty         = Fscal*dy;     
559                 tz         = Fscal*dz;     
560                 fix        = fix + tx;      
561                 fiy        = fiy + ty;      
562                 fiz        = fiz + tz;      
563                 f[j3]      = f[j3]   - tx;
564                 f[j3+1]    = f[j3+1] - ty;
565                 f[j3+2]    = f[j3+2] - tz;
566             }
567         }
568
569         if (bDoForces)
570         {
571             f[ii3]         = f[ii3]        + fix;
572             f[ii3+1]       = f[ii3+1]      + fiy;
573             f[ii3+2]       = f[ii3+2]      + fiz;
574             fshift[is3]    = fshift[is3]   + fix;
575             fshift[is3+1]  = fshift[is3+1] + fiy;
576             fshift[is3+2]  = fshift[is3+2] + fiz;
577         }
578         ggid               = gid[n];
579         Vc[ggid]           = Vc[ggid] + vctot;
580         Vv[ggid]           = Vv[ggid] + vvtot;
581     }
582
583     dvdl[efptCOUL]     += dvdl_coul;
584     dvdl[efptVDW]      += dvdl_vdw;
585
586     /* Estimate flops, average for free energy stuff:
587      * 12  flops per outer iteration
588      * 150 flops per inner iteration
589      */
590     inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*150);
591 }
592
593 real
594 nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,real alpha_vdw,
595                                real tabscale,real *vftab,
596                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
597                                real LFC[2], real LFV[2],real DLF[2],
598                                real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2], real dlfac_vdw[2],
599                                real sigma6_def, real sigma6_min,real sigma2_def, real sigma2_min,
600                                real *velectot, real *vvdwtot, real *dvdl)
601 {
602     real       r,rp,rpm2,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VV,FF,fscal;
603     real       qq[2],c6[2],c12[2],sigma6[2],sigma2[2],sigma_pow[2],sigma_powm2[2];
604     real       alpha_coul_eff,alpha_vdw_eff,dvdl_coul,dvdl_vdw;
605     real       rpinv,r_coul,r_vdw,velecsum,vvdwsum;
606     real       fscal_vdw[2],fscal_elec[2];
607     real       velec[2],vvdw[2];
608     int        i,ntab;
609
610     qq[0]    = qqA;
611     qq[1]    = qqB;
612     c6[0]    = c6A;
613     c6[1]    = c6B;
614     c12[0]   = c12A;
615     c12[1]   = c12B;
616
617     if (sc_r_power == 6.0)
618     {
619         rpm2             = r2*r2; /* r4 */
620         rp               = rpm2*r2; /* r6 */
621     }
622     else if (sc_r_power == 48.0)
623     {
624         rp               = r2*r2*r2;  /* r6 */
625         rp               = rp*rp; /* r12 */
626         rp               = rp*rp; /* r24 */
627         rp               = rp*rp; /* r48 */
628         rpm2             = rp/r2; /* r46 */
629     }
630     else
631     {
632         rp             = pow(r2,0.5*sc_r_power);  /* not currently supported as input, but can handle it */
633         rpm2           = rp/r2;
634     }
635
636     /* Loop over state A(0) and B(1) */
637     for (i=0;i<2;i++)
638     {
639         if((c6[i] > 0) && (c12[i] > 0))
640         {
641             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
642              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
643              */
644             sigma6[i]       = 0.5*c12[i]/c6[i];
645             sigma2[i]       = pow(0.5*c12[i]/c6[i],1.0/3.0);
646             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
647              what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
648             if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
649                 sigma6[i] = sigma6_min;
650                 sigma2[i] = sigma2_min;
651             }
652         }
653         else
654         {
655             sigma6[i]       = sigma6_def;
656             sigma2[i]       = sigma2_def;
657         }
658         if (sc_r_power == 6.0)
659         {
660             sigma_pow[i]    = sigma6[i];
661             sigma_powm2[i]  = sigma6[i]/sigma2[i];
662         }
663         else if (sc_r_power == 48.0)
664         {
665             sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
666             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
667             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
668             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
669         }
670         else
671         {    /* not really supported as input, but in here for testing the general case*/
672             sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
673             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
674         }
675     }
676
677     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
678     if ((c12[0] > 0) && (c12[1] > 0)) {
679         alpha_vdw_eff    = 0;
680         alpha_coul_eff   = 0;
681     } else {
682         alpha_vdw_eff    = alpha_vdw;
683         alpha_coul_eff   = alpha_coul;
684     }
685
686     /* Loop over A and B states again */
687     for (i=0;i<2;i++)
688     {
689         fscal_elec[i] = 0;
690         fscal_vdw[i]  = 0;
691         velec[i]      = 0;
692         vvdw[i]       = 0;
693
694         /* Only spend time on A or B state if it is non-zero */
695         if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
696         {
697             /* Coulomb */
698             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
699             r_coul           = pow(rpinv,-1.0/sc_r_power);
700
701             /* Electrostatics table lookup data */
702             rtab             = r_coul*tabscale;
703             ntab             = rtab;
704             eps              = rtab-ntab;
705             eps2             = eps*eps;
706             ntab             = 12*ntab;
707             /* Electrostatics */
708             Y                = vftab[ntab];
709             F                = vftab[ntab+1];
710             Geps             = eps*vftab[ntab+2];
711             Heps2            = eps2*vftab[ntab+3];
712             Fp               = F+Geps+Heps2;
713             VV               = Y+eps*Fp;
714             FF               = Fp+Geps+2.0*Heps2;
715             velec[i]         = qq[i]*VV;
716             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
717
718             /* Vdw */
719             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
720             r_vdw            = pow(rpinv,-1.0/sc_r_power);
721             /* Vdw table lookup data */
722             rtab             = r_vdw*tabscale;
723             ntab             = rtab;
724             eps              = rtab-ntab;
725             eps2             = eps*eps;
726             ntab             = 12*ntab;
727             /* Dispersion */
728             Y                = vftab[ntab+4];
729             F                = vftab[ntab+5];
730             Geps             = eps*vftab[ntab+6];
731             Heps2            = eps2*vftab[ntab+7];
732             Fp               = F+Geps+Heps2;
733             VV               = Y+eps*Fp;
734             FF               = Fp+Geps+2.0*Heps2;
735             vvdw[i]          = c6[i]*VV;
736             fscal_vdw[i]     = -c6[i]*FF;
737
738             /* Repulsion */
739             Y                = vftab[ntab+8];
740             F                = vftab[ntab+9];
741             Geps             = eps*vftab[ntab+10];
742             Heps2            = eps2*vftab[ntab+11];
743             Fp               = F+Geps+Heps2;
744             VV               = Y+eps*Fp;
745             FF               = Fp+Geps+2.0*Heps2;
746             vvdw[i]         += c12[i]*VV;
747             fscal_vdw[i]    -= c12[i]*FF;
748             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
749         }
750     }
751     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
752     /* Assemble A and B states */
753     velecsum  = 0;
754     vvdwsum   = 0;
755     dvdl_coul = 0;
756     dvdl_vdw  = 0;
757     fscal     = 0;
758     for (i=0;i<2;i++)
759     {
760         velecsum      += LFC[i]*velec[i];
761         vvdwsum       += LFV[i]*vvdw[i];
762
763         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
764
765         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
766         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
767     }
768
769     dvdl[efptCOUL]     += dvdl_coul;
770     dvdl[efptVDW]      += dvdl_vdw;
771
772     *velectot           = velecsum;
773     *vvdwtot            = vvdwsum;
774
775     return fscal;
776 }
777