Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41
42 #include "vec.h"
43 #include "typedefs.h"
44 #include "nonbonded.h"
45 #include "nb_kernel.h"
46 #include "nrnb.h"
47
48 void
49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
50                           rvec *                    xx,
51                           rvec *                    ff,
52                           t_forcerec *              fr,
53                           t_mdatoms *               mdatoms,
54                           nb_kernel_data_t *        kernel_data,
55                           t_nrnb *                  nrnb)
56 {
57
58 #define  STATE_A  0
59 #define  STATE_B  1
60 #define  NSTATES  2
61     int           i,j,n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
62     real          shX,shY,shZ;
63     real          Fscal,FscalC[NSTATES],FscalV[NSTATES],tx,ty,tz;
64     real          Vcoul[NSTATES],Vvdw[NSTATES];
65     real          rinv6,r,rt,rtC,rtV;
66     real          iqA,iqB;
67     real          qq[NSTATES],vctot,krsq;
68     int           ntiA,ntiB,tj[NSTATES];
69     real          Vvdw6, Vvdw12,vvtot;
70     real          ix,iy,iz,fix,fiy,fiz;
71     real          dx,dy,dz,rsq,rinv;
72     real          c6[NSTATES],c12[NSTATES];
73     real          LFC[NSTATES],LFV[NSTATES],DLF[NSTATES];
74     double        dvdl_coul,dvdl_vdw;
75     real          lfac_coul[NSTATES],dlfac_coul[NSTATES],lfac_vdw[NSTATES],dlfac_vdw[NSTATES];
76     real          sigma6[NSTATES],alpha_vdw_eff,alpha_coul_eff,sigma2_def,sigma2_min;
77     real          rp,rpm2,rC,rV,rinvC,rpinvC,rinvV,rpinvV;
78     real          sigma2[NSTATES],sigma_pow[NSTATES],sigma_powm2[NSTATES],rs,rs2;
79     int           do_coultab,do_vdwtab,do_tab,tab_elemsize;
80     int           n0,n1C,n1V,nnn;
81     real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
82     int           icoul,ivdw;
83     int           nri;
84     int *         iinr;
85     int *         jindex;
86     int *         jjnr;
87     int *         shift;
88     int *         gid;
89     int *         typeA;
90     int *         typeB;
91     int           ntype;
92     real *        shiftvec;
93     real          dvdl_part;
94     real *        fshift;
95     real          tabscale;
96     real *        VFtab;
97     real *        x;
98     real *        f;
99     real          facel,krf,crf;
100     real *        chargeA;
101     real *        chargeB;
102     real          sigma6_min,sigma6_def,lam_power,sc_power,sc_r_power;
103     real          alpha_coul,alpha_vdw,lambda_coul,lambda_vdw,ewc;
104     real *        nbfp;
105     real *        dvdl;
106     real *        Vv;
107     real *        Vc;
108     gmx_bool      bDoForces;
109     real          rcoulomb,rvdw,factor_coul,factor_vdw,sh_invrc6;
110     gmx_bool      bExactElecCutoff,bExactVdwCutoff;
111     real          rcutoff,rcutoff2,rswitch,d,d2,swV3,swV4,swV5,swF2,swF3,swF4,sw,dsw,rinvcorr;
112
113     x                   = xx[0];
114     f                   = ff[0];
115
116     fshift              = fr->fshift[0];
117     Vc                  = kernel_data->energygrp_elec;
118     Vv                  = kernel_data->energygrp_vdw;
119     tabscale            = kernel_data->table_elec_vdw->scale;
120     VFtab               = kernel_data->table_elec_vdw->data;
121
122     nri                 = nlist->nri;
123     iinr                = nlist->iinr;
124     jindex              = nlist->jindex;
125     jjnr                = nlist->jjnr;
126     icoul               = nlist->ielec;
127     ivdw                = nlist->ivdw;
128     shift               = nlist->shift;
129     gid                 = nlist->gid;
130
131     shiftvec            = fr->shift_vec[0];
132     chargeA             = mdatoms->chargeA;
133     chargeB             = mdatoms->chargeB;
134     facel               = fr->epsfac;
135     krf                 = fr->k_rf;
136     crf                 = fr->c_rf;
137     ewc                 = fr->ewaldcoeff;
138     Vc                  = kernel_data->energygrp_elec;
139     typeA               = mdatoms->typeA;
140     typeB               = mdatoms->typeB;
141     ntype               = fr->ntype;
142     nbfp                = fr->nbfp;
143     Vv                  = kernel_data->energygrp_vdw;
144     tabscale            = kernel_data->table_elec_vdw->scale;
145     VFtab               = kernel_data->table_elec_vdw->data;
146     lambda_coul         = kernel_data->lambda[efptCOUL];
147     lambda_vdw          = kernel_data->lambda[efptVDW];
148     dvdl                = kernel_data->dvdl;
149     alpha_coul          = fr->sc_alphacoul;
150     alpha_vdw           = fr->sc_alphavdw;
151     lam_power           = fr->sc_power;
152     sc_r_power          = fr->sc_r_power;
153     sigma6_def          = fr->sc_sigma6_def;
154     sigma6_min          = fr->sc_sigma6_min;
155     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
156
157     rcoulomb            = fr->rcoulomb;
158     rvdw                = fr->rvdw;
159     sh_invrc6           = fr->ic->sh_invrc6;
160
161     if(fr->coulomb_modifier==eintmodPOTSWITCH || fr->vdw_modifier==eintmodPOTSWITCH)
162     {
163         rcutoff         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
164         rcutoff2        = rcutoff*rcutoff;
165         rswitch         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
166         d               = rcutoff-rswitch;
167         swV3            = -10.0/(d*d*d);
168         swV4            =  15.0/(d*d*d*d);
169         swV5            =  -6.0/(d*d*d*d*d);
170         swF2            = -30.0/(d*d*d);
171         swF3            =  60.0/(d*d*d*d);
172         swF4            = -30.0/(d*d*d*d*d);
173     }
174     else
175     {
176         /* Stupid compilers dont realize these variables will not be used */
177         rswitch         = 0.0;
178         swV3            = 0.0;
179         swV4            = 0.0;
180         swV5            = 0.0;
181         swF2            = 0.0;
182         swF3            = 0.0;
183         swF4            = 0.0;
184     }
185
186     bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
187     bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
188
189     /* fix compiler warnings */
190     nj1   = 0;
191     n1C   = n1V   = 0;
192     epsC  = epsV  = 0;
193     eps2C = eps2V = 0;
194
195     dvdl_coul  = 0;
196     dvdl_vdw   = 0;
197
198     /* Lambda factor for state A, 1-lambda*/
199     LFC[STATE_A] = 1.0 - lambda_coul;
200     LFV[STATE_A] = 1.0 - lambda_vdw;
201
202     /* Lambda factor for state B, lambda*/
203     LFC[STATE_B] = lambda_coul;
204     LFV[STATE_B] = lambda_vdw;
205
206     /*derivative of the lambda factor for state A and B */
207     DLF[STATE_A] = -1;
208     DLF[STATE_B] = 1;
209
210     for (i=0;i<NSTATES;i++)
211     {
212         lfac_coul[i]  = (lam_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
213         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFC[i]) : 1);
214         lfac_vdw[i]   = (lam_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
215         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power==2 ? (1-LFV[i]) : 1);
216     }
217     /* precalculate */
218     sigma2_def = pow(sigma6_def,1.0/3.0);
219     sigma2_min = pow(sigma6_min,1.0/3.0);
220
221     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
222
223     do_coultab = (icoul==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
224     do_vdwtab  = (ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
225     
226     do_tab = do_coultab || do_vdwtab;
227     
228     /* we always use the combined table here */
229     tab_elemsize = 12;
230     
231     for(n=0; (n<nri); n++)
232     {
233         is3              = 3*shift[n];
234         shX              = shiftvec[is3];  
235         shY              = shiftvec[is3+1];
236         shZ              = shiftvec[is3+2];
237         nj0              = jindex[n];
238         nj1              = jindex[n+1];    
239         ii               = iinr[n];        
240         ii3              = 3*ii;           
241         ix               = shX + x[ii3+0];
242         iy               = shY + x[ii3+1];
243         iz               = shZ + x[ii3+2];
244         iqA              = facel*chargeA[ii];
245         iqB              = facel*chargeB[ii];
246         ntiA             = 2*ntype*typeA[ii];
247         ntiB             = 2*ntype*typeB[ii];
248         vctot            = 0;              
249         vvtot            = 0;
250         fix              = 0;              
251         fiy              = 0;              
252         fiz              = 0;              
253         
254         for(k=nj0; (k<nj1); k++)
255         {
256             jnr              = jjnr[k];        
257             j3               = 3*jnr;          
258             dx               = ix - x[j3];      
259             dy               = iy - x[j3+1];      
260             dz               = iz - x[j3+2];      
261             rsq              = dx*dx+dy*dy+dz*dz;
262             rinv             = gmx_invsqrt(rsq);
263             r                = rsq*rinv;
264             if (sc_r_power == 6.0)
265             {
266                 rpm2             = rsq*rsq; /* r4 */
267                 rp               = rpm2*rsq; /* r6 */
268             }
269             else if (sc_r_power == 48.0)
270             {
271                 rp               = rsq*rsq*rsq;  /* r6 */
272                 rp               = rp*rp; /* r12 */
273                 rp               = rp*rp; /* r24 */
274                 rp               = rp*rp; /* r48 */
275                 rpm2             = rp/rsq; /* r46 */
276             }
277             else
278             {
279                 rp             = pow(r,sc_r_power);  /* not currently supported as input, but can handle it */
280                 rpm2           = rp/rsq;
281             }
282
283             tj[STATE_A]      = ntiA+2*typeA[jnr];
284             tj[STATE_B]      = ntiB+2*typeB[jnr];
285             qq[STATE_A]      = iqA*chargeA[jnr];
286             qq[STATE_B]      = iqB*chargeB[jnr];
287
288             for (i=0;i<NSTATES;i++)
289             {
290
291                 c6[i]              = nbfp[tj[i]];
292                 c12[i]             = nbfp[tj[i]+1];
293                 if((c6[i] > 0) && (c12[i] > 0))
294                 {
295                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
296                     sigma6[i]       = 0.5*c12[i]/c6[i];
297                     sigma2[i]       = pow(sigma6[i],1.0/3.0);
298                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
299                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
300                     if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
301                         sigma6[i] = sigma6_min;
302                         sigma2[i] = sigma2_min;
303                     }
304                 }
305                 else
306                 {
307                     sigma6[i]       = sigma6_def;
308                     sigma2[i]       = sigma2_def;
309                 }
310                 if (sc_r_power == 6.0)
311                 {
312                     sigma_pow[i]    = sigma6[i];
313                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
314                 }
315                 else if (sc_r_power == 48.0) 
316                 {
317                     sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
318                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
319                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
320                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];                    
321                 }
322                 else 
323                 {    /* not really supported as input, but in here for testing the general case*/
324                     sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
325                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
326                 }
327             }
328
329             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
330             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0)) {
331                 alpha_vdw_eff    = 0;
332                 alpha_coul_eff   = 0;
333             } else {
334                 alpha_vdw_eff    = alpha_vdw;
335                 alpha_coul_eff   = alpha_coul;
336             }
337
338             for (i=0;i<NSTATES;i++) 
339             {
340                 FscalC[i]    = 0;
341                 FscalV[i]    = 0;
342                 Vcoul[i]     = 0;
343                 Vvdw[i]      = 0;
344
345                 /* Only spend time on A or B state if it is non-zero */
346                 if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
347                 {
348
349                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
350                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
351                     rinvC          = pow(rpinvC,1.0/sc_r_power);
352                     rC             = 1.0/rinvC;
353                     
354                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
355                     rinvV          = pow(rpinvV,1.0/sc_r_power);
356                     rV             = 1.0/rinvV;
357
358                     factor_coul    = (rC<=rcoulomb) ? 1.0 : 0.0;
359                     factor_vdw     = (rV<=rvdw)     ? 1.0 : 0.0;
360
361                     if (do_tab)
362                     {
363                         rtC        = rC*tabscale;
364                         n0         = rtC;
365                         epsC       = rtC-n0;
366                         eps2C      = epsC*epsC;
367                         n1C        = tab_elemsize*n0;
368
369                         rtV        = rV*tabscale;
370                         n0         = rtV;
371                         epsV       = rtV-n0;
372                         eps2V      = epsV*epsV;
373                         n1V        = tab_elemsize*n0;
374                     }
375
376                     if(qq[i] != 0)
377                     {
378                         switch(icoul)
379                         {
380                             case GMX_NBKERNEL_ELEC_COULOMB:
381                             case GMX_NBKERNEL_ELEC_EWALD:
382                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
383                                 Vcoul[i]   = qq[i]*rinvC;
384                                 FscalC[i]  = Vcoul[i]*rpinvC;
385                                 break;
386                                 
387                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
388                                 /* reaction-field */
389                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
390                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
391                                 break;
392
393                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
394                                 /* non-Ewald tabulated coulomb */
395                                 nnn        = n1C;
396                                 Y          = VFtab[nnn];
397                                 F          = VFtab[nnn+1];
398                                 Geps       = epsC*VFtab[nnn+2];
399                                 Heps2      = eps2C*VFtab[nnn+3];
400                                 Fp         = F+Geps+Heps2;
401                                 VV         = Y+epsC*Fp;
402                                 FF         = Fp+Geps+2.0*Heps2;
403                                 Vcoul[i]   = qq[i]*VV;
404                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
405                                 break;
406
407                             default:
408                                 FscalC[i]  = 0.0;
409                                 Vcoul[i]   = 0.0;
410                                 break;
411                         }
412
413                         if(fr->coulomb_modifier==eintmodPOTSWITCH)
414                         {
415                             d                = rC-rswitch;
416                             d                = (d>0.0) ? d : 0.0;
417                             d2               = d*d;
418                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
419                             dsw              = d2*(swF2+d*(swF3+d*swF4));
420
421                             Vcoul[i]        *= sw;
422                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
423                         }
424
425                         if(bExactElecCutoff)
426                         {
427                             FscalC[i]        = (rC<rcoulomb) ? FscalC[i] : 0.0;
428                             Vcoul[i]         = (rC<rcoulomb) ? Vcoul[i] : 0.0;
429                         }
430                     }
431
432                     if((c6[i] != 0) || (c12[i] != 0))
433                     {
434                         switch(ivdw)
435                         {
436                             case GMX_NBKERNEL_VDW_LENNARDJONES:
437                                 /* cutoff LJ */
438                                 if (sc_r_power == 6.0)
439                                 {
440                                     rinv6            = rpinvV;
441                                 }
442                                 else
443                                 {
444                                     rinv6            = pow(rinvV,6.0);
445                                 }
446                                 Vvdw6            = c6[i]*rinv6;
447                                 Vvdw12           = c12[i]*rinv6*rinv6;
448                                 if(fr->vdw_modifier==eintmodPOTSHIFT)
449                                 {
450                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
451                                                         -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
452                                 }
453                                 else
454                                 {
455                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
456                                 }
457                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
458                                 break;
459
460                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
461                                 gmx_fatal(FARGS,"Buckingham free energy not supported.");
462                                 break;
463
464                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
465                                 /* Table LJ */
466                                 nnn = n1V+4;
467                                 /* dispersion */
468                                 Y          = VFtab[nnn];
469                                 F          = VFtab[nnn+1];
470                                 Geps       = epsV*VFtab[nnn+2];
471                                 Heps2      = eps2V*VFtab[nnn+3];
472                                 Fp         = F+Geps+Heps2;
473                                 VV         = Y+epsV*Fp;
474                                 FF         = Fp+Geps+2.0*Heps2;
475                                 Vvdw[i]   += c6[i]*VV;
476                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
477
478                                 /* repulsion */
479                                 Y          = VFtab[nnn+4];
480                                 F          = VFtab[nnn+5];
481                                 Geps       = epsV*VFtab[nnn+6];
482                                 Heps2      = eps2V*VFtab[nnn+7];
483                                 Fp         = F+Geps+Heps2;
484                                 VV         = Y+epsV*Fp;
485                                 FF         = Fp+Geps+2.0*Heps2;
486                                 Vvdw[i]   += c12[i]*VV;
487                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
488                                 break;
489
490                             default:
491                                 Vvdw[i]    = 0.0;
492                                 FscalV[i]  = 0.0;
493                                 break;
494                         }
495
496                         if(fr->vdw_modifier==eintmodPOTSWITCH)
497                         {
498                             d                = rV-rswitch;
499                             d                = (d>0.0) ? d : 0.0;
500                             d2               = d*d;
501                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
502                             dsw              = d2*(swF2+d*(swF3+d*swF4));
503
504                             Vvdw[i]         *= sw;
505                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
506
507                             FscalV[i]        = (rV<rvdw) ? FscalV[i] : 0.0;
508                             Vvdw[i]          = (rV<rvdw) ? Vvdw[i] : 0.0;
509                         }
510                     }
511                 }
512             }
513
514             Fscal = 0;
515
516             if (icoul==GMX_NBKERNEL_ELEC_EWALD)
517             {
518                 /* because we compute the softcore normally,
519                  we have to remove the ewald short range portion. Done outside of
520                  the states loop because this part doesn't depend on the scaled R */
521
522                 if (r != 0)
523                 {
524                     VV    = gmx_erf(ewc*r)*rinv;
525                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
526                 }
527                 else
528                 {
529                     VV    = ewc*M_2_SQRTPI;
530                     FF    = 0;
531                 }
532
533                 for (i=0;i<NSTATES;i++)
534                 {
535                     vctot      -= LFC[i]*qq[i]*VV;
536                     Fscal      -= LFC[i]*qq[i]*FF;
537                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
538                 }
539             }
540
541             /* Assemble A and B states */
542             for (i=0;i<NSTATES;i++)
543             {
544                 vctot         += LFC[i]*Vcoul[i];
545                 vvtot         += LFV[i]*Vvdw[i];
546
547                 Fscal         += LFC[i]*FscalC[i]*rpm2;
548                 Fscal         += LFV[i]*FscalV[i]*rpm2;
549
550                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
551                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
552             }
553
554             if (bDoForces)
555             {
556                 tx         = Fscal*dx;     
557                 ty         = Fscal*dy;     
558                 tz         = Fscal*dz;     
559                 fix        = fix + tx;      
560                 fiy        = fiy + ty;      
561                 fiz        = fiz + tz;      
562                 f[j3]      = f[j3]   - tx;
563                 f[j3+1]    = f[j3+1] - ty;
564                 f[j3+2]    = f[j3+2] - tz;
565             }
566         }
567
568         if (bDoForces)
569         {
570             f[ii3]         = f[ii3]        + fix;
571             f[ii3+1]       = f[ii3+1]      + fiy;
572             f[ii3+2]       = f[ii3+2]      + fiz;
573             fshift[is3]    = fshift[is3]   + fix;
574             fshift[is3+1]  = fshift[is3+1] + fiy;
575             fshift[is3+2]  = fshift[is3+2] + fiz;
576         }
577         ggid               = gid[n];
578         Vc[ggid]           = Vc[ggid] + vctot;
579         Vv[ggid]           = Vv[ggid] + vvtot;
580     }
581
582     dvdl[efptCOUL]     += dvdl_coul;
583     dvdl[efptVDW]      += dvdl_vdw;
584
585     /* Estimate flops, average for free energy stuff:
586      * 12  flops per outer iteration
587      * 150 flops per inner iteration
588      */
589     inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*150);
590 }
591
592 real
593 nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,real alpha_vdw,
594                                real tabscale,real *vftab,
595                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
596                                real LFC[2], real LFV[2],real DLF[2],
597                                real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2], real dlfac_vdw[2],
598                                real sigma6_def, real sigma6_min,real sigma2_def, real sigma2_min,
599                                real *velectot, real *vvdwtot, real *dvdl)
600 {
601     real       r,rp,rpm2,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VV,FF,fscal;
602     real       qq[2],c6[2],c12[2],sigma6[2],sigma2[2],sigma_pow[2],sigma_powm2[2];
603     real       alpha_coul_eff,alpha_vdw_eff,dvdl_coul,dvdl_vdw;
604     real       rpinv,r_coul,r_vdw,velecsum,vvdwsum;
605     real       fscal_vdw[2],fscal_elec[2];
606     real       velec[2],vvdw[2];
607     int        i,ntab;
608
609     qq[0]    = qqA;
610     qq[1]    = qqB;
611     c6[0]    = c6A;
612     c6[1]    = c6B;
613     c12[0]   = c12A;
614     c12[1]   = c12B;
615
616     if (sc_r_power == 6.0)
617     {
618         rpm2             = r2*r2; /* r4 */
619         rp               = rpm2*r2; /* r6 */
620     }
621     else if (sc_r_power == 48.0)
622     {
623         rp               = r2*r2*r2;  /* r6 */
624         rp               = rp*rp; /* r12 */
625         rp               = rp*rp; /* r24 */
626         rp               = rp*rp; /* r48 */
627         rpm2             = rp/r2; /* r46 */
628     }
629     else
630     {
631         rp             = pow(r2,0.5*sc_r_power);  /* not currently supported as input, but can handle it */
632         rpm2           = rp/r2;
633     }
634
635     /* Loop over state A(0) and B(1) */
636     for (i=0;i<2;i++)
637     {
638         if((c6[i] > 0) && (c12[i] > 0))
639         {
640             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
641              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
642              */
643             sigma6[i]       = 0.5*c12[i]/c6[i];
644             sigma2[i]       = pow(0.5*c12[i]/c6[i],1.0/3.0);
645             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
646              what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
647             if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
648                 sigma6[i] = sigma6_min;
649                 sigma2[i] = sigma2_min;
650             }
651         }
652         else
653         {
654             sigma6[i]       = sigma6_def;
655             sigma2[i]       = sigma2_def;
656         }
657         if (sc_r_power == 6.0)
658         {
659             sigma_pow[i]    = sigma6[i];
660             sigma_powm2[i]  = sigma6[i]/sigma2[i];
661         }
662         else if (sc_r_power == 48.0)
663         {
664             sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
665             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
666             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
667             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
668         }
669         else
670         {    /* not really supported as input, but in here for testing the general case*/
671             sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
672             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
673         }
674     }
675
676     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
677     if ((c12[0] > 0) && (c12[1] > 0)) {
678         alpha_vdw_eff    = 0;
679         alpha_coul_eff   = 0;
680     } else {
681         alpha_vdw_eff    = alpha_vdw;
682         alpha_coul_eff   = alpha_coul;
683     }
684
685     /* Loop over A and B states again */
686     for (i=0;i<2;i++)
687     {
688         fscal_elec[i] = 0;
689         fscal_vdw[i]  = 0;
690         velec[i]      = 0;
691         vvdw[i]       = 0;
692
693         /* Only spend time on A or B state if it is non-zero */
694         if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
695         {
696             /* Coulomb */
697             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
698             r_coul           = pow(rpinv,-1.0/sc_r_power);
699
700             /* Electrostatics table lookup data */
701             rtab             = r_coul*tabscale;
702             ntab             = rtab;
703             eps              = rtab-ntab;
704             eps2             = eps*eps;
705             ntab             = 12*ntab;
706             /* Electrostatics */
707             Y                = vftab[ntab];
708             F                = vftab[ntab+1];
709             Geps             = eps*vftab[ntab+2];
710             Heps2            = eps2*vftab[ntab+3];
711             Fp               = F+Geps+Heps2;
712             VV               = Y+eps*Fp;
713             FF               = Fp+Geps+2.0*Heps2;
714             velec[i]         = qq[i]*VV;
715             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
716
717             /* Vdw */
718             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
719             r_vdw            = pow(rpinv,-1.0/sc_r_power);
720             /* Vdw table lookup data */
721             rtab             = r_vdw*tabscale;
722             ntab             = rtab;
723             eps              = rtab-ntab;
724             eps2             = eps*eps;
725             ntab             = 12*ntab;
726             /* Dispersion */
727             Y                = vftab[ntab+4];
728             F                = vftab[ntab+5];
729             Geps             = eps*vftab[ntab+6];
730             Heps2            = eps2*vftab[ntab+7];
731             Fp               = F+Geps+Heps2;
732             VV               = Y+eps*Fp;
733             FF               = Fp+Geps+2.0*Heps2;
734             vvdw[i]          = c6[i]*VV;
735             fscal_vdw[i]     = -c6[i]*FF;
736
737             /* Repulsion */
738             Y                = vftab[ntab+8];
739             F                = vftab[ntab+9];
740             Geps             = eps*vftab[ntab+10];
741             Heps2            = eps2*vftab[ntab+11];
742             Fp               = F+Geps+Heps2;
743             VV               = Y+eps*Fp;
744             FF               = Fp+Geps+2.0*Heps2;
745             vvdw[i]         += c12[i]*VV;
746             fscal_vdw[i]    -= c12[i]*FF;
747             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
748         }
749     }
750     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
751     /* Assemble A and B states */
752     velecsum  = 0;
753     vvdwsum   = 0;
754     dvdl_coul = 0;
755     dvdl_vdw  = 0;
756     fscal     = 0;
757     for (i=0;i<2;i++)
758     {
759         velecsum      += LFC[i]*velec[i];
760         vvdwsum       += LFV[i]*vvdw[i];
761
762         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
763
764         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
765         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
766     }
767
768     dvdl[efptCOUL]     += dvdl_coul;
769     dvdl[efptVDW]      += dvdl_vdw;
770
771     *velectot           = velecsum;
772     *vvdwtot            = vvdwsum;
773
774     return fscal;
775 }
776