Fixed shift and switch modifiers, particularly for free-energy
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43
44 #include "vec.h"
45 #include "typedefs.h"
46 #include "nonbonded.h"
47 #include "nb_kernel.h"
48 #include "nrnb.h"
49 #include "nb_free_energy.h"
50
51 void
52 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
53                           rvec * gmx_restrict              xx,
54                           rvec * gmx_restrict              ff,
55                           t_forcerec * gmx_restrict        fr,
56                           const t_mdatoms * gmx_restrict   mdatoms,
57                           nb_kernel_data_t * gmx_restrict  kernel_data,
58                           t_nrnb * gmx_restrict            nrnb)
59 {
60
61 #define  STATE_A  0
62 #define  STATE_B  1
63 #define  NSTATES  2
64     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
65     real          shX, shY, shZ;
66     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
67     real          Vcoul[NSTATES], Vvdw[NSTATES];
68     real          rinv6, r, rt, rtC, rtV;
69     real          iqA, iqB;
70     real          qq[NSTATES], vctot, krsq;
71     int           ntiA, ntiB, tj[NSTATES];
72     real          Vvdw6, Vvdw12, vvtot;
73     real          ix, iy, iz, fix, fiy, fiz;
74     real          dx, dy, dz, rsq, rinv;
75     real          c6[NSTATES], c12[NSTATES];
76     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
77     double        dvdl_coul, dvdl_vdw;
78     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
79     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
80     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
81     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
82     int           do_tab, tab_elemsize;
83     int           n0, n1C, n1V, nnn;
84     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
85     int           icoul, ivdw;
86     int           nri;
87     const int *   iinr;
88     const int *   jindex;
89     const int *   jjnr;
90     const int *   shift;
91     const int *   gid;
92     const int *   typeA;
93     const int *   typeB;
94     int           ntype;
95     const real *  shiftvec;
96     real          dvdl_part;
97     real *        fshift;
98     real          tabscale;
99     const real *  VFtab;
100     const real *  x;
101     real *        f;
102     real          facel, krf, crf;
103     const real *  chargeA;
104     const real *  chargeB;
105     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
106     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
107     const real *  nbfp;
108     real *        dvdl;
109     real *        Vv;
110     real *        Vc;
111     gmx_bool      bDoForces;
112     real          rcoulomb, rvdw, sh_invrc6;
113     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
114     real          d, d2, sw, dsw, rinvcorr;
115     real          elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
116     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
117     gmx_bool      bConvertEwaldToCoulomb, bComputeElecInteraction;
118     const real *  ewtab;
119     int           ewitab;
120     real          ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald;
121
122     sh_ewald            = fr->ic->sh_ewald;
123     ewtab               = fr->ic->tabq_coul_FDV0;
124     ewtabscale          = fr->ic->tabq_scale;
125     ewtabhalfspace      = 0.5/ewtabscale;
126
127     x                   = xx[0];
128     f                   = ff[0];
129
130     fshift              = fr->fshift[0];
131
132     nri                 = nlist->nri;
133     iinr                = nlist->iinr;
134     jindex              = nlist->jindex;
135     jjnr                = nlist->jjnr;
136     icoul               = nlist->ielec;
137     ivdw                = nlist->ivdw;
138     shift               = nlist->shift;
139     gid                 = nlist->gid;
140
141     shiftvec            = fr->shift_vec[0];
142     chargeA             = mdatoms->chargeA;
143     chargeB             = mdatoms->chargeB;
144     facel               = fr->epsfac;
145     krf                 = fr->k_rf;
146     crf                 = fr->c_rf;
147     ewc                 = fr->ewaldcoeff;
148     Vc                  = kernel_data->energygrp_elec;
149     typeA               = mdatoms->typeA;
150     typeB               = mdatoms->typeB;
151     ntype               = fr->ntype;
152     nbfp                = fr->nbfp;
153     Vv                  = kernel_data->energygrp_vdw;
154     tabscale            = kernel_data->table_elec_vdw->scale;
155     VFtab               = kernel_data->table_elec_vdw->data;
156     lambda_coul         = kernel_data->lambda[efptCOUL];
157     lambda_vdw          = kernel_data->lambda[efptVDW];
158     dvdl                = kernel_data->dvdl;
159     alpha_coul          = fr->sc_alphacoul;
160     alpha_vdw           = fr->sc_alphavdw;
161     lam_power           = fr->sc_power;
162     sc_r_power          = fr->sc_r_power;
163     sigma6_def          = fr->sc_sigma6_def;
164     sigma6_min          = fr->sc_sigma6_min;
165     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
166
167     rcoulomb            = fr->rcoulomb;
168     rvdw                = fr->rvdw;
169     sh_invrc6           = fr->ic->sh_invrc6;
170
171     if (fr->coulomb_modifier == eintmodPOTSWITCH)
172     {
173         d               = fr->rcoulomb-fr->rcoulomb_switch;
174         elec_swV3       = -10.0/(d*d*d);
175         elec_swV4       =  15.0/(d*d*d*d);
176         elec_swV5       =  -6.0/(d*d*d*d*d);
177         elec_swF2       = -30.0/(d*d*d);
178         elec_swF3       =  60.0/(d*d*d*d);
179         elec_swF4       = -30.0/(d*d*d*d*d);
180     }
181     else
182     {
183         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
184         elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
185     }
186
187     if (fr->vdw_modifier == eintmodPOTSWITCH)
188     {
189         d               = fr->rvdw-fr->rvdw_switch;
190         vdw_swV3        = -10.0/(d*d*d);
191         vdw_swV4        =  15.0/(d*d*d*d);
192         vdw_swV5        =  -6.0/(d*d*d*d*d);
193         vdw_swF2        = -30.0/(d*d*d);
194         vdw_swF3        =  60.0/(d*d*d*d);
195         vdw_swF4        = -30.0/(d*d*d*d*d);
196     }
197     else
198     {
199         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
200         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
201     }
202
203     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
204     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
205
206     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
207      * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
208      * can apply the small trick of subtracting the _reciprocal_ space contribution
209      * in this kernel, and instead apply the free energy interaction to the 1/r
210      * (standard coulomb) interaction.
211      *
212      * However, we cannot use this approach for switch-modified since we would then
213      * effectively end up evaluating a significantly different interaction here compared to the
214      * normal (non-free-energy) kernels, either by applying a cutoff at a different
215      * position than what the user requested, or by switching different
216      * things (1/r rather than short-range Ewald). For these settings, we just
217      * use the traditional short-range Ewald interaction in that case.
218      */
219     bConvertEwaldToCoulomb = ((icoul == GMX_NBKERNEL_ELEC_EWALD) && (fr->coulomb_modifier != eintmodPOTSWITCH));
220
221     /* fix compiler warnings */
222     nj1   = 0;
223     n1C   = n1V   = 0;
224     epsC  = epsV  = 0;
225     eps2C = eps2V = 0;
226
227     dvdl_coul  = 0;
228     dvdl_vdw   = 0;
229
230     /* Lambda factor for state A, 1-lambda*/
231     LFC[STATE_A] = 1.0 - lambda_coul;
232     LFV[STATE_A] = 1.0 - lambda_vdw;
233
234     /* Lambda factor for state B, lambda*/
235     LFC[STATE_B] = lambda_coul;
236     LFV[STATE_B] = lambda_vdw;
237
238     /*derivative of the lambda factor for state A and B */
239     DLF[STATE_A] = -1;
240     DLF[STATE_B] = 1;
241
242     for (i = 0; i < NSTATES; i++)
243     {
244         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
245         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
246         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
247         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
248     }
249     /* precalculate */
250     sigma2_def = pow(sigma6_def, 1.0/3.0);
251     sigma2_min = pow(sigma6_min, 1.0/3.0);
252
253     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
254
255     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
256               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
257
258     /* we always use the combined table here */
259     tab_elemsize = 12;
260
261     for (n = 0; (n < nri); n++)
262     {
263         is3              = 3*shift[n];
264         shX              = shiftvec[is3];
265         shY              = shiftvec[is3+1];
266         shZ              = shiftvec[is3+2];
267         nj0              = jindex[n];
268         nj1              = jindex[n+1];
269         ii               = iinr[n];
270         ii3              = 3*ii;
271         ix               = shX + x[ii3+0];
272         iy               = shY + x[ii3+1];
273         iz               = shZ + x[ii3+2];
274         iqA              = facel*chargeA[ii];
275         iqB              = facel*chargeB[ii];
276         ntiA             = 2*ntype*typeA[ii];
277         ntiB             = 2*ntype*typeB[ii];
278         vctot            = 0;
279         vvtot            = 0;
280         fix              = 0;
281         fiy              = 0;
282         fiz              = 0;
283
284         for (k = nj0; (k < nj1); k++)
285         {
286             jnr              = jjnr[k];
287             j3               = 3*jnr;
288             dx               = ix - x[j3];
289             dy               = iy - x[j3+1];
290             dz               = iz - x[j3+2];
291             rsq              = dx*dx+dy*dy+dz*dz;
292             rinv             = gmx_invsqrt(rsq);
293             r                = rsq*rinv;
294             if (sc_r_power == 6.0)
295             {
296                 rpm2             = rsq*rsq;  /* r4 */
297                 rp               = rpm2*rsq; /* r6 */
298             }
299             else if (sc_r_power == 48.0)
300             {
301                 rp               = rsq*rsq*rsq; /* r6 */
302                 rp               = rp*rp;       /* r12 */
303                 rp               = rp*rp;       /* r24 */
304                 rp               = rp*rp;       /* r48 */
305                 rpm2             = rp/rsq;      /* r46 */
306             }
307             else
308             {
309                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
310                 rpm2           = rp/rsq;
311             }
312
313             tj[STATE_A]      = ntiA+2*typeA[jnr];
314             tj[STATE_B]      = ntiB+2*typeB[jnr];
315             qq[STATE_A]      = iqA*chargeA[jnr];
316             qq[STATE_B]      = iqB*chargeB[jnr];
317
318             for (i = 0; i < NSTATES; i++)
319             {
320
321                 c6[i]              = nbfp[tj[i]];
322                 c12[i]             = nbfp[tj[i]+1];
323                 if ((c6[i] > 0) && (c12[i] > 0))
324                 {
325                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
326                     sigma6[i]       = 0.5*c12[i]/c6[i];
327                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
328                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
329                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
330                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
331                     {
332                         sigma6[i] = sigma6_min;
333                         sigma2[i] = sigma2_min;
334                     }
335                 }
336                 else
337                 {
338                     sigma6[i]       = sigma6_def;
339                     sigma2[i]       = sigma2_def;
340                 }
341                 if (sc_r_power == 6.0)
342                 {
343                     sigma_pow[i]    = sigma6[i];
344                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
345                 }
346                 else if (sc_r_power == 48.0)
347                 {
348                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
349                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
350                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
351                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
352                 }
353                 else
354                 {    /* not really supported as input, but in here for testing the general case*/
355                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
356                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
357                 }
358             }
359
360             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
361             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
362             {
363                 alpha_vdw_eff    = 0;
364                 alpha_coul_eff   = 0;
365             }
366             else
367             {
368                 alpha_vdw_eff    = alpha_vdw;
369                 alpha_coul_eff   = alpha_coul;
370             }
371
372             for (i = 0; i < NSTATES; i++)
373             {
374                 FscalC[i]    = 0;
375                 FscalV[i]    = 0;
376                 Vcoul[i]     = 0;
377                 Vvdw[i]      = 0;
378
379                 /* Only spend time on A or B state if it is non-zero */
380                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
381                 {
382
383                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
384                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
385                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
386                     rC             = 1.0/rinvC;
387
388                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
389                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
390                     rV             = 1.0/rinvV;
391
392                     if (do_tab)
393                     {
394                         rtC        = rC*tabscale;
395                         n0         = rtC;
396                         epsC       = rtC-n0;
397                         eps2C      = epsC*epsC;
398                         n1C        = tab_elemsize*n0;
399
400                         rtV        = rV*tabscale;
401                         n0         = rtV;
402                         epsV       = rtV-n0;
403                         eps2V      = epsV*epsV;
404                         n1V        = tab_elemsize*n0;
405                     }
406
407
408                     /* Only process the coulomb interactions if we have charges,
409                      * and if we either include all entries in the list (no cutoff
410                      * used in the kernel), or if we are within the cutoff.
411                      */
412                     bComputeElecInteraction = !bExactElecCutoff ||
413                         ( bConvertEwaldToCoulomb && r < rcoulomb) ||
414                         (!bConvertEwaldToCoulomb && rC < rcoulomb);
415
416                     if ( (qq[i] != 0) && bComputeElecInteraction)
417                     {
418                         switch (icoul)
419                         {
420                             case GMX_NBKERNEL_ELEC_COULOMB:
421                                 Vcoul[i]   = qq[i]*rinvC;
422                                 FscalC[i]  = Vcoul[i];
423                                 /* The shift for the Coulomb potential is stored in
424                                  * the RF parameter c_rf, which is 0 without shift
425                                  */
426                                 Vcoul[i]  -= qq[i]*fr->ic->c_rf;
427                                 break;
428
429                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
430                                 /* reaction-field */
431                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
432                                 FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
433                                 break;
434
435                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
436                                 /* non-Ewald tabulated coulomb */
437                                 nnn        = n1C;
438                                 Y          = VFtab[nnn];
439                                 F          = VFtab[nnn+1];
440                                 Geps       = epsC*VFtab[nnn+2];
441                                 Heps2      = eps2C*VFtab[nnn+3];
442                                 Fp         = F+Geps+Heps2;
443                                 VV         = Y+epsC*Fp;
444                                 FF         = Fp+Geps+2.0*Heps2;
445                                 Vcoul[i]   = qq[i]*VV;
446                                 FscalC[i]  = -qq[i]*tabscale*FF*rC;
447                                 break;
448
449                             case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
450                                 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
451                                 break;
452
453                             case GMX_NBKERNEL_ELEC_EWALD:
454                                 if (bConvertEwaldToCoulomb)
455                                 {
456                                     Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
457                                     FscalC[i]  = qq[i]*rinvC;
458                                 }
459                                 else
460                                 {
461                                     ewrt      = rC*ewtabscale;
462                                     ewitab    = (int) ewrt;
463                                     eweps     = ewrt-ewitab;
464                                     ewitab    = 4*ewitab;
465                                     FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
466                                     rinvcorr  = rinvC-sh_ewald;
467                                     Vcoul[i]  = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
468                                     FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
469                                 }
470                                 break;
471
472                             case GMX_NBKERNEL_ELEC_NONE:
473                                 FscalC[i]  = 0.0;
474                                 Vcoul[i]   = 0.0;
475                                 break;
476
477                             default:
478                                 gmx_incons("Invalid icoul in free energy kernel");
479                                 break;
480                         }
481
482                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
483                         {
484                             d                = rC-fr->rcoulomb_switch;
485                             d                = (d > 0.0) ? d : 0.0;
486                             d2               = d*d;
487                             sw               = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
488                             dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
489
490                             FscalC[i]        = FscalC[i]*sw - rC*Vcoul[i]*dsw;
491                             Vcoul[i]        *= sw;
492
493                             FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : 0.0;
494                             Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : 0.0;
495                         }
496                     }
497
498                     if ((c6[i] != 0 || c12[i] != 0) && ( !bExactVdwCutoff || rV < rvdw ) )
499                     {
500                         switch (ivdw)
501                         {
502                             case GMX_NBKERNEL_VDW_LENNARDJONES:
503                                 /* cutoff LJ */
504                                 if (sc_r_power == 6.0)
505                                 {
506                                     rinv6            = rpinvV;
507                                 }
508                                 else
509                                 {
510                                     rinv6            = pow(rinvV, 6.0);
511                                 }
512                                 Vvdw6            = c6[i]*rinv6;
513                                 Vvdw12           = c12[i]*rinv6*rinv6;
514                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
515                                 {
516                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
517                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
518                                 }
519                                 else
520                                 {
521                                     Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
522                                 }
523                                 FscalV[i]        = Vvdw12 - Vvdw6;
524                                 break;
525
526                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
527                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
528                                 break;
529
530                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
531                                 /* Table LJ */
532                                 nnn = n1V+4;
533                                 /* dispersion */
534                                 Y          = VFtab[nnn];
535                                 F          = VFtab[nnn+1];
536                                 Geps       = epsV*VFtab[nnn+2];
537                                 Heps2      = eps2V*VFtab[nnn+3];
538                                 Fp         = F+Geps+Heps2;
539                                 VV         = Y+epsV*Fp;
540                                 FF         = Fp+Geps+2.0*Heps2;
541                                 Vvdw[i]   += c6[i]*VV;
542                                 FscalV[i] -= c6[i]*tabscale*FF*rV;
543
544                                 /* repulsion */
545                                 Y          = VFtab[nnn+4];
546                                 F          = VFtab[nnn+5];
547                                 Geps       = epsV*VFtab[nnn+6];
548                                 Heps2      = eps2V*VFtab[nnn+7];
549                                 Fp         = F+Geps+Heps2;
550                                 VV         = Y+epsV*Fp;
551                                 FF         = Fp+Geps+2.0*Heps2;
552                                 Vvdw[i]   += c12[i]*VV;
553                                 FscalV[i] -= c12[i]*tabscale*FF*rV;
554                                 break;
555
556                             case GMX_NBKERNEL_VDW_NONE:
557                                 Vvdw[i]    = 0.0;
558                                 FscalV[i]  = 0.0;
559                                 break;
560
561                             default:
562                                 gmx_incons("Invalid ivdw in free energy kernel");
563                                 break;
564                         }
565
566                         if (fr->vdw_modifier == eintmodPOTSWITCH)
567                         {
568                             d                = rV-fr->rvdw_switch;
569                             d                = (d > 0.0) ? d : 0.0;
570                             d2               = d*d;
571                             sw               = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
572                             dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
573
574                             FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
575                             Vvdw[i]         *= sw;
576
577                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
578                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
579                         }
580                     }
581
582                     /* FscalC (and FscalV) now contain: dV/drC * rC
583                      * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
584                      * Further down we first multiply by r^p-2 and then by
585                      * the vector r, which in total gives: dV/drC * (r/rC)^1-p
586                      */
587                     FscalC[i] *= rpinvC;
588                     FscalV[i] *= rpinvV;
589                 }
590             }
591
592             Fscal = 0;
593
594             if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
595             {
596                 /* See comment in the preamble. When using Ewald interactions
597                  * (unless we use a switch modifier) we subtract the reciprocal-space
598                  * Ewald component here which made it possible to apply the free
599                  * energy interaction to 1/r (vanilla coulomb short-range part)
600                  * above. This gets us closer to the ideal case of applying
601                  * the softcore to the entire electrostatic interaction,
602                  * including the reciprocal-space component.
603                  */
604                 real v_lr, f_lr;
605
606                 ewrt      = r*ewtabscale;
607                 ewitab    = (int) ewrt;
608                 eweps     = ewrt-ewitab;
609                 ewitab    = 4*ewitab;
610                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
611                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
612                 f_lr     *= rinv;
613
614                 for (i = 0; i < NSTATES; i++)
615                 {
616                     vctot      -= LFC[i]*qq[i]*v_lr;
617                     Fscal      -= LFC[i]*qq[i]*f_lr;
618                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
619                 }
620             }
621
622             /* Assemble A and B states */
623             for (i = 0; i < NSTATES; i++)
624             {
625                 vctot         += LFC[i]*Vcoul[i];
626                 vvtot         += LFV[i]*Vvdw[i];
627
628                 Fscal         += LFC[i]*FscalC[i]*rpm2;
629                 Fscal         += LFV[i]*FscalV[i]*rpm2;
630
631                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
632                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
633             }
634
635             if (bDoForces)
636             {
637                 tx         = Fscal*dx;
638                 ty         = Fscal*dy;
639                 tz         = Fscal*dz;
640                 fix        = fix + tx;
641                 fiy        = fiy + ty;
642                 fiz        = fiz + tz;
643                 f[j3]      = f[j3]   - tx;
644                 f[j3+1]    = f[j3+1] - ty;
645                 f[j3+2]    = f[j3+2] - tz;
646             }
647         }
648
649         if (bDoForces)
650         {
651             f[ii3]         = f[ii3]        + fix;
652             f[ii3+1]       = f[ii3+1]      + fiy;
653             f[ii3+2]       = f[ii3+2]      + fiz;
654             fshift[is3]    = fshift[is3]   + fix;
655             fshift[is3+1]  = fshift[is3+1] + fiy;
656             fshift[is3+2]  = fshift[is3+2] + fiz;
657         }
658         ggid               = gid[n];
659         Vc[ggid]           = Vc[ggid] + vctot;
660         Vv[ggid]           = Vv[ggid] + vvtot;
661     }
662
663     dvdl[efptCOUL]     += dvdl_coul;
664     dvdl[efptVDW]      += dvdl_vdw;
665
666     /* Estimate flops, average for free energy stuff:
667      * 12  flops per outer iteration
668      * 150 flops per inner iteration
669      */
670     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
671 }
672
673 real
674 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
675                                real tabscale, real *vftab,
676                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
677                                real LFC[2], real LFV[2], real DLF[2],
678                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
679                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
680                                real *velectot, real *vvdwtot, real *dvdl)
681 {
682     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
683     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
684     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
685     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
686     real       fscal_vdw[2], fscal_elec[2];
687     real       velec[2], vvdw[2];
688     int        i, ntab;
689
690     qq[0]    = qqA;
691     qq[1]    = qqB;
692     c6[0]    = c6A;
693     c6[1]    = c6B;
694     c12[0]   = c12A;
695     c12[1]   = c12B;
696
697     if (sc_r_power == 6.0)
698     {
699         rpm2             = r2*r2;   /* r4 */
700         rp               = rpm2*r2; /* r6 */
701     }
702     else if (sc_r_power == 48.0)
703     {
704         rp               = r2*r2*r2; /* r6 */
705         rp               = rp*rp;    /* r12 */
706         rp               = rp*rp;    /* r24 */
707         rp               = rp*rp;    /* r48 */
708         rpm2             = rp/r2;    /* r46 */
709     }
710     else
711     {
712         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
713         rpm2           = rp/r2;
714     }
715
716     /* Loop over state A(0) and B(1) */
717     for (i = 0; i < 2; i++)
718     {
719         if ((c6[i] > 0) && (c12[i] > 0))
720         {
721             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
722              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
723              */
724             sigma6[i]       = 0.5*c12[i]/c6[i];
725             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
726             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
727                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
728             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
729             {
730                 sigma6[i] = sigma6_min;
731                 sigma2[i] = sigma2_min;
732             }
733         }
734         else
735         {
736             sigma6[i]       = sigma6_def;
737             sigma2[i]       = sigma2_def;
738         }
739         if (sc_r_power == 6.0)
740         {
741             sigma_pow[i]    = sigma6[i];
742             sigma_powm2[i]  = sigma6[i]/sigma2[i];
743         }
744         else if (sc_r_power == 48.0)
745         {
746             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
747             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
748             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
749             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
750         }
751         else
752         {    /* not really supported as input, but in here for testing the general case*/
753             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
754             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
755         }
756     }
757
758     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
759     if ((c12[0] > 0) && (c12[1] > 0))
760     {
761         alpha_vdw_eff    = 0;
762         alpha_coul_eff   = 0;
763     }
764     else
765     {
766         alpha_vdw_eff    = alpha_vdw;
767         alpha_coul_eff   = alpha_coul;
768     }
769
770     /* Loop over A and B states again */
771     for (i = 0; i < 2; i++)
772     {
773         fscal_elec[i] = 0;
774         fscal_vdw[i]  = 0;
775         velec[i]      = 0;
776         vvdw[i]       = 0;
777
778         /* Only spend time on A or B state if it is non-zero */
779         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
780         {
781             /* Coulomb */
782             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
783             r_coul           = pow(rpinv, -1.0/sc_r_power);
784
785             /* Electrostatics table lookup data */
786             rtab             = r_coul*tabscale;
787             ntab             = rtab;
788             eps              = rtab-ntab;
789             eps2             = eps*eps;
790             ntab             = 12*ntab;
791             /* Electrostatics */
792             Y                = vftab[ntab];
793             F                = vftab[ntab+1];
794             Geps             = eps*vftab[ntab+2];
795             Heps2            = eps2*vftab[ntab+3];
796             Fp               = F+Geps+Heps2;
797             VV               = Y+eps*Fp;
798             FF               = Fp+Geps+2.0*Heps2;
799             velec[i]         = qq[i]*VV;
800             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
801
802             /* Vdw */
803             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
804             r_vdw            = pow(rpinv, -1.0/sc_r_power);
805             /* Vdw table lookup data */
806             rtab             = r_vdw*tabscale;
807             ntab             = rtab;
808             eps              = rtab-ntab;
809             eps2             = eps*eps;
810             ntab             = 12*ntab;
811             /* Dispersion */
812             Y                = vftab[ntab+4];
813             F                = vftab[ntab+5];
814             Geps             = eps*vftab[ntab+6];
815             Heps2            = eps2*vftab[ntab+7];
816             Fp               = F+Geps+Heps2;
817             VV               = Y+eps*Fp;
818             FF               = Fp+Geps+2.0*Heps2;
819             vvdw[i]          = c6[i]*VV;
820             fscal_vdw[i]     = -c6[i]*FF;
821
822             /* Repulsion */
823             Y                = vftab[ntab+8];
824             F                = vftab[ntab+9];
825             Geps             = eps*vftab[ntab+10];
826             Heps2            = eps2*vftab[ntab+11];
827             Fp               = F+Geps+Heps2;
828             VV               = Y+eps*Fp;
829             FF               = Fp+Geps+2.0*Heps2;
830             vvdw[i]         += c12[i]*VV;
831             fscal_vdw[i]    -= c12[i]*FF;
832             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
833         }
834     }
835     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
836     /* Assemble A and B states */
837     velecsum  = 0;
838     vvdwsum   = 0;
839     dvdl_coul = 0;
840     dvdl_vdw  = 0;
841     fscal     = 0;
842     for (i = 0; i < 2; i++)
843     {
844         velecsum      += LFC[i]*velec[i];
845         vvdwsum       += LFV[i]*vvdw[i];
846
847         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
848
849         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
850         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
851     }
852
853     dvdl[efptCOUL]     += dvdl_coul;
854     dvdl[efptVDW]      += dvdl_vdw;
855
856     *velectot           = velecsum;
857     *vvdwtot            = vvdwsum;
858
859     return fscal;
860 }