Merge release-4-6 into release-5-0
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
40
41 #include <math.h>
42
43 #include "vec.h"
44 #include "typedefs.h"
45 #include "nonbonded.h"
46 #include "nb_kernel.h"
47 #include "nrnb.h"
48 #include "macros.h"
49 #include "nb_free_energy.h"
50
51 #include "gmx_fatal.h"
52
53 void
54 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
55                           rvec * gmx_restrict              xx,
56                           rvec * gmx_restrict              ff,
57                           t_forcerec * gmx_restrict        fr,
58                           const t_mdatoms * gmx_restrict   mdatoms,
59                           nb_kernel_data_t * gmx_restrict  kernel_data,
60                           t_nrnb * gmx_restrict            nrnb)
61 {
62
63 #define  STATE_A  0
64 #define  STATE_B  1
65 #define  NSTATES  2
66     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
67     real          shX, shY, shZ;
68     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
69     real          Vcoul[NSTATES], Vvdw[NSTATES];
70     real          rinv6, r, rt, rtC, rtV;
71     real          iqA, iqB;
72     real          qq[NSTATES], vctot, krsq;
73     int           ntiA, ntiB, tj[NSTATES];
74     real          Vvdw6, Vvdw12, vvtot;
75     real          ix, iy, iz, fix, fiy, fiz;
76     real          dx, dy, dz, rsq, rinv;
77     real          c6[NSTATES], c12[NSTATES], c6grid[NSTATES];
78     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
79     double        dvdl_coul, dvdl_vdw;
80     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
81     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
82     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
83     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
84     int           do_tab, tab_elemsize;
85     int           n0, n1C, n1V, nnn;
86     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
87     int           icoul, ivdw;
88     int           nri;
89     const int *   iinr;
90     const int *   jindex;
91     const int *   jjnr;
92     const int *   shift;
93     const int *   gid;
94     const int *   typeA;
95     const int *   typeB;
96     int           ntype;
97     const real *  shiftvec;
98     real          dvdl_part;
99     real *        fshift;
100     real          tabscale = 0;
101     const real *  VFtab    = NULL;
102     const real *  x;
103     real *        f;
104     real          facel, krf, crf;
105     const real *  chargeA;
106     const real *  chargeB;
107     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
108     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
109     const real *  nbfp, *nbfp_grid;
110     real *        dvdl;
111     real *        Vv;
112     real *        Vc;
113     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
114     real          rcoulomb, sh_ewald;
115     real          rvdw, sh_invrc6;
116     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald;
117     real          rcutoff_max2;
118     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
119     const real *  tab_ewald_F;
120     const real *  tab_ewald_V;
121     const real *  tab_ewald_F_lj;
122     const real *  tab_ewald_V_lj;
123     real          tab_ewald_scale, tab_ewald_halfsp;
124
125     x                   = xx[0];
126     f                   = ff[0];
127
128     fshift              = fr->fshift[0];
129
130     nri                 = nlist->nri;
131     iinr                = nlist->iinr;
132     jindex              = nlist->jindex;
133     jjnr                = nlist->jjnr;
134     icoul               = nlist->ielec;
135     ivdw                = nlist->ivdw;
136     shift               = nlist->shift;
137     gid                 = nlist->gid;
138
139     shiftvec            = fr->shift_vec[0];
140     chargeA             = mdatoms->chargeA;
141     chargeB             = mdatoms->chargeB;
142     facel               = fr->epsfac;
143     krf                 = fr->k_rf;
144     crf                 = fr->c_rf;
145     ewc_lj              = fr->ewaldcoeff_lj;
146     Vc                  = kernel_data->energygrp_elec;
147     typeA               = mdatoms->typeA;
148     typeB               = mdatoms->typeB;
149     ntype               = fr->ntype;
150     nbfp                = fr->nbfp;
151     nbfp_grid           = fr->ljpme_c6grid;
152     Vv                  = kernel_data->energygrp_vdw;
153     lambda_coul         = kernel_data->lambda[efptCOUL];
154     lambda_vdw          = kernel_data->lambda[efptVDW];
155     dvdl                = kernel_data->dvdl;
156     alpha_coul          = fr->sc_alphacoul;
157     alpha_vdw           = fr->sc_alphavdw;
158     lam_power           = fr->sc_power;
159     sc_r_power          = fr->sc_r_power;
160     sigma6_def          = fr->sc_sigma6_def;
161     sigma6_min          = fr->sc_sigma6_min;
162     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
163     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
164     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
165
166     rcoulomb            = fr->rcoulomb;
167     sh_ewald            = fr->ic->sh_ewald;
168     rvdw                = fr->rvdw;
169     sh_invrc6           = fr->ic->sh_invrc6;
170
171     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
172     tab_ewald_F         = fr->ic->tabq_coul_F;
173     tab_ewald_V         = fr->ic->tabq_coul_V;
174     tab_ewald_scale     = fr->ic->tabq_scale;
175     tab_ewald_F_lj      = fr->ic->tabq_vdw_F;
176     tab_ewald_V_lj      = fr->ic->tabq_vdw_V;
177     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
178
179     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
180     {
181         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
182         rcutoff2        = rcutoff*rcutoff;
183         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
184         d               = rcutoff-rswitch;
185         swV3            = -10.0/(d*d*d);
186         swV4            =  15.0/(d*d*d*d);
187         swV5            =  -6.0/(d*d*d*d*d);
188         swF2            = -30.0/(d*d*d);
189         swF3            =  60.0/(d*d*d*d);
190         swF4            = -30.0/(d*d*d*d*d);
191     }
192     else
193     {
194         /* Stupid compilers dont realize these variables will not be used */
195         rswitch         = 0.0;
196         swV3            = 0.0;
197         swV4            = 0.0;
198         swV5            = 0.0;
199         swF2            = 0.0;
200         swF3            = 0.0;
201         swF4            = 0.0;
202     }
203
204     if (fr->cutoff_scheme == ecutsVERLET)
205     {
206         const interaction_const_t *ic;
207
208         ic = fr->ic;
209         if (EVDW_PME(ic->vdwtype))
210         {
211             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
212         }
213         else
214         {
215             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
216         }
217
218         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
219         {
220             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
221         }
222         else if (EEL_PME_EWALD(ic->eeltype))
223         {
224             icoul        = GMX_NBKERNEL_ELEC_EWALD;
225         }
226         else
227         {
228             gmx_incons("Unsupported eeltype with Verlet and free-energy");
229         }
230
231         bExactElecCutoff = TRUE;
232         bExactVdwCutoff  = TRUE;
233     }
234     else
235     {
236         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
237         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
238     }
239
240     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
241     rcutoff_max2    = max(fr->rcoulomb, fr->rvdw);
242     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
243
244     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
245
246     /* fix compiler warnings */
247     nj1   = 0;
248     n1C   = n1V   = 0;
249     epsC  = epsV  = 0;
250     eps2C = eps2V = 0;
251
252     dvdl_coul  = 0;
253     dvdl_vdw   = 0;
254
255     /* Lambda factor for state A, 1-lambda*/
256     LFC[STATE_A] = 1.0 - lambda_coul;
257     LFV[STATE_A] = 1.0 - lambda_vdw;
258
259     /* Lambda factor for state B, lambda*/
260     LFC[STATE_B] = lambda_coul;
261     LFV[STATE_B] = lambda_vdw;
262
263     /*derivative of the lambda factor for state A and B */
264     DLF[STATE_A] = -1;
265     DLF[STATE_B] = 1;
266
267     for (i = 0; i < NSTATES; i++)
268     {
269         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
270         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
271         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
272         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
273     }
274     /* precalculate */
275     sigma2_def = pow(sigma6_def, 1.0/3.0);
276     sigma2_min = pow(sigma6_min, 1.0/3.0);
277
278     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
279
280     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
281               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
282     if (do_tab)
283     {
284         tabscale         = kernel_data->table_elec_vdw->scale;
285         VFtab            = kernel_data->table_elec_vdw->data;
286         /* we always use the combined table here */
287         tab_elemsize     = 12;
288     }
289
290     for (n = 0; (n < nri); n++)
291     {
292         int npair_within_cutoff;
293
294         npair_within_cutoff = 0;
295
296         is3              = 3*shift[n];
297         shX              = shiftvec[is3];
298         shY              = shiftvec[is3+1];
299         shZ              = shiftvec[is3+2];
300         nj0              = jindex[n];
301         nj1              = jindex[n+1];
302         ii               = iinr[n];
303         ii3              = 3*ii;
304         ix               = shX + x[ii3+0];
305         iy               = shY + x[ii3+1];
306         iz               = shZ + x[ii3+2];
307         iqA              = facel*chargeA[ii];
308         iqB              = facel*chargeB[ii];
309         ntiA             = 2*ntype*typeA[ii];
310         ntiB             = 2*ntype*typeB[ii];
311         vctot            = 0;
312         vvtot            = 0;
313         fix              = 0;
314         fiy              = 0;
315         fiz              = 0;
316
317         for (k = nj0; (k < nj1); k++)
318         {
319             jnr              = jjnr[k];
320             j3               = 3*jnr;
321             dx               = ix - x[j3];
322             dy               = iy - x[j3+1];
323             dz               = iz - x[j3+2];
324             rsq              = dx*dx + dy*dy + dz*dz;
325
326             if (bExactCutoffAll && rsq >= rcutoff_max2)
327             {
328                 /* We save significant time by skipping all code below.
329                  * Note that with soft-core interactions, the actual cut-off
330                  * check might be different. But since the soft-core distance
331                  * is always larger than r, checking on r here is safe.
332                  */
333                 continue;
334             }
335             npair_within_cutoff++;
336
337             if (rsq > 0)
338             {
339                 rinv         = gmx_invsqrt(rsq);
340                 r            = rsq*rinv;
341             }
342             else
343             {
344                 /* The force at r=0 is zero, because of symmetry.
345                  * But note that the potential is in general non-zero,
346                  * since the soft-cored r will be non-zero.
347                  */
348                 rinv         = 0;
349                 r            = 0;
350             }
351
352             if (sc_r_power == 6.0)
353             {
354                 rpm2             = rsq*rsq;  /* r4 */
355                 rp               = rpm2*rsq; /* r6 */
356             }
357             else if (sc_r_power == 48.0)
358             {
359                 rp               = rsq*rsq*rsq; /* r6 */
360                 rp               = rp*rp;       /* r12 */
361                 rp               = rp*rp;       /* r24 */
362                 rp               = rp*rp;       /* r48 */
363                 rpm2             = rp/rsq;      /* r46 */
364             }
365             else
366             {
367                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
368                 rpm2           = rp/rsq;
369             }
370
371             Fscal = 0;
372
373             qq[STATE_A]      = iqA*chargeA[jnr];
374             qq[STATE_B]      = iqB*chargeB[jnr];
375
376             tj[STATE_A]      = ntiA+2*typeA[jnr];
377             tj[STATE_B]      = ntiB+2*typeB[jnr];
378
379             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD)
380             {
381                 c6grid[STATE_A] = nbfp_grid[tj[STATE_A]];
382                 c6grid[STATE_B] = nbfp_grid[tj[STATE_B]];
383             }
384
385             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
386             {
387                 c6[STATE_A]      = nbfp[tj[STATE_A]];
388                 c6[STATE_B]      = nbfp[tj[STATE_B]];
389
390                 for (i = 0; i < NSTATES; i++)
391                 {
392                     c12[i]             = nbfp[tj[i]+1];
393                     if ((c6[i] > 0) && (c12[i] > 0))
394                     {
395                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
396                         sigma6[i]       = 0.5*c12[i]/c6[i];
397                         sigma2[i]       = pow(sigma6[i], 1.0/3.0);
398                         /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
399                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
400                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
401                         {
402                             sigma6[i] = sigma6_min;
403                             sigma2[i] = sigma2_min;
404                         }
405                     }
406                     else
407                     {
408                         sigma6[i]       = sigma6_def;
409                         sigma2[i]       = sigma2_def;
410                     }
411                     if (sc_r_power == 6.0)
412                     {
413                         sigma_pow[i]    = sigma6[i];
414                         sigma_powm2[i]  = sigma6[i]/sigma2[i];
415                     }
416                     else if (sc_r_power == 48.0)
417                     {
418                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
419                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
420                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
421                         sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
422                     }
423                     else
424                     {    /* not really supported as input, but in here for testing the general case*/
425                         sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
426                         sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
427                     }
428                 }
429
430                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
431                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
432                 {
433                     alpha_vdw_eff    = 0;
434                     alpha_coul_eff   = 0;
435                 }
436                 else
437                 {
438                     alpha_vdw_eff    = alpha_vdw;
439                     alpha_coul_eff   = alpha_coul;
440                 }
441
442                 for (i = 0; i < NSTATES; i++)
443                 {
444                     FscalC[i]    = 0;
445                     FscalV[i]    = 0;
446                     Vcoul[i]     = 0;
447                     Vvdw[i]      = 0;
448
449                     /* Only spend time on A or B state if it is non-zero */
450                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
451                     {
452                         /* this section has to be inside the loop because of the dependence on sigma_pow */
453                         rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
454                         rinvC          = pow(rpinvC, 1.0/sc_r_power);
455                         rC             = 1.0/rinvC;
456
457                         rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
458                         rinvV          = pow(rpinvV, 1.0/sc_r_power);
459                         rV             = 1.0/rinvV;
460
461                         if (do_tab)
462                         {
463                             rtC        = rC*tabscale;
464                             n0         = rtC;
465                             epsC       = rtC-n0;
466                             eps2C      = epsC*epsC;
467                             n1C        = tab_elemsize*n0;
468
469                             rtV        = rV*tabscale;
470                             n0         = rtV;
471                             epsV       = rtV-n0;
472                             eps2V      = epsV*epsV;
473                             n1V        = tab_elemsize*n0;
474                         }
475
476                         /* With Ewald and soft-core we should put the cut-off on r,
477                          * not on the soft-cored rC, as the real-space and
478                          * reciprocal space contributions should (almost) cancel.
479                          */
480                         if (qq[i] != 0 &&
481                             !(bExactElecCutoff &&
482                               ((!bEwald && rC >= rcoulomb) ||
483                                (bEwald && r >= rcoulomb))))
484                         {
485                             switch (icoul)
486                             {
487                                 case GMX_NBKERNEL_ELEC_COULOMB:
488                                     /* simple cutoff */
489                                     Vcoul[i]   = qq[i]*rinvC;
490                                     FscalC[i]  = Vcoul[i];
491                                     break;
492
493                                 case GMX_NBKERNEL_ELEC_EWALD:
494                                     /* Ewald FEP is done only on the 1/r part */
495                                     Vcoul[i]   = qq[i]*(rinvC - sh_ewald);
496                                     FscalC[i]  = Vcoul[i];
497                                     break;
498
499                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
500                                     /* reaction-field */
501                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
502                                     FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
503                                     break;
504
505                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
506                                     /* non-Ewald tabulated coulomb */
507                                     nnn        = n1C;
508                                     Y          = VFtab[nnn];
509                                     F          = VFtab[nnn+1];
510                                     Geps       = epsC*VFtab[nnn+2];
511                                     Heps2      = eps2C*VFtab[nnn+3];
512                                     Fp         = F+Geps+Heps2;
513                                     VV         = Y+epsC*Fp;
514                                     FF         = Fp+Geps+2.0*Heps2;
515                                     Vcoul[i]   = qq[i]*VV;
516                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
517                                     break;
518
519                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
520                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
521                                     break;
522
523                                 case GMX_NBKERNEL_ELEC_NONE:
524                                     FscalC[i]  = 0.0;
525                                     Vcoul[i]   = 0.0;
526                                     break;
527
528                                 default:
529                                     gmx_incons("Invalid icoul in free energy kernel");
530                                     break;
531                             }
532
533                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
534                             {
535                                 d                = rC-rswitch;
536                                 d                = (d > 0.0) ? d : 0.0;
537                                 d2               = d*d;
538                                 sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
539                                 dsw              = d2*(swF2+d*(swF3+d*swF4));
540
541                                 Vcoul[i]  *= sw;
542                                 FscalC[i]  = FscalC[i]*sw + Vcoul[i]*dsw;
543                             }
544                         }
545
546                         if ((c6[i] != 0 || c12[i] != 0) &&
547                             !(bExactVdwCutoff &&
548                               ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) ||
549                                (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw))))
550                         {
551                             switch (ivdw)
552                             {
553                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
554                                 case GMX_NBKERNEL_VDW_LJEWALD:
555                                     /* cutoff LJ */
556                                     if (sc_r_power == 6.0)
557                                     {
558                                         rinv6            = rpinvV;
559                                     }
560                                     else
561                                     {
562                                         rinv6            = pow(rinvV, 6.0);
563                                     }
564                                     Vvdw6            = c6[i]*rinv6;
565                                     Vvdw12           = c12[i]*rinv6*rinv6;
566                                     if (fr->vdw_modifier == eintmodPOTSHIFT)
567                                     {
568                                         Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
569                                                              -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
570                                     }
571                                     else
572                                     {
573                                         Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
574                                     }
575                                     FscalV[i]        = Vvdw12 - Vvdw6;
576                                     break;
577
578                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
579                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
580                                     break;
581
582                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
583                                     /* Table LJ */
584                                     nnn = n1V+4;
585                                     /* dispersion */
586                                     Y          = VFtab[nnn];
587                                     F          = VFtab[nnn+1];
588                                     Geps       = epsV*VFtab[nnn+2];
589                                     Heps2      = eps2V*VFtab[nnn+3];
590                                     Fp         = F+Geps+Heps2;
591                                     VV         = Y+epsV*Fp;
592                                     FF         = Fp+Geps+2.0*Heps2;
593                                     Vvdw[i]   += c6[i]*VV;
594                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
595
596                                     /* repulsion */
597                                     Y          = VFtab[nnn+4];
598                                     F          = VFtab[nnn+5];
599                                     Geps       = epsV*VFtab[nnn+6];
600                                     Heps2      = eps2V*VFtab[nnn+7];
601                                     Fp         = F+Geps+Heps2;
602                                     VV         = Y+epsV*Fp;
603                                     FF         = Fp+Geps+2.0*Heps2;
604                                     Vvdw[i]   += c12[i]*VV;
605                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
606                                     break;
607
608                                 case GMX_NBKERNEL_VDW_NONE:
609                                     Vvdw[i]    = 0.0;
610                                     FscalV[i]  = 0.0;
611                                     break;
612
613                                 default:
614                                     gmx_incons("Invalid ivdw in free energy kernel");
615                                     break;
616                             }
617
618                             if (fr->vdw_modifier == eintmodPOTSWITCH)
619                             {
620                                 d          = rV-rswitch;
621                                 d          = (d > 0.0) ? d : 0.0;
622                                 d2         = d*d;
623                                 sw         = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
624                                 dsw        = d2*(swF2+d*(swF3+d*swF4));
625
626                                 Vvdw[i]   *= sw;
627                                 FscalV[i]  = FscalV[i]*sw + Vvdw[i]*dsw;
628
629                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : 0.0;
630                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : 0.0;
631                             }
632                         }
633
634                         /* FscalC (and FscalV) now contain: dV/drC * rC
635                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
636                          * Further down we first multiply by r^p-2 and then by
637                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
638                          */
639                         FscalC[i] *= rpinvC;
640                         FscalV[i] *= rpinvV;
641                     }
642                 }
643
644                 /* Assemble A and B states */
645                 for (i = 0; i < NSTATES; i++)
646                 {
647                     vctot         += LFC[i]*Vcoul[i];
648                     vvtot         += LFV[i]*Vvdw[i];
649
650                     Fscal         += LFC[i]*FscalC[i]*rpm2;
651                     Fscal         += LFV[i]*FscalV[i]*rpm2;
652
653                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
654                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
655                 }
656             }
657             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
658             {
659                 /* For excluded pairs, which are only in this pair list when
660                  * using the Verlet scheme, we don't use soft-core.
661                  * The group scheme also doesn't soft-core for these.
662                  * As there is no singularity, there is no need for soft-core.
663                  */
664                 VV = krf*rsq - crf;
665                 FF = -2.0*krf;
666
667                 if (ii == jnr)
668                 {
669                     VV *= 0.5;
670                 }
671
672                 for (i = 0; i < NSTATES; i++)
673                 {
674                     vctot      += LFC[i]*qq[i]*VV;
675                     Fscal      += LFC[i]*qq[i]*FF;
676                     dvdl_coul  += DLF[i]*qq[i]*VV;
677                 }
678             }
679
680             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
681                 !(bExactElecCutoff && r >= rcoulomb))
682             {
683                 /* Because we compute the soft-core normally,
684                  * we have to remove the Ewald short range portion.
685                  * Done outside of the states loop because this part
686                  * doesn't depend on the scaled R.
687                  */
688                 real rs, frac, f_lr;
689                 int  ri;
690
691                 rs     = rsq*rinv*tab_ewald_scale;
692                 ri     = (int)rs;
693                 frac   = rs - ri;
694                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
695                 FF     = f_lr*rinv;
696                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
697
698                 if (ii == jnr)
699                 {
700                     VV   *= 0.5;
701                 }
702
703                 for (i = 0; i < NSTATES; i++)
704                 {
705                     vctot      -= LFC[i]*qq[i]*VV;
706                     Fscal      -= LFC[i]*qq[i]*FF;
707                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
708                 }
709             }
710
711             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD &&
712                 !(bExactVdwCutoff && r >= rvdw))
713             {
714                 real rs, frac, f_lr;
715                 int  ri;
716
717                 rs     = rsq*rinv*tab_ewald_scale;
718                 ri     = (int)rs;
719                 frac   = rs - ri;
720                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
721                 FF     = f_lr*rinv;
722                 VV     = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr);
723                 for (i = 0; i < NSTATES; i++)
724                 {
725                     vvtot      += LFV[i]*c6grid[i]*VV*(1.0/6.0);
726                     Fscal      += LFV[i]*c6grid[i]*FF*(1.0/6.0);
727                     dvdl_vdw   += (DLF[i]*c6grid[i])*VV*(1.0/6.0);
728                 }
729
730             }
731
732             if (bDoForces)
733             {
734                 tx         = Fscal*dx;
735                 ty         = Fscal*dy;
736                 tz         = Fscal*dz;
737                 fix        = fix + tx;
738                 fiy        = fiy + ty;
739                 fiz        = fiz + tz;
740                 /* OpenMP atomics are expensive, but this kernels is also
741                  * expensive, so we can take this hit, instead of using
742                  * thread-local output buffers and extra reduction.
743                  */
744 #pragma omp atomic
745                 f[j3]     -= tx;
746 #pragma omp atomic
747                 f[j3+1]   -= ty;
748 #pragma omp atomic
749                 f[j3+2]   -= tz;
750             }
751         }
752
753         /* The atomics below are expensive with many OpenMP threads.
754          * Here unperturbed i-particles will usually only have a few
755          * (perturbed) j-particles in the list. Thus with a buffered list
756          * we can skip a significant number of i-reductions with a check.
757          */
758         if (npair_within_cutoff > 0)
759         {
760             if (bDoForces)
761             {
762 #pragma omp atomic
763                 f[ii3]        += fix;
764 #pragma omp atomic
765                 f[ii3+1]      += fiy;
766 #pragma omp atomic
767                 f[ii3+2]      += fiz;
768             }
769             if (bDoShiftForces)
770             {
771 #pragma omp atomic
772                 fshift[is3]   += fix;
773 #pragma omp atomic
774                 fshift[is3+1] += fiy;
775 #pragma omp atomic
776                 fshift[is3+2] += fiz;
777             }
778             if (bDoPotential)
779             {
780                 ggid               = gid[n];
781 #pragma omp atomic
782                 Vc[ggid]          += vctot;
783 #pragma omp atomic
784                 Vv[ggid]          += vvtot;
785             }
786         }
787     }
788
789 #pragma omp atomic
790     dvdl[efptCOUL]     += dvdl_coul;
791  #pragma omp atomic
792     dvdl[efptVDW]      += dvdl_vdw;
793
794     /* Estimate flops, average for free energy stuff:
795      * 12  flops per outer iteration
796      * 150 flops per inner iteration
797      */
798 #pragma omp atomic
799     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
800 }
801
802 real
803 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
804                                real tabscale, real *vftab,
805                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
806                                real LFC[2], real LFV[2], real DLF[2],
807                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
808                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
809                                real *velectot, real *vvdwtot, real *dvdl)
810 {
811     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
812     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
813     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
814     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
815     real       fscal_vdw[2], fscal_elec[2];
816     real       velec[2], vvdw[2];
817     int        i, ntab;
818
819     qq[0]    = qqA;
820     qq[1]    = qqB;
821     c6[0]    = c6A;
822     c6[1]    = c6B;
823     c12[0]   = c12A;
824     c12[1]   = c12B;
825
826     if (sc_r_power == 6.0)
827     {
828         rpm2             = r2*r2;   /* r4 */
829         rp               = rpm2*r2; /* r6 */
830     }
831     else if (sc_r_power == 48.0)
832     {
833         rp               = r2*r2*r2; /* r6 */
834         rp               = rp*rp;    /* r12 */
835         rp               = rp*rp;    /* r24 */
836         rp               = rp*rp;    /* r48 */
837         rpm2             = rp/r2;    /* r46 */
838     }
839     else
840     {
841         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
842         rpm2           = rp/r2;
843     }
844
845     /* Loop over state A(0) and B(1) */
846     for (i = 0; i < 2; i++)
847     {
848         if ((c6[i] > 0) && (c12[i] > 0))
849         {
850             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
851              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
852              */
853             sigma6[i]       = 0.5*c12[i]/c6[i];
854             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
855             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
856                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
857             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
858             {
859                 sigma6[i] = sigma6_min;
860                 sigma2[i] = sigma2_min;
861             }
862         }
863         else
864         {
865             sigma6[i]       = sigma6_def;
866             sigma2[i]       = sigma2_def;
867         }
868         if (sc_r_power == 6.0)
869         {
870             sigma_pow[i]    = sigma6[i];
871             sigma_powm2[i]  = sigma6[i]/sigma2[i];
872         }
873         else if (sc_r_power == 48.0)
874         {
875             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
876             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
877             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
878             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
879         }
880         else
881         {    /* not really supported as input, but in here for testing the general case*/
882             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
883             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
884         }
885     }
886
887     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
888     if ((c12[0] > 0) && (c12[1] > 0))
889     {
890         alpha_vdw_eff    = 0;
891         alpha_coul_eff   = 0;
892     }
893     else
894     {
895         alpha_vdw_eff    = alpha_vdw;
896         alpha_coul_eff   = alpha_coul;
897     }
898
899     /* Loop over A and B states again */
900     for (i = 0; i < 2; i++)
901     {
902         fscal_elec[i] = 0;
903         fscal_vdw[i]  = 0;
904         velec[i]      = 0;
905         vvdw[i]       = 0;
906
907         /* Only spend time on A or B state if it is non-zero */
908         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
909         {
910             /* Coulomb */
911             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
912             r_coul           = pow(rpinv, -1.0/sc_r_power);
913
914             /* Electrostatics table lookup data */
915             rtab             = r_coul*tabscale;
916             ntab             = rtab;
917             eps              = rtab-ntab;
918             eps2             = eps*eps;
919             ntab             = 12*ntab;
920             /* Electrostatics */
921             Y                = vftab[ntab];
922             F                = vftab[ntab+1];
923             Geps             = eps*vftab[ntab+2];
924             Heps2            = eps2*vftab[ntab+3];
925             Fp               = F+Geps+Heps2;
926             VV               = Y+eps*Fp;
927             FF               = Fp+Geps+2.0*Heps2;
928             velec[i]         = qq[i]*VV;
929             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
930
931             /* Vdw */
932             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
933             r_vdw            = pow(rpinv, -1.0/sc_r_power);
934             /* Vdw table lookup data */
935             rtab             = r_vdw*tabscale;
936             ntab             = rtab;
937             eps              = rtab-ntab;
938             eps2             = eps*eps;
939             ntab             = 12*ntab;
940             /* Dispersion */
941             Y                = vftab[ntab+4];
942             F                = vftab[ntab+5];
943             Geps             = eps*vftab[ntab+6];
944             Heps2            = eps2*vftab[ntab+7];
945             Fp               = F+Geps+Heps2;
946             VV               = Y+eps*Fp;
947             FF               = Fp+Geps+2.0*Heps2;
948             vvdw[i]          = c6[i]*VV;
949             fscal_vdw[i]     = -c6[i]*FF;
950
951             /* Repulsion */
952             Y                = vftab[ntab+8];
953             F                = vftab[ntab+9];
954             Geps             = eps*vftab[ntab+10];
955             Heps2            = eps2*vftab[ntab+11];
956             Fp               = F+Geps+Heps2;
957             VV               = Y+eps*Fp;
958             FF               = Fp+Geps+2.0*Heps2;
959             vvdw[i]         += c12[i]*VV;
960             fscal_vdw[i]    -= c12[i]*FF;
961             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
962         }
963     }
964     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
965     /* Assemble A and B states */
966     velecsum  = 0;
967     vvdwsum   = 0;
968     dvdl_coul = 0;
969     dvdl_vdw  = 0;
970     fscal     = 0;
971     for (i = 0; i < 2; i++)
972     {
973         velecsum      += LFC[i]*velec[i];
974         vvdwsum       += LFV[i]*vvdw[i];
975
976         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
977
978         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
979         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
980     }
981
982     dvdl[efptCOUL]     += dvdl_coul;
983     dvdl[efptVDW]      += dvdl_vdw;
984
985     *velectot           = velecsum;
986     *vvdwtot            = vvdwsum;
987
988     return fscal;
989 }