Merge branch release-4-6
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
40
41 #include <math.h>
42
43 #include "vec.h"
44 #include "typedefs.h"
45 #include "nonbonded.h"
46 #include "nb_kernel.h"
47 #include "nrnb.h"
48 #include "macros.h"
49 #include "nb_free_energy.h"
50
51 void
52 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
53                           rvec * gmx_restrict              xx,
54                           rvec * gmx_restrict              ff,
55                           t_forcerec * gmx_restrict        fr,
56                           const t_mdatoms * gmx_restrict   mdatoms,
57                           nb_kernel_data_t * gmx_restrict  kernel_data,
58                           t_nrnb * gmx_restrict            nrnb)
59 {
60
61 #define  STATE_A  0
62 #define  STATE_B  1
63 #define  NSTATES  2
64     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
65     real          shX, shY, shZ;
66     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
67     real          Vcoul[NSTATES], Vvdw[NSTATES];
68     real          rinv6, r, rt, rtC, rtV;
69     real          iqA, iqB;
70     real          qq[NSTATES], vctot, krsq;
71     int           ntiA, ntiB, tj[NSTATES];
72     real          Vvdw6, Vvdw12, vvtot;
73     real          ix, iy, iz, fix, fiy, fiz;
74     real          dx, dy, dz, rsq, rinv;
75     real          c6[NSTATES], c12[NSTATES];
76     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
77     double        dvdl_coul, dvdl_vdw;
78     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
79     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
80     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
81     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
82     int           do_tab, tab_elemsize;
83     int           n0, n1C, n1V, nnn;
84     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
85     int           icoul, ivdw;
86     int           nri;
87     const int *   iinr;
88     const int *   jindex;
89     const int *   jjnr;
90     const int *   shift;
91     const int *   gid;
92     const int *   typeA;
93     const int *   typeB;
94     int           ntype;
95     const real *  shiftvec;
96     real          dvdl_part;
97     real *        fshift;
98     real          tabscale = 0;
99     const real *  VFtab    = NULL;
100     const real *  x;
101     real *        f;
102     real          facel, krf, crf;
103     const real *  chargeA;
104     const real *  chargeB;
105     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
106     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
107     const real *  nbfp;
108     real *        dvdl;
109     real *        Vv;
110     real *        Vc;
111     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
112     real          rcoulomb, sh_ewald;
113     real          rvdw, sh_invrc6;
114     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald;
115     real          rcutoff_max2;
116     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
117     const real *  tab_ewald_F;
118     const real *  tab_ewald_V;
119     real          tab_ewald_scale, tab_ewald_halfsp;
120
121     x                   = xx[0];
122     f                   = ff[0];
123
124     fshift              = fr->fshift[0];
125
126     nri                 = nlist->nri;
127     iinr                = nlist->iinr;
128     jindex              = nlist->jindex;
129     jjnr                = nlist->jjnr;
130     icoul               = nlist->ielec;
131     ivdw                = nlist->ivdw;
132     shift               = nlist->shift;
133     gid                 = nlist->gid;
134
135     shiftvec            = fr->shift_vec[0];
136     chargeA             = mdatoms->chargeA;
137     chargeB             = mdatoms->chargeB;
138     facel               = fr->epsfac;
139     krf                 = fr->k_rf;
140     crf                 = fr->c_rf;
141     ewc                 = fr->ewaldcoeff_q;
142     Vc                  = kernel_data->energygrp_elec;
143     typeA               = mdatoms->typeA;
144     typeB               = mdatoms->typeB;
145     ntype               = fr->ntype;
146     nbfp                = fr->nbfp;
147     Vv                  = kernel_data->energygrp_vdw;
148     lambda_coul         = kernel_data->lambda[efptCOUL];
149     lambda_vdw          = kernel_data->lambda[efptVDW];
150     dvdl                = kernel_data->dvdl;
151     alpha_coul          = fr->sc_alphacoul;
152     alpha_vdw           = fr->sc_alphavdw;
153     lam_power           = fr->sc_power;
154     sc_r_power          = fr->sc_r_power;
155     sigma6_def          = fr->sc_sigma6_def;
156     sigma6_min          = fr->sc_sigma6_min;
157     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
158     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
159     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
160
161     rcoulomb            = fr->rcoulomb;
162     sh_ewald            = fr->ic->sh_ewald;
163     rvdw                = fr->rvdw;
164     sh_invrc6           = fr->ic->sh_invrc6;
165
166     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
167     tab_ewald_F         = fr->ic->tabq_coul_F;
168     tab_ewald_V         = fr->ic->tabq_coul_V;
169     tab_ewald_scale     = fr->ic->tabq_scale;
170     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
171
172     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
173     {
174         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
175         rcutoff2        = rcutoff*rcutoff;
176         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
177         d               = rcutoff-rswitch;
178         swV3            = -10.0/(d*d*d);
179         swV4            =  15.0/(d*d*d*d);
180         swV5            =  -6.0/(d*d*d*d*d);
181         swF2            = -30.0/(d*d*d);
182         swF3            =  60.0/(d*d*d*d);
183         swF4            = -30.0/(d*d*d*d*d);
184     }
185     else
186     {
187         /* Stupid compilers dont realize these variables will not be used */
188         rswitch         = 0.0;
189         swV3            = 0.0;
190         swV4            = 0.0;
191         swV5            = 0.0;
192         swF2            = 0.0;
193         swF3            = 0.0;
194         swF4            = 0.0;
195     }
196
197     if (fr->cutoff_scheme == ecutsVERLET)
198     {
199         const interaction_const_t *ic;
200
201         ic = fr->ic;
202
203         ivdw             = GMX_NBKERNEL_VDW_LENNARDJONES;
204
205         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
206         {
207             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
208         }
209         else if (EEL_PME_EWALD(ic->eeltype))
210         {
211             icoul        = GMX_NBKERNEL_ELEC_EWALD;
212         }
213         else
214         {
215             gmx_incons("Unsupported eeltype with Verlet and free-energy");
216         }
217
218         bExactElecCutoff = TRUE;
219         bExactVdwCutoff  = TRUE;
220     }
221     else
222     {
223         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
224         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
225     }
226
227     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
228     rcutoff_max2    = max(fr->rcoulomb, fr->rvdw);
229     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
230
231     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
232
233     /* fix compiler warnings */
234     nj1   = 0;
235     n1C   = n1V   = 0;
236     epsC  = epsV  = 0;
237     eps2C = eps2V = 0;
238
239     dvdl_coul  = 0;
240     dvdl_vdw   = 0;
241
242     /* Lambda factor for state A, 1-lambda*/
243     LFC[STATE_A] = 1.0 - lambda_coul;
244     LFV[STATE_A] = 1.0 - lambda_vdw;
245
246     /* Lambda factor for state B, lambda*/
247     LFC[STATE_B] = lambda_coul;
248     LFV[STATE_B] = lambda_vdw;
249
250     /*derivative of the lambda factor for state A and B */
251     DLF[STATE_A] = -1;
252     DLF[STATE_B] = 1;
253
254     for (i = 0; i < NSTATES; i++)
255     {
256         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
257         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
258         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
259         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
260     }
261     /* precalculate */
262     sigma2_def = pow(sigma6_def, 1.0/3.0);
263     sigma2_min = pow(sigma6_min, 1.0/3.0);
264
265     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
266
267     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
268               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
269     if (do_tab)
270     {
271         tabscale         = kernel_data->table_elec_vdw->scale;
272         VFtab            = kernel_data->table_elec_vdw->data;
273         /* we always use the combined table here */
274         tab_elemsize     = 12;
275     }
276
277     for (n = 0; (n < nri); n++)
278     {
279         int npair_within_cutoff;
280
281         npair_within_cutoff = 0;
282
283         is3              = 3*shift[n];
284         shX              = shiftvec[is3];
285         shY              = shiftvec[is3+1];
286         shZ              = shiftvec[is3+2];
287         nj0              = jindex[n];
288         nj1              = jindex[n+1];
289         ii               = iinr[n];
290         ii3              = 3*ii;
291         ix               = shX + x[ii3+0];
292         iy               = shY + x[ii3+1];
293         iz               = shZ + x[ii3+2];
294         iqA              = facel*chargeA[ii];
295         iqB              = facel*chargeB[ii];
296         ntiA             = 2*ntype*typeA[ii];
297         ntiB             = 2*ntype*typeB[ii];
298         vctot            = 0;
299         vvtot            = 0;
300         fix              = 0;
301         fiy              = 0;
302         fiz              = 0;
303
304         for (k = nj0; (k < nj1); k++)
305         {
306             jnr              = jjnr[k];
307             j3               = 3*jnr;
308             dx               = ix - x[j3];
309             dy               = iy - x[j3+1];
310             dz               = iz - x[j3+2];
311             rsq              = dx*dx + dy*dy + dz*dz;
312
313             if (bExactCutoffAll && rsq >= rcutoff_max2)
314             {
315                 /* We save significant time by skipping all code below.
316                  * Note that with soft-core interactions, the actual cut-off
317                  * check might be different. But since the soft-core distance
318                  * is always larger than r, checking on r here is safe.
319                  */
320                 continue;
321             }
322             npair_within_cutoff++;
323
324             if (rsq > 0)
325             {
326                 rinv         = gmx_invsqrt(rsq);
327                 r            = rsq*rinv;
328             }
329             else
330             {
331                 /* The force at r=0 is zero, because of symmetry.
332                  * But note that the potential is in general non-zero,
333                  * since the soft-cored r will be non-zero.
334                  */
335                 rinv         = 0;
336                 r            = 0;
337             }
338
339             if (sc_r_power == 6.0)
340             {
341                 rpm2             = rsq*rsq;  /* r4 */
342                 rp               = rpm2*rsq; /* r6 */
343             }
344             else if (sc_r_power == 48.0)
345             {
346                 rp               = rsq*rsq*rsq; /* r6 */
347                 rp               = rp*rp;       /* r12 */
348                 rp               = rp*rp;       /* r24 */
349                 rp               = rp*rp;       /* r48 */
350                 rpm2             = rp/rsq;      /* r46 */
351             }
352             else
353             {
354                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
355                 rpm2           = rp/rsq;
356             }
357
358             Fscal = 0;
359
360             qq[STATE_A]      = iqA*chargeA[jnr];
361             qq[STATE_B]      = iqB*chargeB[jnr];
362
363             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
364             {
365                 tj[STATE_A]      = ntiA+2*typeA[jnr];
366                 tj[STATE_B]      = ntiB+2*typeB[jnr];
367
368                 for (i = 0; i < NSTATES; i++)
369                 {
370
371                     c6[i]              = nbfp[tj[i]];
372                     c12[i]             = nbfp[tj[i]+1];
373                     if ((c6[i] > 0) && (c12[i] > 0))
374                     {
375                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
376                         sigma6[i]       = 0.5*c12[i]/c6[i];
377                         sigma2[i]       = pow(sigma6[i], 1.0/3.0);
378                         /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
379                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
380                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
381                         {
382                             sigma6[i] = sigma6_min;
383                             sigma2[i] = sigma2_min;
384                         }
385                     }
386                     else
387                     {
388                         sigma6[i]       = sigma6_def;
389                         sigma2[i]       = sigma2_def;
390                     }
391                     if (sc_r_power == 6.0)
392                     {
393                         sigma_pow[i]    = sigma6[i];
394                         sigma_powm2[i]  = sigma6[i]/sigma2[i];
395                     }
396                     else if (sc_r_power == 48.0)
397                     {
398                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
399                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
400                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
401                         sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
402                     }
403                     else
404                     {    /* not really supported as input, but in here for testing the general case*/
405                         sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
406                         sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
407                     }
408                 }
409
410                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
411                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
412                 {
413                     alpha_vdw_eff    = 0;
414                     alpha_coul_eff   = 0;
415                 }
416                 else
417                 {
418                     alpha_vdw_eff    = alpha_vdw;
419                     alpha_coul_eff   = alpha_coul;
420                 }
421
422                 for (i = 0; i < NSTATES; i++)
423                 {
424                     FscalC[i]    = 0;
425                     FscalV[i]    = 0;
426                     Vcoul[i]     = 0;
427                     Vvdw[i]      = 0;
428
429                     /* Only spend time on A or B state if it is non-zero */
430                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
431                     {
432                         /* this section has to be inside the loop because of the dependence on sigma_pow */
433                         rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
434                         rinvC          = pow(rpinvC, 1.0/sc_r_power);
435                         rC             = 1.0/rinvC;
436
437                         rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
438                         rinvV          = pow(rpinvV, 1.0/sc_r_power);
439                         rV             = 1.0/rinvV;
440
441                         if (do_tab)
442                         {
443                             rtC        = rC*tabscale;
444                             n0         = rtC;
445                             epsC       = rtC-n0;
446                             eps2C      = epsC*epsC;
447                             n1C        = tab_elemsize*n0;
448
449                             rtV        = rV*tabscale;
450                             n0         = rtV;
451                             epsV       = rtV-n0;
452                             eps2V      = epsV*epsV;
453                             n1V        = tab_elemsize*n0;
454                         }
455
456                         /* With Ewald and soft-core we should put the cut-off on r,
457                          * not on the soft-cored rC, as the real-space and
458                          * reciprocal space contributions should (almost) cancel.
459                          */
460                         if (qq[i] != 0 &&
461                             !(bExactElecCutoff &&
462                               ((!bEwald && rC >= rcoulomb) ||
463                                (bEwald && r >= rcoulomb))))
464                         {
465                             switch (icoul)
466                             {
467                                 case GMX_NBKERNEL_ELEC_COULOMB:
468                                     /* simple cutoff */
469                                     Vcoul[i]   = qq[i]*rinvC;
470                                     FscalC[i]  = Vcoul[i];
471                                     break;
472
473                                 case GMX_NBKERNEL_ELEC_EWALD:
474                                     /* Ewald FEP is done only on the 1/r part */
475                                     Vcoul[i]   = qq[i]*(rinvC - sh_ewald);
476                                     FscalC[i]  = Vcoul[i];
477                                     break;
478
479                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
480                                     /* reaction-field */
481                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
482                                     FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
483                                     break;
484
485                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
486                                     /* non-Ewald tabulated coulomb */
487                                     nnn        = n1C;
488                                     Y          = VFtab[nnn];
489                                     F          = VFtab[nnn+1];
490                                     Geps       = epsC*VFtab[nnn+2];
491                                     Heps2      = eps2C*VFtab[nnn+3];
492                                     Fp         = F+Geps+Heps2;
493                                     VV         = Y+epsC*Fp;
494                                     FF         = Fp+Geps+2.0*Heps2;
495                                     Vcoul[i]   = qq[i]*VV;
496                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
497                                     break;
498
499                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
500                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
501                                     break;
502
503                                 case GMX_NBKERNEL_ELEC_NONE:
504                                     FscalC[i]  = 0.0;
505                                     Vcoul[i]   = 0.0;
506                                     break;
507
508                                 default:
509                                     gmx_incons("Invalid icoul in free energy kernel");
510                                     break;
511                             }
512
513                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
514                             {
515                                 d                = rC-rswitch;
516                                 d                = (d > 0.0) ? d : 0.0;
517                                 d2               = d*d;
518                                 sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
519                                 dsw              = d2*(swF2+d*(swF3+d*swF4));
520
521                                 Vcoul[i]  *= sw;
522                                 FscalC[i]  = FscalC[i]*sw + Vcoul[i]*dsw;
523                             }
524                         }
525
526                         if ((c6[i] != 0 || c12[i] != 0) &&
527                             !(bExactVdwCutoff && rV >= rvdw))
528                         {
529                             switch (ivdw)
530                             {
531                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
532                                     /* cutoff LJ */
533                                     if (sc_r_power == 6.0)
534                                     {
535                                         rinv6            = rpinvV;
536                                     }
537                                     else
538                                     {
539                                         rinv6            = pow(rinvV, 6.0);
540                                     }
541                                     Vvdw6            = c6[i]*rinv6;
542                                     Vvdw12           = c12[i]*rinv6*rinv6;
543                                     if (fr->vdw_modifier == eintmodPOTSHIFT)
544                                     {
545                                         Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
546                                                              -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
547                                     }
548                                     else
549                                     {
550                                         Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
551                                     }
552                                     FscalV[i]        = Vvdw12 - Vvdw6;
553                                     break;
554
555                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
556                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
557                                     break;
558
559                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
560                                     /* Table LJ */
561                                     nnn = n1V+4;
562                                     /* dispersion */
563                                     Y          = VFtab[nnn];
564                                     F          = VFtab[nnn+1];
565                                     Geps       = epsV*VFtab[nnn+2];
566                                     Heps2      = eps2V*VFtab[nnn+3];
567                                     Fp         = F+Geps+Heps2;
568                                     VV         = Y+epsV*Fp;
569                                     FF         = Fp+Geps+2.0*Heps2;
570                                     Vvdw[i]   += c6[i]*VV;
571                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
572
573                                     /* repulsion */
574                                     Y          = VFtab[nnn+4];
575                                     F          = VFtab[nnn+5];
576                                     Geps       = epsV*VFtab[nnn+6];
577                                     Heps2      = eps2V*VFtab[nnn+7];
578                                     Fp         = F+Geps+Heps2;
579                                     VV         = Y+epsV*Fp;
580                                     FF         = Fp+Geps+2.0*Heps2;
581                                     Vvdw[i]   += c12[i]*VV;
582                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
583                                     break;
584
585                                 case GMX_NBKERNEL_VDW_NONE:
586                                     Vvdw[i]    = 0.0;
587                                     FscalV[i]  = 0.0;
588                                     break;
589
590                                 default:
591                                     gmx_incons("Invalid ivdw in free energy kernel");
592                                     break;
593                             }
594
595                             if (fr->vdw_modifier == eintmodPOTSWITCH)
596                             {
597                                 d          = rV-rswitch;
598                                 d          = (d > 0.0) ? d : 0.0;
599                                 d2         = d*d;
600                                 sw         = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
601                                 dsw        = d2*(swF2+d*(swF3+d*swF4));
602
603                                 Vvdw[i]   *= sw;
604                                 FscalV[i]  = FscalV[i]*sw + Vvdw[i]*dsw;
605
606                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : 0.0;
607                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : 0.0;
608                             }
609                         }
610
611                         /* FscalC (and FscalV) now contain: dV/drC * rC
612                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
613                          * Further down we first multiply by r^p-2 and then by
614                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
615                          */
616                         FscalC[i] *= rpinvC;
617                         FscalV[i] *= rpinvV;
618                     }
619                 }
620
621                 /* Assemble A and B states */
622                 for (i = 0; i < NSTATES; i++)
623                 {
624                     vctot         += LFC[i]*Vcoul[i];
625                     vvtot         += LFV[i]*Vvdw[i];
626
627                     Fscal         += LFC[i]*FscalC[i]*rpm2;
628                     Fscal         += LFV[i]*FscalV[i]*rpm2;
629
630                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
631                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
632                 }
633             }
634             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
635             {
636                 /* For excluded pairs, which are only in this pair list when
637                  * using the Verlet scheme, we don't use soft-core.
638                  * The group scheme also doesn't soft-core for these.
639                  * As there is no singularity, there is no need for soft-core.
640                  */
641                 VV = krf*rsq - crf;
642                 FF = -2.0*krf;
643
644                 if (ii == jnr)
645                 {
646                     VV *= 0.5;
647                 }
648
649                 for (i = 0; i < NSTATES; i++)
650                 {
651                     vctot      += LFC[i]*qq[i]*VV;
652                     Fscal      += LFC[i]*qq[i]*FF;
653                     dvdl_coul  += DLF[i]*qq[i]*VV;
654                 }
655             }
656
657             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
658                 !(bExactElecCutoff && r >= rcoulomb))
659             {
660                 /* Because we compute the soft-core normally,
661                  * we have to remove the Ewald short range portion.
662                  * Done outside of the states loop because this part
663                  * doesn't depend on the scaled R.
664                  */
665                 real rs, frac, f_lr;
666                 int  ri;
667
668                 rs     = rsq*rinv*tab_ewald_scale;
669                 ri     = (int)rs;
670                 frac   = rs - ri;
671                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
672                 FF     = f_lr*rinv;
673                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
674
675                 if (ii == jnr)
676                 {
677                     VV   *= 0.5;
678                 }
679
680                 for (i = 0; i < NSTATES; i++)
681                 {
682                     vctot      -= LFC[i]*qq[i]*VV;
683                     Fscal      -= LFC[i]*qq[i]*FF;
684                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
685                 }
686             }
687
688             if (bDoForces)
689             {
690                 tx         = Fscal*dx;
691                 ty         = Fscal*dy;
692                 tz         = Fscal*dz;
693                 fix        = fix + tx;
694                 fiy        = fiy + ty;
695                 fiz        = fiz + tz;
696                 /* OpenMP atomics are expensive, but this kernels is also
697                  * expensive, so we can take this hit, instead of using
698                  * thread-local output buffers and extra reduction.
699                  */
700 #pragma omp atomic
701                 f[j3]     -= tx;
702 #pragma omp atomic
703                 f[j3+1]   -= ty;
704 #pragma omp atomic
705                 f[j3+2]   -= tz;
706             }
707         }
708
709         /* The atomics below are expensive with many OpenMP threads.
710          * Here unperturbed i-particles will usually only have a few
711          * (perturbed) j-particles in the list. Thus with a buffered list
712          * we can skip a significant number of i-reductions with a check.
713          */
714         if (npair_within_cutoff > 0)
715         {
716             if (bDoForces)
717             {
718 #pragma omp atomic
719                 f[ii3]        += fix;
720 #pragma omp atomic
721                 f[ii3+1]      += fiy;
722 #pragma omp atomic
723                 f[ii3+2]      += fiz;
724             }
725             if (bDoShiftForces)
726             {
727 #pragma omp atomic
728                 fshift[is3]   += fix;
729 #pragma omp atomic
730                 fshift[is3+1] += fiy;
731 #pragma omp atomic
732                 fshift[is3+2] += fiz;
733             }
734             if (bDoPotential)
735             {
736                 ggid               = gid[n];
737 #pragma omp atomic
738                 Vc[ggid]          += vctot;
739 #pragma omp atomic
740                 Vv[ggid]          += vvtot;
741             }
742         }
743     }
744
745 #pragma omp atomic
746     dvdl[efptCOUL]     += dvdl_coul;
747  #pragma omp atomic
748     dvdl[efptVDW]      += dvdl_vdw;
749
750     /* Estimate flops, average for free energy stuff:
751      * 12  flops per outer iteration
752      * 150 flops per inner iteration
753      */
754     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
755 }
756
757 real
758 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
759                                real tabscale, real *vftab,
760                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
761                                real LFC[2], real LFV[2], real DLF[2],
762                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
763                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
764                                real *velectot, real *vvdwtot, real *dvdl)
765 {
766     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
767     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
768     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
769     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
770     real       fscal_vdw[2], fscal_elec[2];
771     real       velec[2], vvdw[2];
772     int        i, ntab;
773
774     qq[0]    = qqA;
775     qq[1]    = qqB;
776     c6[0]    = c6A;
777     c6[1]    = c6B;
778     c12[0]   = c12A;
779     c12[1]   = c12B;
780
781     if (sc_r_power == 6.0)
782     {
783         rpm2             = r2*r2;   /* r4 */
784         rp               = rpm2*r2; /* r6 */
785     }
786     else if (sc_r_power == 48.0)
787     {
788         rp               = r2*r2*r2; /* r6 */
789         rp               = rp*rp;    /* r12 */
790         rp               = rp*rp;    /* r24 */
791         rp               = rp*rp;    /* r48 */
792         rpm2             = rp/r2;    /* r46 */
793     }
794     else
795     {
796         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
797         rpm2           = rp/r2;
798     }
799
800     /* Loop over state A(0) and B(1) */
801     for (i = 0; i < 2; i++)
802     {
803         if ((c6[i] > 0) && (c12[i] > 0))
804         {
805             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
806              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
807              */
808             sigma6[i]       = 0.5*c12[i]/c6[i];
809             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
810             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
811                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
812             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
813             {
814                 sigma6[i] = sigma6_min;
815                 sigma2[i] = sigma2_min;
816             }
817         }
818         else
819         {
820             sigma6[i]       = sigma6_def;
821             sigma2[i]       = sigma2_def;
822         }
823         if (sc_r_power == 6.0)
824         {
825             sigma_pow[i]    = sigma6[i];
826             sigma_powm2[i]  = sigma6[i]/sigma2[i];
827         }
828         else if (sc_r_power == 48.0)
829         {
830             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
831             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
832             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
833             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
834         }
835         else
836         {    /* not really supported as input, but in here for testing the general case*/
837             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
838             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
839         }
840     }
841
842     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
843     if ((c12[0] > 0) && (c12[1] > 0))
844     {
845         alpha_vdw_eff    = 0;
846         alpha_coul_eff   = 0;
847     }
848     else
849     {
850         alpha_vdw_eff    = alpha_vdw;
851         alpha_coul_eff   = alpha_coul;
852     }
853
854     /* Loop over A and B states again */
855     for (i = 0; i < 2; i++)
856     {
857         fscal_elec[i] = 0;
858         fscal_vdw[i]  = 0;
859         velec[i]      = 0;
860         vvdw[i]       = 0;
861
862         /* Only spend time on A or B state if it is non-zero */
863         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
864         {
865             /* Coulomb */
866             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
867             r_coul           = pow(rpinv, -1.0/sc_r_power);
868
869             /* Electrostatics table lookup data */
870             rtab             = r_coul*tabscale;
871             ntab             = rtab;
872             eps              = rtab-ntab;
873             eps2             = eps*eps;
874             ntab             = 12*ntab;
875             /* Electrostatics */
876             Y                = vftab[ntab];
877             F                = vftab[ntab+1];
878             Geps             = eps*vftab[ntab+2];
879             Heps2            = eps2*vftab[ntab+3];
880             Fp               = F+Geps+Heps2;
881             VV               = Y+eps*Fp;
882             FF               = Fp+Geps+2.0*Heps2;
883             velec[i]         = qq[i]*VV;
884             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
885
886             /* Vdw */
887             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
888             r_vdw            = pow(rpinv, -1.0/sc_r_power);
889             /* Vdw table lookup data */
890             rtab             = r_vdw*tabscale;
891             ntab             = rtab;
892             eps              = rtab-ntab;
893             eps2             = eps*eps;
894             ntab             = 12*ntab;
895             /* Dispersion */
896             Y                = vftab[ntab+4];
897             F                = vftab[ntab+5];
898             Geps             = eps*vftab[ntab+6];
899             Heps2            = eps2*vftab[ntab+7];
900             Fp               = F+Geps+Heps2;
901             VV               = Y+eps*Fp;
902             FF               = Fp+Geps+2.0*Heps2;
903             vvdw[i]          = c6[i]*VV;
904             fscal_vdw[i]     = -c6[i]*FF;
905
906             /* Repulsion */
907             Y                = vftab[ntab+8];
908             F                = vftab[ntab+9];
909             Geps             = eps*vftab[ntab+10];
910             Heps2            = eps2*vftab[ntab+11];
911             Fp               = F+Geps+Heps2;
912             VV               = Y+eps*Fp;
913             FF               = Fp+Geps+2.0*Heps2;
914             vvdw[i]         += c12[i]*VV;
915             fscal_vdw[i]    -= c12[i]*FF;
916             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
917         }
918     }
919     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
920     /* Assemble A and B states */
921     velecsum  = 0;
922     vvdwsum   = 0;
923     dvdl_coul = 0;
924     dvdl_vdw  = 0;
925     fscal     = 0;
926     for (i = 0; i < 2; i++)
927     {
928         velecsum      += LFC[i]*velec[i];
929         vvdwsum       += LFV[i]*vvdw[i];
930
931         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
932
933         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
934         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
935     }
936
937     dvdl[efptCOUL]     += dvdl_coul;
938     dvdl[efptVDW]      += dvdl_vdw;
939
940     *velectot           = velecsum;
941     *vvdwtot            = vvdwsum;
942
943     return fscal;
944 }