2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
41 #include "gromacs/math/vec.h"
43 #include "nonbonded.h"
44 #include "nb_kernel.h"
47 #include "nb_free_energy.h"
49 #include "gromacs/utility/fatalerror.h"
52 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
53 rvec * gmx_restrict xx,
54 rvec * gmx_restrict ff,
55 t_forcerec * gmx_restrict fr,
56 const t_mdatoms * gmx_restrict mdatoms,
57 nb_kernel_data_t * gmx_restrict kernel_data,
58 t_nrnb * gmx_restrict nrnb)
64 int i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
66 real Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
67 real Vcoul[NSTATES], Vvdw[NSTATES];
68 real rinv6, r, rt, rtC, rtV;
70 real qq[NSTATES], vctot, krsq;
71 int ntiA, ntiB, tj[NSTATES];
72 real Vvdw6, Vvdw12, vvtot;
73 real ix, iy, iz, fix, fiy, fiz;
74 real dx, dy, dz, rsq, rinv;
75 real c6[NSTATES], c12[NSTATES], c6grid;
76 real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
77 double dvdl_coul, dvdl_vdw;
78 real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
79 real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
80 real rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
81 real sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
82 int do_tab, tab_elemsize;
83 int n0, n1C, n1V, nnn;
84 real Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
95 const real * shiftvec;
99 const real * VFtab = NULL;
102 real facel, krf, crf;
103 const real * chargeA;
104 const real * chargeB;
105 real sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
106 real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
107 real ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald;
109 const real * nbfp, *nbfp_grid;
113 gmx_bool bDoForces, bDoShiftForces, bDoPotential;
114 real rcoulomb, rvdw, sh_invrc6;
115 gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
116 gmx_bool bEwald, bEwaldLJ;
118 const real * tab_ewald_F_lj;
119 const real * tab_ewald_V_lj;
120 real d, d2, sw, dsw, rinvcorr;
121 real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
122 real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
123 gmx_bool bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
124 gmx_bool bComputeVdwInteraction, bComputeElecInteraction;
127 real ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald;
129 sh_ewald = fr->ic->sh_ewald;
130 ewtab = fr->ic->tabq_coul_FDV0;
131 ewtabscale = fr->ic->tabq_scale;
132 ewtabhalfspace = 0.5/ewtabscale;
133 tab_ewald_F_lj = fr->ic->tabq_vdw_F;
134 tab_ewald_V_lj = fr->ic->tabq_vdw_V;
139 fshift = fr->fshift[0];
143 jindex = nlist->jindex;
145 icoul = nlist->ielec;
147 shift = nlist->shift;
150 shiftvec = fr->shift_vec[0];
151 chargeA = mdatoms->chargeA;
152 chargeB = mdatoms->chargeB;
156 ewc_lj = fr->ewaldcoeff_lj;
157 Vc = kernel_data->energygrp_elec;
158 typeA = mdatoms->typeA;
159 typeB = mdatoms->typeB;
162 nbfp_grid = fr->ljpme_c6grid;
163 Vv = kernel_data->energygrp_vdw;
164 lambda_coul = kernel_data->lambda[efptCOUL];
165 lambda_vdw = kernel_data->lambda[efptVDW];
166 dvdl = kernel_data->dvdl;
167 alpha_coul = fr->sc_alphacoul;
168 alpha_vdw = fr->sc_alphavdw;
169 lam_power = fr->sc_power;
170 sc_r_power = fr->sc_r_power;
171 sigma6_def = fr->sc_sigma6_def;
172 sigma6_min = fr->sc_sigma6_min;
173 bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
174 bDoShiftForces = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
175 bDoPotential = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
177 rcoulomb = fr->rcoulomb;
179 sh_invrc6 = fr->ic->sh_invrc6;
180 sh_lj_ewald = fr->ic->sh_lj_ewald;
181 ewclj = fr->ewaldcoeff_lj;
182 ewclj2 = ewclj*ewclj;
183 ewclj6 = ewclj2*ewclj2*ewclj2;
185 if (fr->coulomb_modifier == eintmodPOTSWITCH)
187 d = fr->rcoulomb-fr->rcoulomb_switch;
188 elec_swV3 = -10.0/(d*d*d);
189 elec_swV4 = 15.0/(d*d*d*d);
190 elec_swV5 = -6.0/(d*d*d*d*d);
191 elec_swF2 = -30.0/(d*d*d);
192 elec_swF3 = 60.0/(d*d*d*d);
193 elec_swF4 = -30.0/(d*d*d*d*d);
197 /* Avoid warnings from stupid compilers (looking at you, Clang!) */
198 elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
201 if (fr->vdw_modifier == eintmodPOTSWITCH)
203 d = fr->rvdw-fr->rvdw_switch;
204 vdw_swV3 = -10.0/(d*d*d);
205 vdw_swV4 = 15.0/(d*d*d*d);
206 vdw_swV5 = -6.0/(d*d*d*d*d);
207 vdw_swF2 = -30.0/(d*d*d);
208 vdw_swF3 = 60.0/(d*d*d*d);
209 vdw_swF4 = -30.0/(d*d*d*d*d);
213 /* Avoid warnings from stupid compilers (looking at you, Clang!) */
214 vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
217 if (fr->cutoff_scheme == ecutsVERLET)
219 const interaction_const_t *ic;
222 if (EVDW_PME(ic->vdwtype))
224 ivdw = GMX_NBKERNEL_VDW_LJEWALD;
228 ivdw = GMX_NBKERNEL_VDW_LENNARDJONES;
231 if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
233 icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
235 else if (EEL_PME_EWALD(ic->eeltype))
237 icoul = GMX_NBKERNEL_ELEC_EWALD;
241 gmx_incons("Unsupported eeltype with Verlet and free-energy");
244 bExactElecCutoff = TRUE;
245 bExactVdwCutoff = TRUE;
249 bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
250 bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE);
253 bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
254 rcutoff_max2 = max(fr->rcoulomb, fr->rvdw);
255 rcutoff_max2 = rcutoff_max2*rcutoff_max2;
257 bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD);
258 bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
260 /* For Ewald/PME interactions we cannot easily apply the soft-core component to
261 * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
262 * can apply the small trick of subtracting the _reciprocal_ space contribution
263 * in this kernel, and instead apply the free energy interaction to the 1/r
264 * (standard coulomb) interaction.
266 * However, we cannot use this approach for switch-modified since we would then
267 * effectively end up evaluating a significantly different interaction here compared to the
268 * normal (non-free-energy) kernels, either by applying a cutoff at a different
269 * position than what the user requested, or by switching different
270 * things (1/r rather than short-range Ewald). For these settings, we just
271 * use the traditional short-range Ewald interaction in that case.
273 bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH));
274 /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
275 * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
277 bConvertLJEwaldToLJ6 = (bEwaldLJ && (fr->vdw_modifier != eintmodPOTSWITCH));
279 /* We currently don't implement exclusion correction, needed with the Verlet cut-off scheme, without conversion */
280 if (fr->cutoff_scheme == ecutsVERLET &&
281 ((bEwald && !bConvertEwaldToCoulomb) ||
282 (bEwaldLJ && !bConvertLJEwaldToLJ6)))
284 gmx_incons("Unimplemented non-bonded setup");
287 /* fix compiler warnings */
296 /* Lambda factor for state A, 1-lambda*/
297 LFC[STATE_A] = 1.0 - lambda_coul;
298 LFV[STATE_A] = 1.0 - lambda_vdw;
300 /* Lambda factor for state B, lambda*/
301 LFC[STATE_B] = lambda_coul;
302 LFV[STATE_B] = lambda_vdw;
304 /*derivative of the lambda factor for state A and B */
308 for (i = 0; i < NSTATES; i++)
310 lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
311 dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
312 lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
313 dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
316 sigma2_def = pow(sigma6_def, 1.0/3.0);
317 sigma2_min = pow(sigma6_min, 1.0/3.0);
319 /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
321 do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
322 ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
325 tabscale = kernel_data->table_elec_vdw->scale;
326 VFtab = kernel_data->table_elec_vdw->data;
327 /* we always use the combined table here */
331 for (n = 0; (n < nri); n++)
333 int npair_within_cutoff;
335 npair_within_cutoff = 0;
339 shY = shiftvec[is3+1];
340 shZ = shiftvec[is3+2];
348 iqA = facel*chargeA[ii];
349 iqB = facel*chargeB[ii];
350 ntiA = 2*ntype*typeA[ii];
351 ntiB = 2*ntype*typeB[ii];
358 for (k = nj0; (k < nj1); k++)
365 rsq = dx*dx + dy*dy + dz*dz;
367 if (bExactCutoffAll && rsq >= rcutoff_max2)
369 /* We save significant time by skipping all code below.
370 * Note that with soft-core interactions, the actual cut-off
371 * check might be different. But since the soft-core distance
372 * is always larger than r, checking on r here is safe.
376 npair_within_cutoff++;
380 rinv = gmx_invsqrt(rsq);
385 /* The force at r=0 is zero, because of symmetry.
386 * But note that the potential is in general non-zero,
387 * since the soft-cored r will be non-zero.
393 if (sc_r_power == 6.0)
395 rpm2 = rsq*rsq; /* r4 */
396 rp = rpm2*rsq; /* r6 */
398 else if (sc_r_power == 48.0)
400 rp = rsq*rsq*rsq; /* r6 */
401 rp = rp*rp; /* r12 */
402 rp = rp*rp; /* r24 */
403 rp = rp*rp; /* r48 */
404 rpm2 = rp/rsq; /* r46 */
408 rp = pow(r, sc_r_power); /* not currently supported as input, but can handle it */
414 qq[STATE_A] = iqA*chargeA[jnr];
415 qq[STATE_B] = iqB*chargeB[jnr];
417 tj[STATE_A] = ntiA+2*typeA[jnr];
418 tj[STATE_B] = ntiB+2*typeB[jnr];
420 if (nlist->excl_fep == NULL || nlist->excl_fep[k])
422 c6[STATE_A] = nbfp[tj[STATE_A]];
423 c6[STATE_B] = nbfp[tj[STATE_B]];
425 for (i = 0; i < NSTATES; i++)
427 c12[i] = nbfp[tj[i]+1];
428 if ((c6[i] > 0) && (c12[i] > 0))
430 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
431 sigma6[i] = 0.5*c12[i]/c6[i];
432 sigma2[i] = pow(sigma6[i], 1.0/3.0);
433 /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
434 what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
435 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
437 sigma6[i] = sigma6_min;
438 sigma2[i] = sigma2_min;
443 sigma6[i] = sigma6_def;
444 sigma2[i] = sigma2_def;
446 if (sc_r_power == 6.0)
448 sigma_pow[i] = sigma6[i];
449 sigma_powm2[i] = sigma6[i]/sigma2[i];
451 else if (sc_r_power == 48.0)
453 sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
454 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
455 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
456 sigma_powm2[i] = sigma_pow[i]/sigma2[i];
459 { /* not really supported as input, but in here for testing the general case*/
460 sigma_pow[i] = pow(sigma2[i], sc_r_power/2);
461 sigma_powm2[i] = sigma_pow[i]/(sigma2[i]);
465 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
466 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
473 alpha_vdw_eff = alpha_vdw;
474 alpha_coul_eff = alpha_coul;
477 for (i = 0; i < NSTATES; i++)
484 /* Only spend time on A or B state if it is non-zero */
485 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
487 /* this section has to be inside the loop because of the dependence on sigma_pow */
488 rpinvC = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
489 rinvC = pow(rpinvC, 1.0/sc_r_power);
492 rpinvV = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
493 rinvV = pow(rpinvV, 1.0/sc_r_power);
502 n1C = tab_elemsize*n0;
508 n1V = tab_elemsize*n0;
511 /* Only process the coulomb interactions if we have charges,
512 * and if we either include all entries in the list (no cutoff
513 * used in the kernel), or if we are within the cutoff.
515 bComputeElecInteraction = !bExactElecCutoff ||
516 ( bConvertEwaldToCoulomb && r < rcoulomb) ||
517 (!bConvertEwaldToCoulomb && rC < rcoulomb);
519 if ( (qq[i] != 0) && bComputeElecInteraction)
523 case GMX_NBKERNEL_ELEC_COULOMB:
525 Vcoul[i] = qq[i]*rinvC;
526 FscalC[i] = Vcoul[i];
527 /* The shift for the Coulomb potential is stored in
528 * the RF parameter c_rf, which is 0 without shift.
530 Vcoul[i] -= qq[i]*fr->ic->c_rf;
533 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
535 Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
536 FscalC[i] = qq[i]*(rinvC - 2.0*krf*rC*rC);
539 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
540 /* non-Ewald tabulated coulomb */
544 Geps = epsC*VFtab[nnn+2];
545 Heps2 = eps2C*VFtab[nnn+3];
548 FF = Fp+Geps+2.0*Heps2;
550 FscalC[i] = -qq[i]*tabscale*FF*rC;
553 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
554 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
557 case GMX_NBKERNEL_ELEC_EWALD:
558 if (bConvertEwaldToCoulomb)
560 /* Ewald FEP is done only on the 1/r part */
561 Vcoul[i] = qq[i]*(rinvC-sh_ewald);
562 FscalC[i] = qq[i]*rinvC;
566 ewrt = rC*ewtabscale;
570 FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
571 rinvcorr = rinvC-sh_ewald;
572 Vcoul[i] = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
573 FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
577 case GMX_NBKERNEL_ELEC_NONE:
583 gmx_incons("Invalid icoul in free energy kernel");
587 if (fr->coulomb_modifier == eintmodPOTSWITCH)
589 d = rC-fr->rcoulomb_switch;
590 d = (d > 0.0) ? d : 0.0;
592 sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
593 dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
595 FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw;
598 FscalC[i] = (rC < rcoulomb) ? FscalC[i] : 0.0;
599 Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : 0.0;
603 /* Only process the VDW interactions if we have
604 * some non-zero parameters, and if we either
605 * include all entries in the list (no cutoff used
606 * in the kernel), or if we are within the cutoff.
608 bComputeVdwInteraction = !bExactVdwCutoff ||
609 ( bConvertLJEwaldToLJ6 && r < rvdw) ||
610 (!bConvertLJEwaldToLJ6 && rV < rvdw);
611 if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
615 case GMX_NBKERNEL_VDW_LENNARDJONES:
617 if (sc_r_power == 6.0)
624 rinv6 = rinv6*rinv6*rinv6;
627 Vvdw12 = c12[i]*rinv6*rinv6;
629 Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
630 - (Vvdw6 - c6[i]*sh_invrc6)*(1.0/6.0));
631 FscalV[i] = Vvdw12 - Vvdw6;
634 case GMX_NBKERNEL_VDW_BUCKINGHAM:
635 gmx_fatal(FARGS, "Buckingham free energy not supported.");
638 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
644 Geps = epsV*VFtab[nnn+2];
645 Heps2 = eps2V*VFtab[nnn+3];
648 FF = Fp+Geps+2.0*Heps2;
650 FscalV[i] -= c6[i]*tabscale*FF*rV;
655 Geps = epsV*VFtab[nnn+6];
656 Heps2 = eps2V*VFtab[nnn+7];
659 FF = Fp+Geps+2.0*Heps2;
660 Vvdw[i] += c12[i]*VV;
661 FscalV[i] -= c12[i]*tabscale*FF*rV;
664 case GMX_NBKERNEL_VDW_LJEWALD:
665 if (sc_r_power == 6.0)
672 rinv6 = rinv6*rinv6*rinv6;
674 c6grid = nbfp_grid[tj[i]];
676 if (bConvertLJEwaldToLJ6)
680 Vvdw12 = c12[i]*rinv6*rinv6;
682 Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
683 - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*(1.0/6.0));
684 FscalV[i] = Vvdw12 - Vvdw6;
689 ewcljrsq = ewclj2*rV*rV;
690 exponent = exp(-ewcljrsq);
691 poly = exponent*(1.0 + ewcljrsq + ewcljrsq*ewcljrsq*0.5);
692 vvdw_disp = (c6[i]-c6grid*(1.0-poly))*rinv6;
693 vvdw_rep = c12[i]*rinv6*rinv6;
694 FscalV[i] = vvdw_rep - vvdw_disp - c6grid*(1.0/6.0)*exponent*ewclj6;
695 Vvdw[i] = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)/12.0 - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/6.0;
699 case GMX_NBKERNEL_VDW_NONE:
705 gmx_incons("Invalid ivdw in free energy kernel");
709 if (fr->vdw_modifier == eintmodPOTSWITCH)
711 d = rV-fr->rvdw_switch;
712 d = (d > 0.0) ? d : 0.0;
714 sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
715 dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
717 FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw;
720 FscalV[i] = (rV < rvdw) ? FscalV[i] : 0.0;
721 Vvdw[i] = (rV < rvdw) ? Vvdw[i] : 0.0;
725 /* FscalC (and FscalV) now contain: dV/drC * rC
726 * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
727 * Further down we first multiply by r^p-2 and then by
728 * the vector r, which in total gives: dV/drC * (r/rC)^1-p
735 /* Assemble A and B states */
736 for (i = 0; i < NSTATES; i++)
738 vctot += LFC[i]*Vcoul[i];
739 vvtot += LFV[i]*Vvdw[i];
741 Fscal += LFC[i]*FscalC[i]*rpm2;
742 Fscal += LFV[i]*FscalV[i]*rpm2;
744 dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
745 dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
748 else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
750 /* For excluded pairs, which are only in this pair list when
751 * using the Verlet scheme, we don't use soft-core.
752 * The group scheme also doesn't soft-core for these.
753 * As there is no singularity, there is no need for soft-core.
763 for (i = 0; i < NSTATES; i++)
765 vctot += LFC[i]*qq[i]*VV;
766 Fscal += LFC[i]*qq[i]*FF;
767 dvdl_coul += DLF[i]*qq[i]*VV;
771 if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
773 /* See comment in the preamble. When using Ewald interactions
774 * (unless we use a switch modifier) we subtract the reciprocal-space
775 * Ewald component here which made it possible to apply the free
776 * energy interaction to 1/r (vanilla coulomb short-range part)
777 * above. This gets us closer to the ideal case of applying
778 * the softcore to the entire electrostatic interaction,
779 * including the reciprocal-space component.
787 f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1];
788 v_lr = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
791 /* Note that any possible Ewald shift has already been applied in
792 * the normal interaction part above.
797 /* If we get here, the i particle (ii) has itself (jnr)
798 * in its neighborlist. This can only happen with the Verlet
799 * scheme, and corresponds to a self-interaction that will
800 * occur twice. Scale it down by 50% to only include it once.
805 for (i = 0; i < NSTATES; i++)
807 vctot -= LFC[i]*qq[i]*v_lr;
808 Fscal -= LFC[i]*qq[i]*f_lr;
809 dvdl_coul -= (DLF[i]*qq[i])*v_lr;
813 if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
815 /* See comment in the preamble. When using LJ-Ewald interactions
816 * (unless we use a switch modifier) we subtract the reciprocal-space
817 * Ewald component here which made it possible to apply the free
818 * energy interaction to r^-6 (vanilla LJ6 short-range part)
819 * above. This gets us closer to the ideal case of applying
820 * the softcore to the entire VdW interaction,
821 * including the reciprocal-space component.
823 /* We could also use the analytical form here
824 * iso a table, but that can cause issues for
825 * r close to 0 for non-interacting pairs.
830 rs = rsq*rinv*ewtabscale;
833 f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
834 /* TODO: Currently the Ewald LJ table does not contain
835 * the factor 1/6, we should add this.
838 VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/6.0;
842 /* If we get here, the i particle (ii) has itself (jnr)
843 * in its neighborlist. This can only happen with the Verlet
844 * scheme, and corresponds to a self-interaction that will
845 * occur twice. Scale it down by 50% to only include it once.
850 for (i = 0; i < NSTATES; i++)
852 c6grid = nbfp_grid[tj[i]];
853 vvtot += LFV[i]*c6grid*VV;
854 Fscal += LFV[i]*c6grid*FF;
855 dvdl_vdw += (DLF[i]*c6grid)*VV;
867 /* OpenMP atomics are expensive, but this kernels is also
868 * expensive, so we can take this hit, instead of using
869 * thread-local output buffers and extra reduction.
880 /* The atomics below are expensive with many OpenMP threads.
881 * Here unperturbed i-particles will usually only have a few
882 * (perturbed) j-particles in the list. Thus with a buffered list
883 * we can skip a significant number of i-reductions with a check.
885 if (npair_within_cutoff > 0)
901 fshift[is3+1] += fiy;
903 fshift[is3+2] += fiz;
917 dvdl[efptCOUL] += dvdl_coul;
919 dvdl[efptVDW] += dvdl_vdw;
921 /* Estimate flops, average for free energy stuff:
922 * 12 flops per outer iteration
923 * 150 flops per inner iteration
926 inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
930 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
931 real tabscale, real *vftab,
932 real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
933 real LFC[2], real LFV[2], real DLF[2],
934 real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
935 real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
936 real *velectot, real *vvdwtot, real *dvdl)
938 real r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
939 real qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
940 real alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
941 real rpinv, r_coul, r_vdw, velecsum, vvdwsum;
942 real fscal_vdw[2], fscal_elec[2];
943 real velec[2], vvdw[2];
953 if (sc_r_power == 6.0)
955 rpm2 = r2*r2; /* r4 */
956 rp = rpm2*r2; /* r6 */
958 else if (sc_r_power == 48.0)
960 rp = r2*r2*r2; /* r6 */
961 rp = rp*rp; /* r12 */
962 rp = rp*rp; /* r24 */
963 rp = rp*rp; /* r48 */
964 rpm2 = rp/r2; /* r46 */
968 rp = pow(r2, 0.5*sc_r_power); /* not currently supported as input, but can handle it */
972 /* Loop over state A(0) and B(1) */
973 for (i = 0; i < 2; i++)
975 if ((c6[i] > 0) && (c12[i] > 0))
977 /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
978 * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
980 sigma6[i] = 0.5*c12[i]/c6[i];
981 sigma2[i] = pow(0.5*c12[i]/c6[i], 1.0/3.0);
982 /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
983 what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */
984 if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
986 sigma6[i] = sigma6_min;
987 sigma2[i] = sigma2_min;
992 sigma6[i] = sigma6_def;
993 sigma2[i] = sigma2_def;
995 if (sc_r_power == 6.0)
997 sigma_pow[i] = sigma6[i];
998 sigma_powm2[i] = sigma6[i]/sigma2[i];
1000 else if (sc_r_power == 48.0)
1002 sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
1003 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
1004 sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
1005 sigma_powm2[i] = sigma_pow[i]/sigma2[i];
1008 { /* not really supported as input, but in here for testing the general case*/
1009 sigma_pow[i] = pow(sigma2[i], sc_r_power/2);
1010 sigma_powm2[i] = sigma_pow[i]/(sigma2[i]);
1014 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
1015 if ((c12[0] > 0) && (c12[1] > 0))
1022 alpha_vdw_eff = alpha_vdw;
1023 alpha_coul_eff = alpha_coul;
1026 /* Loop over A and B states again */
1027 for (i = 0; i < 2; i++)
1034 /* Only spend time on A or B state if it is non-zero */
1035 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
1038 rpinv = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
1039 r_coul = pow(rpinv, -1.0/sc_r_power);
1041 /* Electrostatics table lookup data */
1042 rtab = r_coul*tabscale;
1047 /* Electrostatics */
1050 Geps = eps*vftab[ntab+2];
1051 Heps2 = eps2*vftab[ntab+3];
1054 FF = Fp+Geps+2.0*Heps2;
1055 velec[i] = qq[i]*VV;
1056 fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale;
1059 rpinv = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
1060 r_vdw = pow(rpinv, -1.0/sc_r_power);
1061 /* Vdw table lookup data */
1062 rtab = r_vdw*tabscale;
1070 Geps = eps*vftab[ntab+6];
1071 Heps2 = eps2*vftab[ntab+7];
1074 FF = Fp+Geps+2.0*Heps2;
1076 fscal_vdw[i] = -c6[i]*FF;
1081 Geps = eps*vftab[ntab+10];
1082 Heps2 = eps2*vftab[ntab+11];
1085 FF = Fp+Geps+2.0*Heps2;
1086 vvdw[i] += c12[i]*VV;
1087 fscal_vdw[i] -= c12[i]*FF;
1088 fscal_vdw[i] *= r_vdw*rpinv*tabscale;
1091 /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
1092 /* Assemble A and B states */
1098 for (i = 0; i < 2; i++)
1100 velecsum += LFC[i]*velec[i];
1101 vvdwsum += LFV[i]*vvdw[i];
1103 fscal += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
1105 dvdl_coul += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
1106 dvdl_vdw += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
1109 dvdl[efptCOUL] += dvdl_coul;
1110 dvdl[efptVDW] += dvdl_vdw;
1112 *velectot = velecsum;