--- /dev/null
- real c6[NSTATES], c12[NSTATES], c6grid[NSTATES];
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "vec.h"
+#include "typedefs.h"
+#include "nonbonded.h"
+#include "nb_kernel.h"
+#include "nrnb.h"
+#include "macros.h"
+#include "nb_free_energy.h"
+
+#include "gmx_fatal.h"
+/*
+ * Free-energy (perturbed) non-bonded kernel.
+ *
+ * For every i-entry of nlist and each of its j-neighbours this evaluates
+ * electrostatics and Van der Waals for topology states A and B at the
+ * soft-cored distances rC and rV, and mixes the two states with the
+ * lambda factors LFC/LFV.
+ *
+ * nlist       pair list: nri, iinr, jindex, jjnr, shift, gid, ielec, ivdw
+ *             and the optional excl_fep mask are read
+ * xx          coordinates (read only)
+ * ff          forces, updated when GMX_NONBONDED_DO_FORCE is set
+ * fr          interaction settings; fr->fshift is updated when
+ *             GMX_NONBONDED_DO_SHIFTFORCE is set
+ * mdatoms     chargeA/chargeB and typeA/typeB of the atoms
+ * kernel_data flags, lambda values, table data and the outputs
+ *             energygrp_elec, energygrp_vdw (written when
+ *             GMX_NONBONDED_DO_POTENTIAL is set) and dvdl
+ * nrnb        flop counter, incremented once at the end
+ *
+ * Every update of shared output is preceded by an "omp atomic" pragma.
+ *
+ * NOTE(review): sh_ewald is assigned twice from fr->ic->sh_ewald, and
+ * sigma_powm2 is computed but never read in this function.
+ */
+void
+gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
+ rvec * gmx_restrict xx,
+ rvec * gmx_restrict ff,
+ t_forcerec * gmx_restrict fr,
+ const t_mdatoms * gmx_restrict mdatoms,
+ nb_kernel_data_t * gmx_restrict kernel_data,
+ t_nrnb * gmx_restrict nrnb)
+{
+
+#define STATE_A 0
+#define STATE_B 1
+#define NSTATES 2
+ int i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
+ real shX, shY, shZ;
+ real Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
+ real Vcoul[NSTATES], Vvdw[NSTATES];
+ real rinv6, r, rt, rtC, rtV;
+ real iqA, iqB;
+ real qq[NSTATES], vctot, krsq;
+ int ntiA, ntiB, tj[NSTATES];
+ real Vvdw6, Vvdw12, vvtot;
+ real ix, iy, iz, fix, fiy, fiz;
+ real dx, dy, dz, rsq, rinv;
- real rcoulomb, sh_ewald;
- real rvdw, sh_invrc6;
- gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald;
++ real c6[NSTATES], c12[NSTATES], c6grid;
+ real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
+ double dvdl_coul, dvdl_vdw;
+ real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
+ real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
+ real rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
+ real sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
+ int do_tab, tab_elemsize;
+ int n0, n1C, n1V, nnn;
+ real Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
+ int icoul, ivdw;
+ int nri;
+ const int * iinr;
+ const int * jindex;
+ const int * jjnr;
+ const int * shift;
+ const int * gid;
+ const int * typeA;
+ const int * typeB;
+ int ntype;
+ const real * shiftvec;
+ real dvdl_part;
+ real * fshift;
+ real tabscale = 0;
+ const real * VFtab = NULL;
+ const real * x;
+ real * f;
+ real facel, krf, crf;
+ const real * chargeA;
+ const real * chargeB;
+ real sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
+ real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
+ const real * nbfp, *nbfp_grid;
+ real * dvdl;
+ real * Vv;
+ real * Vc;
+ gmx_bool bDoForces, bDoShiftForces, bDoPotential;
- real rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
- const real * tab_ewald_F;
- const real * tab_ewald_V;
++ real rcoulomb, rvdw, sh_invrc6;
++ gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
++ gmx_bool bEwald, bEwaldLJ;
+ real rcutoff_max2;
- real tab_ewald_scale, tab_ewald_halfsp;
+ const real * tab_ewald_F_lj;
+ const real * tab_ewald_V_lj;
- /* Ewald (PME) reciprocal force and energy quadratic spline tables */
- tab_ewald_F = fr->ic->tabq_coul_F;
- tab_ewald_V = fr->ic->tabq_coul_V;
- tab_ewald_scale = fr->ic->tabq_scale;
- tab_ewald_F_lj = fr->ic->tabq_vdw_F;
- tab_ewald_V_lj = fr->ic->tabq_vdw_V;
- tab_ewald_halfsp = 0.5/tab_ewald_scale;
++ real d, d2, sw, dsw, rinvcorr;
++ real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
++ real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
++ gmx_bool bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
++ gmx_bool bComputeVdwInteraction, bComputeElecInteraction;
++ const real * ewtab;
++ int ewitab;
++ real ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald;
++
++ sh_ewald = fr->ic->sh_ewald;
++ ewtab = fr->ic->tabq_coul_FDV0;
++ ewtabscale = fr->ic->tabq_scale;
++ ewtabhalfspace = 0.5/ewtabscale;
++ tab_ewald_F_lj = fr->ic->tabq_vdw_F;
++ tab_ewald_V_lj = fr->ic->tabq_vdw_V;
+
+ x = xx[0];
+ f = ff[0];
+
+ fshift = fr->fshift[0];
+
+ nri = nlist->nri;
+ iinr = nlist->iinr;
+ jindex = nlist->jindex;
+ jjnr = nlist->jjnr;
+ icoul = nlist->ielec;
+ ivdw = nlist->ivdw;
+ shift = nlist->shift;
+ gid = nlist->gid;
+
+ shiftvec = fr->shift_vec[0];
+ chargeA = mdatoms->chargeA;
+ chargeB = mdatoms->chargeB;
+ facel = fr->epsfac;
+ krf = fr->k_rf;
+ crf = fr->c_rf;
+ ewc_lj = fr->ewaldcoeff_lj;
+ Vc = kernel_data->energygrp_elec;
+ typeA = mdatoms->typeA;
+ typeB = mdatoms->typeB;
+ ntype = fr->ntype;
+ nbfp = fr->nbfp;
+ nbfp_grid = fr->ljpme_c6grid;
+ Vv = kernel_data->energygrp_vdw;
+ lambda_coul = kernel_data->lambda[efptCOUL];
+ lambda_vdw = kernel_data->lambda[efptVDW];
+ dvdl = kernel_data->dvdl;
+ alpha_coul = fr->sc_alphacoul;
+ alpha_vdw = fr->sc_alphavdw;
+ lam_power = fr->sc_power;
+ sc_r_power = fr->sc_r_power;
+ sigma6_def = fr->sc_sigma6_def;
+ sigma6_min = fr->sc_sigma6_min;
+ bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
+ bDoShiftForces = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
+ bDoPotential = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
+
+ rcoulomb = fr->rcoulomb;
+ sh_ewald = fr->ic->sh_ewald;
+ rvdw = fr->rvdw;
+ sh_invrc6 = fr->ic->sh_invrc6;
+
- if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
++ if (fr->coulomb_modifier == eintmodPOTSWITCH)
++ {
++ d = fr->rcoulomb-fr->rcoulomb_switch;
++ elec_swV3 = -10.0/(d*d*d);
++ elec_swV4 = 15.0/(d*d*d*d);
++ elec_swV5 = -6.0/(d*d*d*d*d);
++ elec_swF2 = -30.0/(d*d*d);
++ elec_swF3 = 60.0/(d*d*d*d);
++ elec_swF4 = -30.0/(d*d*d*d*d);
++ }
++ else
++ {
++ /* Avoid warnings from stupid compilers (looking at you, Clang!) */
++ elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
++ }
+
- rcutoff = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
- rcutoff2 = rcutoff*rcutoff;
- rswitch = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
- d = rcutoff-rswitch;
- swV3 = -10.0/(d*d*d);
- swV4 = 15.0/(d*d*d*d);
- swV5 = -6.0/(d*d*d*d*d);
- swF2 = -30.0/(d*d*d);
- swF3 = 60.0/(d*d*d*d);
- swF4 = -30.0/(d*d*d*d*d);
++ if (fr->vdw_modifier == eintmodPOTSWITCH)
+ {
- /* Stupid compilers dont realize these variables will not be used */
- rswitch = 0.0;
- swV3 = 0.0;
- swV4 = 0.0;
- swV5 = 0.0;
- swF2 = 0.0;
- swF3 = 0.0;
- swF4 = 0.0;
++ d = fr->rvdw-fr->rvdw_switch;
++ vdw_swV3 = -10.0/(d*d*d);
++ vdw_swV4 = 15.0/(d*d*d*d);
++ vdw_swV5 = -6.0/(d*d*d*d*d);
++ vdw_swF2 = -30.0/(d*d*d);
++ vdw_swF3 = 60.0/(d*d*d*d);
++ vdw_swF4 = -30.0/(d*d*d*d*d);
+ }
+ else
+ {
- if (ivdw == GMX_NBKERNEL_VDW_LJEWALD)
- {
- c6grid[STATE_A] = nbfp_grid[tj[STATE_A]];
- c6grid[STATE_B] = nbfp_grid[tj[STATE_B]];
- }
-
++ /* Avoid warnings from stupid compilers (looking at you, Clang!) */
++ vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
+ }
+
+ if (fr->cutoff_scheme == ecutsVERLET)
+ {
+ const interaction_const_t *ic;
+
+ ic = fr->ic;
+ if (EVDW_PME(ic->vdwtype))
+ {
+ ivdw = GMX_NBKERNEL_VDW_LJEWALD;
+ }
+ else
+ {
+ ivdw = GMX_NBKERNEL_VDW_LENNARDJONES;
+ }
+
+ if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
+ {
+ icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD;
+ }
+ else if (EEL_PME_EWALD(ic->eeltype))
+ {
+ icoul = GMX_NBKERNEL_ELEC_EWALD;
+ }
+ else
+ {
+ gmx_incons("Unsupported eeltype with Verlet and free-energy");
+ }
+
+ bExactElecCutoff = TRUE;
+ bExactVdwCutoff = TRUE;
+ }
+ else
+ {
+ bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
+ bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE);
+ }
+
+ bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
+ rcutoff_max2 = max(fr->rcoulomb, fr->rvdw);
+ rcutoff_max2 = rcutoff_max2*rcutoff_max2; /* squared, for comparison with rsq */
+
+ bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD);
++ bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
++
++ /* For Ewald/PME interactions we cannot easily apply the soft-core component to
++ * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
++ * can apply the small trick of subtracting the _reciprocal_ space contribution
++ * in this kernel, and instead apply the free energy interaction to the 1/r
++ * (standard coulomb) interaction.
++ *
++ * However, we cannot use this approach for switch-modified since we would then
++ * effectively end up evaluating a significantly different interaction here compared to the
++ * normal (non-free-energy) kernels, either by applying a cutoff at a different
++ * position than what the user requested, or by switching different
++ * things (1/r rather than short-range Ewald). For these settings, we just
++ * use the traditional short-range Ewald interaction in that case.
++ */
++ bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH));
++ /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
++ * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
++ */
++ bConvertLJEwaldToLJ6 = (bEwaldLJ && (fr->vdw_modifier != eintmodPOTSWITCH));
+
+ /* Pre-set these to silence compiler warnings about uninitialized use */
+ nj1 = 0;
+ n1C = n1V = 0;
+ epsC = epsV = 0;
+ eps2C = eps2V = 0;
+
+ dvdl_coul = 0;
+ dvdl_vdw = 0;
+
+ /* Lambda factor for state A, 1-lambda*/
+ LFC[STATE_A] = 1.0 - lambda_coul;
+ LFV[STATE_A] = 1.0 - lambda_vdw;
+
+ /* Lambda factor for state B, lambda*/
+ LFC[STATE_B] = lambda_coul;
+ LFV[STATE_B] = lambda_vdw;
+
+ /*derivative of the lambda factor for state A and B */
+ DLF[STATE_A] = -1;
+ DLF[STATE_B] = 1;
+
+ for (i = 0; i < NSTATES; i++)
+ {
+ lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
+ dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
+ lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
+ dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
+ }
+ /* precalculate sigma^2 = (sigma^6)^(1/3) for the default and minimum sigma6 */
+ sigma2_def = pow(sigma6_def, 1.0/3.0);
+ sigma2_min = pow(sigma6_min, 1.0/3.0);
+
+ /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
+
+ do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
+ ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
+ if (do_tab)
+ {
+ tabscale = kernel_data->table_elec_vdw->scale;
+ VFtab = kernel_data->table_elec_vdw->data;
+ /* we always use the combined table here */
+ tab_elemsize = 12;
+ }
+
+ for (n = 0; (n < nri); n++)
+ {
+ int npair_within_cutoff; /* j-pairs not skipped by the cut-off check; gates the i-reductions below */
+
+ npair_within_cutoff = 0;
+
+ is3 = 3*shift[n];
+ shX = shiftvec[is3];
+ shY = shiftvec[is3+1];
+ shZ = shiftvec[is3+2];
+ nj0 = jindex[n];
+ nj1 = jindex[n+1];
+ ii = iinr[n];
+ ii3 = 3*ii;
+ ix = shX + x[ii3+0];
+ iy = shY + x[ii3+1];
+ iz = shZ + x[ii3+2];
+ iqA = facel*chargeA[ii];
+ iqB = facel*chargeB[ii];
+ ntiA = 2*ntype*typeA[ii]; /* row offset into nbfp: two entries (c6, c12) per type pair */
+ ntiB = 2*ntype*typeB[ii];
+ vctot = 0;
+ vvtot = 0;
+ fix = 0;
+ fiy = 0;
+ fiz = 0;
+
+ for (k = nj0; (k < nj1); k++)
+ {
+ jnr = jjnr[k];
+ j3 = 3*jnr;
+ dx = ix - x[j3];
+ dy = iy - x[j3+1];
+ dz = iz - x[j3+2];
+ rsq = dx*dx + dy*dy + dz*dz;
+
+ if (bExactCutoffAll && rsq >= rcutoff_max2)
+ {
+ /* We save significant time by skipping all code below.
+ * Note that with soft-core interactions, the actual cut-off
+ * check might be different. But since the soft-core distance
+ * is always larger than r, checking on r here is safe.
+ */
+ continue;
+ }
+ npair_within_cutoff++;
+
+ if (rsq > 0)
+ {
+ rinv = gmx_invsqrt(rsq);
+ r = rsq*rinv;
+ }
+ else
+ {
+ /* The force at r=0 is zero, because of symmetry.
+ * But note that the potential is in general non-zero,
+ * since the soft-cored r will be non-zero.
+ */
+ rinv = 0;
+ r = 0;
+ }
+
+ if (sc_r_power == 6.0)
+ {
+ rpm2 = rsq*rsq; /* r4 */
+ rp = rpm2*rsq; /* r6 */
+ }
+ else if (sc_r_power == 48.0)
+ {
+ rp = rsq*rsq*rsq; /* r6 */
+ rp = rp*rp; /* r12 */
+ rp = rp*rp; /* r24 */
+ rp = rp*rp; /* r48 */
+ rpm2 = rp/rsq; /* r46 */
+ }
+ else
+ {
+ rp = pow(r, sc_r_power); /* not currently supported as input, but can handle it */
+ rpm2 = rp/rsq;
+ }
+
+ Fscal = 0;
+
+ qq[STATE_A] = iqA*chargeA[jnr];
+ qq[STATE_B] = iqB*chargeB[jnr];
+
+ tj[STATE_A] = ntiA+2*typeA[jnr];
+ tj[STATE_B] = ntiB+2*typeB[jnr];
+
- /* With Ewald and soft-core we should put the cut-off on r,
- * not on the soft-cored rC, as the real-space and
- * reciprocal space contributions should (almost) cancel.
+ if (nlist->excl_fep == NULL || nlist->excl_fep[k])
+ {
+ c6[STATE_A] = nbfp[tj[STATE_A]];
+ c6[STATE_B] = nbfp[tj[STATE_B]];
+
+ for (i = 0; i < NSTATES; i++)
+ {
+ c12[i] = nbfp[tj[i]+1];
+ if ((c6[i] > 0) && (c12[i] > 0))
+ {
+ /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
+ sigma6[i] = 0.5*c12[i]/c6[i];
+ sigma2[i] = pow(sigma6[i], 1.0/3.0);
+ /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
+ what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
+ if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
+ {
+ sigma6[i] = sigma6_min;
+ sigma2[i] = sigma2_min;
+ }
+ }
+ else
+ {
+ sigma6[i] = sigma6_def;
+ sigma2[i] = sigma2_def;
+ }
+ if (sc_r_power == 6.0)
+ {
+ sigma_pow[i] = sigma6[i];
+ sigma_powm2[i] = sigma6[i]/sigma2[i];
+ }
+ else if (sc_r_power == 48.0)
+ {
+ sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
+ sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
+ sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
+ sigma_powm2[i] = sigma_pow[i]/sigma2[i];
+ }
+ else
+ { /* not really supported as input, but in here for testing the general case*/
+ sigma_pow[i] = pow(sigma2[i], sc_r_power/2);
+ sigma_powm2[i] = sigma_pow[i]/(sigma2[i]);
+ }
+ }
+
+ /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
+ if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
+ {
+ alpha_vdw_eff = 0;
+ alpha_coul_eff = 0;
+ }
+ else
+ {
+ alpha_vdw_eff = alpha_vdw;
+ alpha_coul_eff = alpha_coul;
+ }
+
+ for (i = 0; i < NSTATES; i++)
+ {
+ FscalC[i] = 0;
+ FscalV[i] = 0;
+ Vcoul[i] = 0;
+ Vvdw[i] = 0;
+
+ /* Only spend time on A or B state if it is non-zero */
+ if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
+ {
+ /* this section has to be inside the loop because of the dependence on sigma_pow.
+ * Soft-cored distance: rC = (alpha_coul_eff*lfac_coul*sigma^p + r^p)^(1/p),
+ * and likewise rV with the VdW alpha and lambda factor (p = sc_r_power).
+ */
+ rpinvC = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
+ rinvC = pow(rpinvC, 1.0/sc_r_power);
+ rC = 1.0/rinvC;
+
+ rpinvV = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
+ rinvV = pow(rpinvV, 1.0/sc_r_power);
+ rV = 1.0/rinvV;
+
+ if (do_tab)
+ {
+ rtC = rC*tabscale;
+ n0 = rtC;
+ epsC = rtC-n0;
+ eps2C = epsC*epsC;
+ n1C = tab_elemsize*n0;
+
+ rtV = rV*tabscale;
+ n0 = rtV;
+ epsV = rtV-n0;
+ eps2V = epsV*epsV;
+ n1V = tab_elemsize*n0;
+ }
+
- if (qq[i] != 0 &&
- !(bExactElecCutoff &&
- ((!bEwald && rC >= rcoulomb) ||
- (bEwald && r >= rcoulomb))))
++ /* Only process the coulomb interactions if we have charges,
++ * and if we either include all entries in the list (no cutoff
++ * used in the kernel), or if we are within the cutoff.
+ */
- break;
-
- case GMX_NBKERNEL_ELEC_EWALD:
- /* Ewald FEP is done only on the 1/r part */
- Vcoul[i] = qq[i]*(rinvC - sh_ewald);
- FscalC[i] = Vcoul[i];
++ bComputeElecInteraction = !bExactElecCutoff ||
++ ( bConvertEwaldToCoulomb && r < rcoulomb) ||
++ (!bConvertEwaldToCoulomb && rC < rcoulomb);
++
++ if ( (qq[i] != 0) && bComputeElecInteraction)
+ {
+ switch (icoul)
+ {
+ case GMX_NBKERNEL_ELEC_COULOMB:
+ /* simple cutoff */
+ Vcoul[i] = qq[i]*rinvC;
+ FscalC[i] = Vcoul[i];
- d = rC-rswitch;
++ /* The shift for the Coulomb potential is stored in
++ * the RF parameter c_rf, which is 0 without shift
++ */
++ Vcoul[i] -= qq[i]*fr->ic->c_rf;
+ break;
+
+ case GMX_NBKERNEL_ELEC_REACTIONFIELD:
+ /* reaction-field */
+ Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
+ FscalC[i] = qq[i]*(rinvC - 2.0*krf*rC*rC);
+ break;
+
+ case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
+ /* non-Ewald tabulated coulomb */
+ nnn = n1C;
+ Y = VFtab[nnn];
+ F = VFtab[nnn+1];
+ Geps = epsC*VFtab[nnn+2];
+ Heps2 = eps2C*VFtab[nnn+3];
+ Fp = F+Geps+Heps2;
+ VV = Y+epsC*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ Vcoul[i] = qq[i]*VV;
+ FscalC[i] = -qq[i]*tabscale*FF*rC;
+ break;
+
+ case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
+ gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
+ break;
+
++ case GMX_NBKERNEL_ELEC_EWALD:
++ if (bConvertEwaldToCoulomb)
++ {
++ /* Ewald FEP is done only on the 1/r part */
++ Vcoul[i] = qq[i]*(rinvC-sh_ewald);
++ FscalC[i] = qq[i]*rinvC;
++ }
++ else
++ {
++ ewrt = rC*ewtabscale;
++ ewitab = (int) ewrt;
++ eweps = ewrt-ewitab;
++ ewitab = 4*ewitab;
++ FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
++ rinvcorr = rinvC-sh_ewald;
++ Vcoul[i] = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
++ FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
++ }
++ break;
++
+ case GMX_NBKERNEL_ELEC_NONE:
+ FscalC[i] = 0.0;
+ Vcoul[i] = 0.0;
+ break;
+
+ default:
+ gmx_incons("Invalid icoul in free energy kernel");
+ break;
+ }
+
+ if (fr->coulomb_modifier == eintmodPOTSWITCH)
+ {
- sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
- dsw = d2*(swF2+d*(swF3+d*swF4));
++ d = rC-fr->rcoulomb_switch;
+ d = (d > 0.0) ? d : 0.0;
+ d2 = d*d;
- Vcoul[i] *= sw;
- FscalC[i] = FscalC[i]*sw + Vcoul[i]*dsw;
++ sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
++ dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
++ /* Update the force first: it needs the unswitched potential */
++ FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw;
++ Vcoul[i] *= sw;
+
- if ((c6[i] != 0 || c12[i] != 0) &&
- !(bExactVdwCutoff &&
- ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) ||
- (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw))))
++ FscalC[i] = (rC < rcoulomb) ? FscalC[i] : 0.0;
++ Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : 0.0;
+ }
+ }
+
- d = rV-rswitch;
- d = (d > 0.0) ? d : 0.0;
- d2 = d*d;
- sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
- dsw = d2*(swF2+d*(swF3+d*swF4));
++ /* Only process the VDW interactions if we have
++ * some non-zero parameters, and if we either
++ * include all entries in the list (no cutoff used
++ * in the kernel), or if we are within the cutoff.
++ */
++ bComputeVdwInteraction = !bExactVdwCutoff ||
++ ( bConvertLJEwaldToLJ6 && r < rvdw) ||
++ (!bConvertLJEwaldToLJ6 && rV < rvdw);
++ if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
+ {
+ switch (ivdw)
+ {
+ case GMX_NBKERNEL_VDW_LENNARDJONES:
+ case GMX_NBKERNEL_VDW_LJEWALD:
+ /* cutoff LJ */
+ if (sc_r_power == 6.0)
+ {
+ rinv6 = rpinvV;
+ }
+ else
+ {
+ rinv6 = pow(rinvV, 6.0);
+ }
+ Vvdw6 = c6[i]*rinv6;
+ Vvdw12 = c12[i]*rinv6*rinv6;
+ if (fr->vdw_modifier == eintmodPOTSHIFT)
+ {
+ Vvdw[i] = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
+ -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
+ }
+ else
+ {
+ Vvdw[i] = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
+ }
+ FscalV[i] = Vvdw12 - Vvdw6;
+ break;
+
+ case GMX_NBKERNEL_VDW_BUCKINGHAM:
+ gmx_fatal(FARGS, "Buckingham free energy not supported.");
+ break;
+
+ case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
+ /* Table LJ */
+ nnn = n1V+4;
+ /* dispersion */
+ Y = VFtab[nnn];
+ F = VFtab[nnn+1];
+ Geps = epsV*VFtab[nnn+2];
+ Heps2 = eps2V*VFtab[nnn+3];
+ Fp = F+Geps+Heps2;
+ VV = Y+epsV*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ Vvdw[i] += c6[i]*VV;
+ FscalV[i] -= c6[i]*tabscale*FF*rV;
+
+ /* repulsion */
+ Y = VFtab[nnn+4];
+ F = VFtab[nnn+5];
+ Geps = epsV*VFtab[nnn+6];
+ Heps2 = eps2V*VFtab[nnn+7];
+ Fp = F+Geps+Heps2;
+ VV = Y+epsV*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ Vvdw[i] += c12[i]*VV;
+ FscalV[i] -= c12[i]*tabscale*FF*rV;
+ break;
+
+ case GMX_NBKERNEL_VDW_NONE:
+ Vvdw[i] = 0.0;
+ FscalV[i] = 0.0;
+ break;
+
+ default:
+ gmx_incons("Invalid ivdw in free energy kernel");
+ break;
+ }
+
+ if (fr->vdw_modifier == eintmodPOTSWITCH)
+ {
- Vvdw[i] *= sw;
- FscalV[i] = FscalV[i]*sw + Vvdw[i]*dsw;
++ d = rV-fr->rvdw_switch;
++ d = (d > 0.0) ? d : 0.0;
++ d2 = d*d;
++ sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
++ dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
+ /* Update the force first: it needs the unswitched potential */
- if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
- !(bExactElecCutoff && r >= rcoulomb))
++ FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw;
++ Vvdw[i] *= sw;
+
+ FscalV[i] = (rV < rvdw) ? FscalV[i] : 0.0;
+ Vvdw[i] = (rV < rvdw) ? Vvdw[i] : 0.0;
+ }
+ }
+
+ /* FscalC (and FscalV) now contain: dV/drC * rC
+ * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
+ * Further down we first multiply by r^p-2 and then by
+ * the vector r, which in total gives: dV/drC * (r/rC)^1-p
+ */
+ FscalC[i] *= rpinvC;
+ FscalV[i] *= rpinvV;
+ }
+ }
+
+ /* Assemble A and B states */
+ for (i = 0; i < NSTATES; i++)
+ {
+ vctot += LFC[i]*Vcoul[i];
+ vvtot += LFV[i]*Vvdw[i];
+
+ Fscal += LFC[i]*FscalC[i]*rpm2;
+ Fscal += LFV[i]*FscalV[i]*rpm2;
+ /* dV/dlambda: derivative of the linear lambda factor plus the soft-core term */
+ dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
+ dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
+ }
+ }
+ else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
+ {
+ /* For excluded pairs, which are only in this pair list when
+ * using the Verlet scheme, we don't use soft-core.
+ * The group scheme also doesn't soft-core for these.
+ * As there is no singularity, there is no need for soft-core.
+ */
+ VV = krf*rsq - crf;
+ FF = -2.0*krf;
+
+ if (ii == jnr)
+ {
+ VV *= 0.5;
+ }
+
+ for (i = 0; i < NSTATES; i++)
+ {
+ vctot += LFC[i]*qq[i]*VV;
+ Fscal += LFC[i]*qq[i]*FF;
+ dvdl_coul += DLF[i]*qq[i]*VV;
+ }
+ }
+
- /* Because we compute the soft-core normally,
- * we have to remove the Ewald short range portion.
- * Done outside of the states loop because this part
- * doesn't depend on the scaled R.
++ if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
+ {
- real rs, frac, f_lr;
- int ri;
++ /* See comment in the preamble. When using Ewald interactions
++ * (unless we use a switch modifier) we subtract the reciprocal-space
++ * Ewald component here which made it possible to apply the free
++ * energy interaction to 1/r (vanilla coulomb short-range part)
++ * above. This gets us closer to the ideal case of applying
++ * the softcore to the entire electrostatic interaction,
++ * including the reciprocal-space component.
+ */
- rs = rsq*rinv*tab_ewald_scale;
- ri = (int)rs;
- frac = rs - ri;
- f_lr = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
- FF = f_lr*rinv;
- VV = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
++ real v_lr, f_lr;
+
- VV *= 0.5;
++ ewrt = r*ewtabscale;
++ ewitab = (int) ewrt;
++ eweps = ewrt-ewitab;
++ ewitab = 4*ewitab;
++ f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1];
++ v_lr = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
++ f_lr *= rinv;
+
+ if (ii == jnr)
+ {
- vctot -= LFC[i]*qq[i]*VV;
- Fscal -= LFC[i]*qq[i]*FF;
- dvdl_coul -= (DLF[i]*qq[i])*VV;
++ /* If we get here, the i particle (ii) has itself (jnr)
++ * in its neighborlist. This can only happen with the Verlet
++ * scheme, and corresponds to a self-interaction that will
++ * occur twice. Scale it down by 50% to only include it once.
++ */
++ v_lr *= 0.5;
+ }
+
+ for (i = 0; i < NSTATES; i++)
+ {
- if (ivdw == GMX_NBKERNEL_VDW_LJEWALD &&
- !(bExactVdwCutoff && r >= rvdw))
++ vctot -= LFC[i]*qq[i]*v_lr;
++ Fscal -= LFC[i]*qq[i]*f_lr;
++ dvdl_coul -= (DLF[i]*qq[i])*v_lr;
+ }
+ }
+
- rs = rsq*rinv*tab_ewald_scale;
++ if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
+ {
++ /* See comment in the preamble. When using LJ-Ewald interactions
++ * (unless we use a switch modifier) we subtract the reciprocal-space
++ * Ewald component here which made it possible to apply the free
++ * energy interaction to r^-6 (vanilla LJ6 short-range part)
++ * above. This gets us closer to the ideal case of applying
++ * the softcore to the entire VdW interaction,
++ * including the reciprocal-space component.
++ */
+ real rs, frac, f_lr;
+ int ri;
+
- VV = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr);
++ rs = rsq*rinv*ewtabscale;
+ ri = (int)rs;
+ frac = rs - ri;
+ f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
+ FF = f_lr*rinv;
- vvtot += LFV[i]*c6grid[i]*VV*(1.0/6.0);
- Fscal += LFV[i]*c6grid[i]*FF*(1.0/6.0);
- dvdl_vdw += (DLF[i]*c6grid[i])*VV*(1.0/6.0);
++ VV = tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr);
++
++ if (ii == jnr)
++ {
++ /* If we get here, the i particle (ii) has itself (jnr)
++ * in its neighborlist. This can only happen with the Verlet
++ * scheme, and corresponds to a self-interaction that will
++ * occur twice. Scale it down by 50% to only include it once.
++ */
++ VV *= 0.5;
++ }
++
+ for (i = 0; i < NSTATES; i++)
+ {
++ c6grid = nbfp_grid[tj[i]];
++ vvtot += LFV[i]*c6grid*VV*(1.0/6.0);
++ Fscal += LFV[i]*c6grid*FF*(1.0/6.0);
++ dvdl_vdw += (DLF[i]*c6grid)*VV*(1.0/6.0);
+ }
+
+ }
+
+ if (bDoForces)
+ {
+ tx = Fscal*dx;
+ ty = Fscal*dy;
+ tz = Fscal*dz;
+ fix = fix + tx;
+ fiy = fiy + ty;
+ fiz = fiz + tz;
+ /* OpenMP atomics are expensive, but this kernel is also
+ * expensive, so we can take this hit, instead of using
+ * thread-local output buffers and extra reduction.
+ */
+#pragma omp atomic
+ f[j3] -= tx;
+#pragma omp atomic
+ f[j3+1] -= ty;
+#pragma omp atomic
+ f[j3+2] -= tz;
+ }
+ }
+
+ /* The atomics below are expensive with many OpenMP threads.
+ * Here unperturbed i-particles will usually only have a few
+ * (perturbed) j-particles in the list. Thus with a buffered list
+ * we can skip a significant number of i-reductions with a check.
+ */
+ if (npair_within_cutoff > 0)
+ {
+ if (bDoForces)
+ {
+#pragma omp atomic
+ f[ii3] += fix;
+#pragma omp atomic
+ f[ii3+1] += fiy;
+#pragma omp atomic
+ f[ii3+2] += fiz;
+ }
+ if (bDoShiftForces)
+ {
+#pragma omp atomic
+ fshift[is3] += fix;
+#pragma omp atomic
+ fshift[is3+1] += fiy;
+#pragma omp atomic
+ fshift[is3+2] += fiz;
+ }
+ if (bDoPotential)
+ {
+ ggid = gid[n];
+#pragma omp atomic
+ Vc[ggid] += vctot;
+#pragma omp atomic
+ Vv[ggid] += vvtot;
+ }
+ }
+ }
+
+#pragma omp atomic
+ dvdl[efptCOUL] += dvdl_coul;
+ #pragma omp atomic
+ dvdl[efptVDW] += dvdl_vdw;
+
+ /* Estimate flops, average for free energy stuff:
+ * 12 flops per outer iteration
+ * 150 flops per inner iteration
+ * After the loop n == nri, so jindex[n] is the end index of the last j-range.
+ */
+#pragma omp atomic
+ inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
+}
+/*
+ * Evaluate a single perturbed pair interaction at squared distance r2
+ * from the combined cubic-spline table vftab.
+ *
+ * Both states (A = index 0, B = index 1) are evaluated at their own
+ * soft-cored distances; soft-core is switched off (alpha = 0) when c12
+ * is positive in both states.
+ *
+ * c6A/c6B and c12A/c12B are expected to already contain the factors 6.0
+ * and 12.0. vftab holds 12 values per table point: electrostatics at
+ * offset 0, dispersion at offset 4 and repulsion at offset 8.
+ *
+ * On return *velectot and *vvdwtot hold the lambda-mixed energies and the
+ * dV/dlambda contributions have been added to dvdl[efptCOUL] and
+ * dvdl[efptVDW]. The return value is the lambda-mixed scalar force
+ * factor, already multiplied by r^(sc_r_power-2).
+ *
+ * NOTE(review): the local r is declared but unused, and sigma_powm2 is
+ * computed but never read.
+ */
+real
+nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
+ real tabscale, real *vftab,
+ real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
+ real LFC[2], real LFV[2], real DLF[2],
+ real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
+ real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
+ real *velectot, real *vvdwtot, real *dvdl)
+{
+ real r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
+ real qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
+ real alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
+ real rpinv, r_coul, r_vdw, velecsum, vvdwsum;
+ real fscal_vdw[2], fscal_elec[2];
+ real velec[2], vvdw[2];
+ int i, ntab;
+
+ qq[0] = qqA;
+ qq[1] = qqB;
+ c6[0] = c6A;
+ c6[1] = c6B;
+ c12[0] = c12A;
+ c12[1] = c12B;
+
+ if (sc_r_power == 6.0)
+ {
+ rpm2 = r2*r2; /* r4 */
+ rp = rpm2*r2; /* r6 */
+ }
+ else if (sc_r_power == 48.0)
+ {
+ rp = r2*r2*r2; /* r6 */
+ rp = rp*rp; /* r12 */
+ rp = rp*rp; /* r24 */
+ rp = rp*rp; /* r48 */
+ rpm2 = rp/r2; /* r46 */
+ }
+ else
+ {
+ rp = pow(r2, 0.5*sc_r_power); /* not currently supported as input, but can handle it */
+ rpm2 = rp/r2;
+ }
+
+ /* Loop over state A(0) and B(1) */
+ for (i = 0; i < 2; i++)
+ {
+ if ((c6[i] > 0) && (c12[i] > 0))
+ {
+ /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
+ * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
+ */
+ sigma6[i] = 0.5*c12[i]/c6[i];
+ sigma2[i] = pow(0.5*c12[i]/c6[i], 1.0/3.0);
+ /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
+ what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */
+ if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
+ {
+ sigma6[i] = sigma6_min;
+ sigma2[i] = sigma2_min;
+ }
+ }
+ else
+ {
+ sigma6[i] = sigma6_def;
+ sigma2[i] = sigma2_def;
+ }
+ if (sc_r_power == 6.0)
+ {
+ sigma_pow[i] = sigma6[i];
+ sigma_powm2[i] = sigma6[i]/sigma2[i];
+ }
+ else if (sc_r_power == 48.0)
+ {
+ sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
+ sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
+ sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
+ sigma_powm2[i] = sigma_pow[i]/sigma2[i];
+ }
+ else
+ { /* not really supported as input, but in here for testing the general case*/
+ sigma_pow[i] = pow(sigma2[i], sc_r_power/2);
+ sigma_powm2[i] = sigma_pow[i]/(sigma2[i]);
+ }
+ }
+
+ /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
+ if ((c12[0] > 0) && (c12[1] > 0))
+ {
+ alpha_vdw_eff = 0;
+ alpha_coul_eff = 0;
+ }
+ else
+ {
+ alpha_vdw_eff = alpha_vdw;
+ alpha_coul_eff = alpha_coul;
+ }
+
+ /* Loop over A and B states again */
+ for (i = 0; i < 2; i++)
+ {
+ fscal_elec[i] = 0;
+ fscal_vdw[i] = 0;
+ velec[i] = 0;
+ vvdw[i] = 0;
+
+ /* Only spend time on A or B state if it is non-zero */
+ if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
+ {
+ /* Coulomb: soft-cored distance r_coul = (alpha*lfac*sigma^p + r^p)^(1/p), p = sc_r_power */
+ rpinv = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
+ r_coul = pow(rpinv, -1.0/sc_r_power);
+
+ /* Electrostatics table lookup data */
+ rtab = r_coul*tabscale;
+ ntab = rtab;
+ eps = rtab-ntab;
+ eps2 = eps*eps;
+ ntab = 12*ntab; /* 12 values per table point: 4 elec, 4 dispersion, 4 repulsion */
+ /* Electrostatics */
+ Y = vftab[ntab];
+ F = vftab[ntab+1];
+ Geps = eps*vftab[ntab+2];
+ Heps2 = eps2*vftab[ntab+3];
+ Fp = F+Geps+Heps2;
+ VV = Y+eps*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ velec[i] = qq[i]*VV;
+ fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale;
+
+ /* Vdw: same soft-core construction, with the VdW alpha and lambda factor */
+ rpinv = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
+ r_vdw = pow(rpinv, -1.0/sc_r_power);
+ /* Vdw table lookup data */
+ rtab = r_vdw*tabscale;
+ ntab = rtab;
+ eps = rtab-ntab;
+ eps2 = eps*eps;
+ ntab = 12*ntab;
+ /* Dispersion */
+ Y = vftab[ntab+4];
+ F = vftab[ntab+5];
+ Geps = eps*vftab[ntab+6];
+ Heps2 = eps2*vftab[ntab+7];
+ Fp = F+Geps+Heps2;
+ VV = Y+eps*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ vvdw[i] = c6[i]*VV;
+ fscal_vdw[i] = -c6[i]*FF;
+
+ /* Repulsion */
+ Y = vftab[ntab+8];
+ F = vftab[ntab+9];
+ Geps = eps*vftab[ntab+10];
+ Heps2 = eps2*vftab[ntab+11];
+ Fp = F+Geps+Heps2;
+ VV = Y+eps*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ vvdw[i] += c12[i]*VV;
+ fscal_vdw[i] -= c12[i]*FF;
+ fscal_vdw[i] *= r_vdw*rpinv*tabscale; /* scales dispersion and repulsion together */
+ }
+ }
+ /* Now we have velec[i], vvdw[i], fscal_elec[i] and fscal_vdw[i] for both states */
+ /* Assemble A and B states */
+ velecsum = 0;
+ vvdwsum = 0;
+ dvdl_coul = 0;
+ dvdl_vdw = 0;
+ fscal = 0;
+ for (i = 0; i < 2; i++)
+ {
+ velecsum += LFC[i]*velec[i];
+ vvdwsum += LFV[i]*vvdw[i];
+
+ fscal += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
+
+ dvdl_coul += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
+ dvdl_vdw += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
+ }
+
+ dvdl[efptCOUL] += dvdl_coul;
+ dvdl[efptVDW] += dvdl_vdw;
+
+ *velectot = velecsum;
+ *vvdwtot = vvdwsum;
+
+ return fscal;
+}
--- /dev/null
- vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion + c6grid*sh_lj_ewald)/6.0;
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2012,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "types/simple.h"
+#include "vec.h"
+#include "typedefs.h"
+#include "nb_generic.h"
+#include "nrnb.h"
+
+#include "gmx_fatal.h"
+
+#include "nonbonded.h"
+#include "nb_kernel.h"
+
+void
+gmx_nb_generic_kernel(t_nblist * nlist,
+ rvec * xx,
+ rvec * ff,
+ t_forcerec * fr,
+ t_mdatoms * mdatoms,
+ nb_kernel_data_t * kernel_data,
+ t_nrnb * nrnb)
+{ /* Generic nonbonded kernel: one routine that handles every combination of
+ * electrostatics and VdW interaction selected at run time through
+ * nlist->ielec and nlist->ivdw.
+ *
+ * For each i-entry n of the list it visits the j-atoms
+ * jjnr[jindex[n]] .. jjnr[jindex[n+1]-1], evaluates the pair potential and
+ * the scalar force (force divided by r, see the switch-function comment
+ * further down), applies the optional potential-switch modifier and the
+ * exact cut-off, and accumulates
+ * - forces into ff for both atoms and into fr->fshift for the shift
+ * index of the entry,
+ * - energies into kernel_data->energygrp_elec and energygrp_vdw at
+ * index nlist->gid[n].
+ *
+ * nlist neighbour list; iinr, jindex, jjnr, shift, gid, ielec, ivdw are read
+ * xx coordinates, read through a flat real pointer (3 reals per atom)
+ * ff forces, updated in place
+ * fr cut-offs, modifiers, Ewald and LJ-PME data, pair-parameter tables,
+ * shift vectors; fr->fshift is updated
+ * mdatoms chargeA and typeA are read
+ * kernel_data combined elec+VdW table and the energy accumulators
+ * nrnb flop accounting, incremented once at the end
+ *
+ * kernel_data->flags is not consulted here: energies and forces are always
+ * both computed.
+ */
+ int nri, ntype, table_nelements, ielec, ivdw;
+ real facel, gbtabscale;
+ int n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid, nnn, n0;
+ real shX, shY, shZ;
+ real fscal, felec, fvdw, velec, vvdw, tx, ty, tz;
+ real rinvsq;
+ real iq;
+ real qq, vctot;
+ int nti, nvdwparam;
+ int tj;
+ real rt, r, eps, eps2, Y, F, Geps, Heps2, VV, FF, Fp, fijD, fijR;
+ real rinvsix;
+ real vvdwtot;
+ real vvdw_rep, vvdw_disp;
+ real ix, iy, iz, fix, fiy, fiz;
+ real jx, jy, jz;
+ real dx, dy, dz, rsq, rinv;
+ real c6, c12, c6grid, cexp1, cexp2, br;
+ real * charge;
+ real * shiftvec;
+ real * vdwparam, *vdwgridparam;
+ int * shift;
+ int * type;
+ real * fshift;
+ real * velecgrp;
+ real * vvdwgrp;
+ real tabscale;
+ real * VFtab;
+ real * x;
+ real * f;
+ int ewitab;
+ real ewtabscale, eweps, sh_ewald, ewrt, ewtabhalfspace;
+ real * ewtab;
+ real rcoulomb2, rvdw, rvdw2, sh_dispersion, sh_repulsion;
+ real rcutoff, rcutoff2;
+ real rswitch_elec, rswitch_vdw, d, d2, sw, dsw, rinvcorr;
+ real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
+ real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
+ real ewclj, ewclj2, ewclj6, ewcljrsq, poly, exponent, sh_lj_ewald;
+ gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoff;
+
+ x = xx[0]; /* flat views: atom a, dimension d lives at index 3*a+d */
+ f = ff[0];
+ ielec = nlist->ielec;
+ ivdw = nlist->ivdw;
+
+ fshift = fr->fshift[0];
+ velecgrp = kernel_data->energygrp_elec;
+ vvdwgrp = kernel_data->energygrp_vdw;
+ tabscale = kernel_data->table_elec_vdw->scale;
+ VFtab = kernel_data->table_elec_vdw->data;
+
+ sh_ewald = fr->ic->sh_ewald;
+ ewtab = fr->ic->tabq_coul_FDV0;
+ ewtabscale = fr->ic->tabq_scale;
+ ewtabhalfspace = 0.5/ewtabscale; /* half of the Ewald table spacing (spacing is 1/ewtabscale) */
+
+ rcoulomb2 = fr->rcoulomb*fr->rcoulomb;
+ rvdw = fr->rvdw;
+ rvdw2 = rvdw*rvdw;
+ sh_dispersion = fr->ic->dispersion_shift.cpot;
+ sh_repulsion = fr->ic->repulsion_shift.cpot;
+ sh_lj_ewald = fr->ic->sh_lj_ewald;
+
+ ewclj = fr->ewaldcoeff_lj;
+ ewclj2 = ewclj*ewclj;
+ ewclj6 = ewclj2*ewclj2*ewclj2;
+
+ if (fr->coulomb_modifier == eintmodPOTSWITCH)
+ { /* Coefficients of the switch polynomial sw(d) = 1 + V3*d^3 + V4*d^4 + V5*d^5 and of its derivative dsw(d) = F2*d^2 + F3*d^3 + F4*d^4, scaled by the width of the switching region */
+ d = fr->rcoulomb-fr->rcoulomb_switch;
+ elec_swV3 = -10.0/(d*d*d);
+ elec_swV4 = 15.0/(d*d*d*d);
+ elec_swV5 = -6.0/(d*d*d*d*d);
+ elec_swF2 = -30.0/(d*d*d);
+ elec_swF3 = 60.0/(d*d*d*d);
+ elec_swF4 = -30.0/(d*d*d*d*d);
+ }
+ else
+ {
+ /* Avoid warnings from stupid compilers (looking at you, Clang!) */
+ elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
+ }
+ if (fr->vdw_modifier == eintmodPOTSWITCH)
+ { /* Same polynomial coefficients as above, for the VdW switching region */
+ d = fr->rvdw-fr->rvdw_switch;
+ vdw_swV3 = -10.0/(d*d*d);
+ vdw_swV4 = 15.0/(d*d*d*d);
+ vdw_swV5 = -6.0/(d*d*d*d*d);
+ vdw_swF2 = -30.0/(d*d*d);
+ vdw_swF3 = 60.0/(d*d*d*d);
+ vdw_swF4 = -30.0/(d*d*d*d*d);
+ }
+ else
+ {
+ /* Avoid warnings from stupid compilers (looking at you, Clang!) */
+ vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
+ }
+
+ bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO; /* zero electrostatics beyond rcoulomb */
+ bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE); /* zero VdW beyond rvdw */
+ bExactCutoff = bExactElecCutoff && bExactVdwCutoff; /* only then can a whole pair be skipped */
+
+ if (bExactCutoff)
+ {
+ rcutoff = ( fr->rcoulomb > fr->rvdw ) ? fr->rcoulomb : fr->rvdw;
+ rcutoff2 = rcutoff*rcutoff;
+ }
+ else
+ {
+ /* Fix warnings for stupid compilers */
+ rcutoff = rcutoff2 = 1e30;
+ }
+
+ /* avoid compiler warnings for cases that cannot happen */
+ nnn = 0;
+ eps = 0.0;
+ eps2 = 0.0;
+
+ /* 3 VdW parameters for Buckingham, otherwise 2 */
+ nvdwparam = (ivdw == GMX_NBKERNEL_VDW_BUCKINGHAM) ? 3 : 2;
+ table_nelements = 12; /* reals per table point: 4 each for elec, dispersion, repulsion */
+
+ charge = mdatoms->chargeA;
+ type = mdatoms->typeA;
+ facel = fr->epsfac;
+ shiftvec = fr->shift_vec[0];
+ vdwparam = fr->nbfp;
+ ntype = fr->ntype;
+ vdwgridparam = fr->ljpme_c6grid;
+
+ for (n = 0; (n < nlist->nri); n++)
+ {
+ is3 = 3*nlist->shift[n]; /* offset of this entry in shiftvec and fshift */
+ shX = shiftvec[is3];
+ shY = shiftvec[is3+1];
+ shZ = shiftvec[is3+2];
+ nj0 = nlist->jindex[n];
+ nj1 = nlist->jindex[n+1];
+ ii = nlist->iinr[n];
+ ii3 = 3*ii;
+ ix = shX + x[ii3+0];
+ iy = shY + x[ii3+1];
+ iz = shZ + x[ii3+2];
+ iq = facel*charge[ii]; /* i charge pre-multiplied by fr->epsfac */
+ nti = nvdwparam*ntype*type[ii]; /* row offset of the i type in the pair-parameter table */
+ vctot = 0;
+ vvdwtot = 0;
+ fix = 0;
+ fiy = 0;
+ fiz = 0;
+
+ for (k = nj0; (k < nj1); k++)
+ {
+ jnr = nlist->jjnr[k];
+ j3 = 3*jnr;
+ jx = x[j3+0];
+ jy = x[j3+1];
+ jz = x[j3+2];
+ dx = ix - jx;
+ dy = iy - jy;
+ dz = iz - jz;
+ rsq = dx*dx+dy*dy+dz*dz;
+ rinv = gmx_invsqrt(rsq);
+ rinvsq = rinv*rinv;
+ felec = 0;
+ fvdw = 0;
+ velec = 0;
+ vvdw = 0;
+
+ if (bExactCutoff && rsq >= rcutoff2)
+ { /* beyond the larger of the two cut-offs: neither interaction contributes */
+ continue;
+ }
+
+ if (ielec == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE)
+ { /* shared table lookup: n0 is the table point below r, eps the fractional offset */
+ r = rsq*rinv;
+ rt = r*tabscale;
+ n0 = rt;
+ eps = rt-n0;
+ eps2 = eps*eps;
+ nnn = table_nelements*n0;
+ }
+
+ /* Coulomb interaction. ielec==0 means no interaction */
+ if (ielec != GMX_NBKERNEL_ELEC_NONE)
+ {
+ qq = iq*charge[jnr];
+
+ switch (ielec)
+ {
+ case GMX_NBKERNEL_ELEC_NONE:
+ break;
+
+ case GMX_NBKERNEL_ELEC_COULOMB:
+ /* Vanilla cutoff coulomb */
+ velec = qq*rinv;
+ felec = velec*rinvsq;
+ /* The shift for the Coulomb potential is stored in
+ * the RF parameter c_rf, which is 0 without shift
+ */
+ velec -= qq*fr->ic->c_rf;
+ break;
+
+ case GMX_NBKERNEL_ELEC_REACTIONFIELD:
+ /* Reaction-field */
+ velec = qq*(rinv+fr->k_rf*rsq-fr->c_rf);
+ felec = qq*(rinv*rinvsq-2.0*fr->k_rf);
+ break;
+
+ case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
+ /* Tabulated coulomb */
+ Y = VFtab[nnn];
+ F = VFtab[nnn+1];
+ Geps = eps*VFtab[nnn+2];
+ Heps2 = eps2*VFtab[nnn+3];
+ Fp = F+Geps+Heps2;
+ VV = Y+eps*Fp; /* Y + F*eps + G*eps^2 + H*eps^3 */
+ FF = Fp+Geps+2.0*Heps2; /* derivative of VV with respect to eps */
+ velec = qq*VV;
+ felec = -qq*FF*tabscale*rinv;
+ break;
+
+ case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
+ /* GB */
+ gmx_fatal(FARGS, "Death & horror! GB generic interaction not implemented.\n");
+ break;
+
+ case GMX_NBKERNEL_ELEC_EWALD:
+ ewrt = rsq*rinv*ewtabscale;
+ ewitab = ewrt;
+ eweps = ewrt-ewitab;
+ ewitab = 4*ewitab; /* 4 reals per Ewald table point */
+ felec = ewtab[ewitab]+eweps*ewtab[ewitab+1]; /* interpolated table value, reused below */
+ rinvcorr = (fr->coulomb_modifier == eintmodPOTSHIFT) ? rinv-fr->ic->sh_ewald : rinv;
+ velec = qq*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+ felec = qq*rinv*(rinvsq-felec);
+ break;
+
+ default:
+ gmx_fatal(FARGS, "Death & horror! No generic coulomb interaction for ielec=%d.\n", ielec);
+ break;
+ }
+ if (fr->coulomb_modifier == eintmodPOTSWITCH)
+ {
+ d = rsq*rinv-fr->rcoulomb_switch;
+ d = (d > 0.0) ? d : 0.0; /* inside the switch radius d = 0, hence sw = 1 and dsw = 0 */
+ d2 = d*d;
+ sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
+ dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
+ /* Apply switch function. Note that felec=f/r since it will be multiplied
+ * by the i-j displacement vector. This means felec'=f'/r=-(v*sw)'/r=
+ * -(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=felec*sw-v*dsw/r
+ */
+ felec = felec*sw - rinv*velec*dsw;
+ /* Once we have used velec to update felec we can modify velec too */
+ velec *= sw;
+ }
+ if (bExactElecCutoff)
+ {
+ felec = (rsq < rcoulomb2) ? felec : 0.0;
+ velec = (rsq < rcoulomb2) ? velec : 0.0;
+ }
+ vctot += velec;
+ } /* End of coulomb interactions */
+
+
+ /* VdW interaction. ivdw==0 means no interaction */
+ if (ivdw != GMX_NBKERNEL_VDW_NONE)
+ {
+ tj = nti+nvdwparam*type[jnr]; /* index of the first parameter for this type pair */
+
+ switch (ivdw)
+ {
+ case GMX_NBKERNEL_VDW_NONE:
+ break;
+
+ case GMX_NBKERNEL_VDW_LENNARDJONES:
+ /* Vanilla Lennard-Jones cutoff */
+ /* NOTE(review): the /12 and /6 below suggest the table stores 12*C12 and 6*C6 - confirm against the code filling fr->nbfp */
+ c6 = vdwparam[tj];
+ c12 = vdwparam[tj+1];
+ rinvsix = rinvsq*rinvsq*rinvsq;
+ vvdw_disp = c6*rinvsix;
+ vvdw_rep = c12*rinvsix*rinvsix;
+ fvdw = (vvdw_rep-vvdw_disp)*rinvsq;
+ if (fr->vdw_modifier == eintmodPOTSHIFT)
+ {
+ vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion)/6.0;
+ }
+ else
+ {
+ vvdw = vvdw_rep/12.0-vvdw_disp/6.0;
+ }
+ break;
+
+ case GMX_NBKERNEL_VDW_BUCKINGHAM:
+ /* Buckingham */
+ c6 = vdwparam[tj];
+ cexp1 = vdwparam[tj+1];
+ cexp2 = vdwparam[tj+2];
+
+ rinvsix = rinvsq*rinvsq*rinvsq;
+ vvdw_disp = c6*rinvsix;
+ br = cexp2*rsq*rinv;
+ vvdw_rep = cexp1*exp(-br);
+ fvdw = (br*vvdw_rep-vvdw_disp)*rinvsq;
+ if (fr->vdw_modifier == eintmodPOTSHIFT)
+ { /* the repulsion shift is the exponential term evaluated at r = rvdw */
+ vvdw = (vvdw_rep-cexp1*exp(-cexp2*rvdw))-(vvdw_disp + c6*sh_dispersion)/6.0;
+ }
+ else
+ {
+ vvdw = vvdw_rep-vvdw_disp/6.0;
+ }
+ break;
+
+ case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
+ /* Tabulated VdW */
+ c6 = vdwparam[tj];
+ c12 = vdwparam[tj+1];
+ Y = VFtab[nnn+4]; /* dispersion entries of the table point */
+ F = VFtab[nnn+5];
+ Geps = eps*VFtab[nnn+6];
+ Heps2 = eps2*VFtab[nnn+7];
+ Fp = F+Geps+Heps2;
+ VV = Y+eps*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ vvdw_disp = c6*VV;
+ fijD = c6*FF;
+ Y = VFtab[nnn+8]; /* repulsion entries of the table point */
+ F = VFtab[nnn+9];
+ Geps = eps*VFtab[nnn+10];
+ Heps2 = eps2*VFtab[nnn+11];
+ Fp = F+Geps+Heps2;
+ VV = Y+eps*Fp;
+ FF = Fp+Geps+2.0*Heps2;
+ vvdw_rep = c12*VV;
+ fijR = c12*FF;
+ fvdw = -(fijD+fijR)*tabscale*rinv;
+ vvdw = vvdw_disp + vvdw_rep;
+ break;
+
+
+ case GMX_NBKERNEL_VDW_LJEWALD:
+ /* LJ-PME */
+ rinvsix = rinvsq*rinvsq*rinvsq;
+ ewcljrsq = ewclj2*rsq;
+ exponent = exp(-ewcljrsq);
+ poly = exponent*(1.0 + ewcljrsq + ewcljrsq*ewcljrsq*0.5);
+ c6 = vdwparam[tj];
+ c12 = vdwparam[tj+1];
+ c6grid = vdwgridparam[tj];
+ vvdw_disp = (c6-c6grid*(1.0-poly))*rinvsix;
+ vvdw_rep = c12*rinvsix*rinvsix;
+ fvdw = (vvdw_rep - vvdw_disp - c6grid*(1.0/6.0)*exponent*ewclj6)*rinvsq;
+ if (fr->vdw_modifier == eintmodPOTSHIFT)
+ { /* the grid part of the shift enters with the opposite sign to the c6 part */
++ vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion - c6grid*sh_lj_ewald)/6.0;
+ }
+ else
+ {
+ vvdw = vvdw_rep/12.0-vvdw_disp/6.0;
+ }
+ break;
+
+ default:
+ gmx_fatal(FARGS, "Death & horror! No generic VdW interaction for ivdw=%d.\n", ivdw);
+ break;
+ }
+ if (fr->vdw_modifier == eintmodPOTSWITCH)
+ {
+ d = rsq*rinv-fr->rvdw_switch;
+ d = (d > 0.0) ? d : 0.0;
+ d2 = d*d;
+ sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
+ dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
+ /* See coulomb interaction for the force-switch formula */
+ fvdw = fvdw*sw - rinv*vvdw*dsw;
+ vvdw *= sw;
+ }
+ if (bExactVdwCutoff)
+ {
+ fvdw = (rsq < rvdw2) ? fvdw : 0.0;
+ vvdw = (rsq < rvdw2) ? vvdw : 0.0;
+ }
+ vvdwtot += vvdw;
+ } /* end VdW interactions */
+
+ fscal = felec+fvdw; /* total scalar force divided by r */
+
+ tx = fscal*dx;
+ ty = fscal*dy;
+ tz = fscal*dz;
+ fix = fix + tx;
+ fiy = fiy + ty;
+ fiz = fiz + tz;
+ f[j3+0] = f[j3+0] - tx; /* equal and opposite force on the j atom */
+ f[j3+1] = f[j3+1] - ty;
+ f[j3+2] = f[j3+2] - tz;
+ }
+
+ f[ii3+0] = f[ii3+0] + fix;
+ f[ii3+1] = f[ii3+1] + fiy;
+ f[ii3+2] = f[ii3+2] + fiz;
+ fshift[is3] = fshift[is3]+fix; /* the summed i force is also added to the shift-force entry */
+ fshift[is3+1] = fshift[is3+1]+fiy;
+ fshift[is3+2] = fshift[is3+2]+fiz;
+ ggid = nlist->gid[n];
+ velecgrp[ggid] += vctot;
+ vvdwgrp[ggid] += vvdwtot;
+ }
+ /* Estimate flops, average for generic kernel:
+ * 12 flops per outer iteration
+ * 50 flops per inner iteration
+ * After the loop n equals nlist->nri, so jindex[n] is the end of the
+ * last j range.
+ */
+ inc_nrnb(nrnb, eNR_NBKERNEL_GENERIC, nlist->nri*12 + nlist->jindex[n]*50);
+}
--- /dev/null
- gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "thread_mpi/threads.h"
+
+#include "typedefs.h"
+#include "txtdump.h"
+#include "gromacs/utility/smalloc.h"
+#include "ns.h"
+#include "vec.h"
+#include "gromacs/math/utilities.h"
+#include "macros.h"
+#include "gromacs/utility/cstringutil.h"
+#include "force.h"
+#include "names.h"
+#include "main.h"
+#include "xvgr.h"
+#include "gmx_fatal.h"
+#include "physics.h"
+#include "force.h"
+#include "bondf.h"
+#include "nrnb.h"
+#include "nonbonded.h"
+#include "gromacs/simd/simd.h"
+
+#include "nb_kernel.h"
+#include "nb_free_energy.h"
+#include "nb_generic.h"
+#include "nb_generic_cg.h"
+#include "nb_generic_adress.h"
+
+/* Different default (c) and SIMD instructions interaction-specific kernels */
+#include "nb_kernel_c/nb_kernel_c.h"
+
+#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
+# include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
+#endif
+#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
+# include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
+#endif
+#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
+# include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
+#endif
+#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
+# include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
+#endif
+#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
+# include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
+#endif
+#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
+# include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
+#endif
+#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
+# include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
+#endif
+#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
+# include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
+#endif
+#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
+# include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
+#endif
+
+
+static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER; /* held for the whole body of gmx_nonbonded_setup() */
+static gmx_bool nonbonded_setup_done = FALSE; /* set to TRUE once the kernel list and its hash have been initialised */
+
+
+void
+gmx_nonbonded_setup(t_forcerec * fr,
+ gmx_bool bGenericKernelOnly)
+{ /* One-time registration of the interaction-specific nonbonded kernels.
+ *
+ * The whole body runs under nonbonded_setup_mutex and does nothing once
+ * nonbonded_setup_done is TRUE, so repeated calls are harmless.
+ *
+ * fr may be NULL. The SIMD kernel lists are skipped only
+ * when fr is non-NULL and fr->use_simd_kernels is
+ * FALSE.
+ * bGenericKernelOnly when TRUE no kernel list is registered at all; only
+ * the lookup hash is initialised.
+ *
+ * Which SIMD lists are registered is decided at compile time by the
+ * GMX_SIMD_... and GMX_DOUBLE macros.
+ */
+ tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
+ /* Here we are guaranteed only one thread made it. */
+ if (nonbonded_setup_done == FALSE)
+ {
+ if (bGenericKernelOnly == FALSE)
+ {
+ /* Add the generic kernels to the structure stored statically in nb_kernel.c */
+ nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
+
+ if (!(fr != NULL && fr->use_simd_kernels == FALSE))
+ {
+ /* Add interaction-specific kernels for different architectures */
+ /* Single precision */
+#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
+#endif
+#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
+#endif
+#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
+#endif
+#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
+#endif
+ /* Double precision */
+#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
+#endif
+#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
+#endif
+#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
+#endif
+#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
+#endif
+#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
+ nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
+#endif
+ ; /* empty statement to avoid a completely empty block */
+ }
+ }
+ /* Create a hash for faster lookups */
+ nb_kernel_list_hash_init();
+
+ nonbonded_setup_done = TRUE;
+ }
+ tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
+}
+
+
+
+void
- /* Give up. If this was a water kernel, leave the pointer as NULL, which
- * will disable water optimization in NS. If it is a particle kernel, set
- * the pointer to the generic NB kernel.
++gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers)
+{
+ const char * elec;
+ const char * elec_mod;
+ const char * vdw;
+ const char * vdw_mod;
+ const char * geom;
+ const char * other;
+ const char * vf;
+
+ struct
+ {
+ const char * arch;
+ int simd_padding_width;
+ }
+ arch_and_padding[] =
+ {
+ /* Single precision */
+#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
+ { "avx_256_single", 8 },
+#endif
+#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
+ { "avx_128_fma_single", 4 },
+#endif
+#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
+ { "sse4_1_single", 4 },
+#endif
+#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
+ { "sse2_single", 4 },
+#endif
+ /* Double precision */
+#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
+ { "avx_256_double", 4 },
+#endif
+#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
+ /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
+ * since the kernels execute a loop unrolled a factor 2, followed by
+ * a possible single odd-element epilogue.
+ */
+ { "avx_128_fma_double", 1 },
+#endif
+#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
+ /* No padding - see comment above */
+ { "sse2_double", 1 },
+#endif
+#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
+ /* No padding - see comment above */
+ { "sse4_1_double", 1 },
+#endif
+#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
+ /* No padding - see comment above */
+ { "sparc64_hpc_ace_double", 1 },
+#endif
+ { "c", 1 },
+ }; /* the table order is the search order used below; the plain-C entry comes last */
+ int narch = asize(arch_and_padding);
+ int i;
+ /* Purpose of this function: fill in nl->kernelptr_vf, nl->kernelptr_f and
+ * nl->simd_padding_width for the neighbour list nl.
+ * - AdResS and free-energy lists, and lists with CG-CG geometry, get a
+ * fixed generic kernel and padding 1.
+ * - Otherwise the registered kernels are searched by name, one
+ * architecture after the other, and the first hit is kept.
+ * - If both electrostatics and VdW use a potential switch and
+ * bElecAndVdwSwitchDiffers is set, any kernel found is discarded.
+ * - If no potential-and-force kernel remains and the geometry is
+ * Particle-Particle, the generic kernel is installed; for any other
+ * geometry both pointers are left as they are (NULL when nothing
+ * was found or the kernels were discarded).
+ * log is only forwarded to nb_kernel_list_findkernel().
+ */
+ if (nonbonded_setup_done == FALSE)
+ {
+ /* We typically call this setup routine before starting timers,
+ * but if that has not been done for whatever reason we do it now.
+ */
+ gmx_nonbonded_setup(NULL, FALSE);
+ }
+
+ /* Not used yet */
+ other = "";
+
+ nl->kernelptr_vf = NULL;
+ nl->kernelptr_v = NULL;
+ nl->kernelptr_f = NULL;
+
+ elec = gmx_nbkernel_elec_names[nl->ielec];
+ elec_mod = eintmod_names[nl->ielecmod];
+ vdw = gmx_nbkernel_vdw_names[nl->ivdw];
+ vdw_mod = eintmod_names[nl->ivdwmod];
+ geom = gmx_nblist_geometry_names[nl->igeometry];
+
+ if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
+ {
+ nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
+ nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
+ nl->simd_padding_width = 1;
+ return;
+ }
+
+ if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
+ {
+ nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
+ nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
+ nl->simd_padding_width = 1;
+ }
+ else if (!gmx_strcasecmp_min(geom, "CG-CG"))
+ {
+ nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
+ nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
+ nl->simd_padding_width = 1;
+ }
+ else
+ {
+ /* Try to find a specific kernel first */
+
+ for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
+ { /* the loop condition stops at the first architecture that yields a kernel */
+ nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
+ nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
+ }
+ for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
+ {
+ nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
+ nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
+
+ /* If there is not force-only optimized kernel, is there a potential & force one? */
+ if (nl->kernelptr_f == NULL)
+ {
+ nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
+ nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
+ }
+ }
+
- " Vdw: '%s', Modifier: '%s'\n"
- " Geom: '%s', Other: '%s'\n\n",
- elec, elec_mod, vdw, vdw_mod, geom, other);
++ /* For now, the accelerated kernels cannot handle the combination of switch functions for both
++ * electrostatics and VdW that use different switch radius or switch cutoff distances
++ * (both of them enter in the switch function calculation). This would require
++ * us to evaluate two completely separate switch functions for every interaction.
++ * Instead, we disable such kernels by setting the pointer to NULL.
++ * This will cause the generic kernel (which can handle it) to be called instead.
++ *
++ * Note that we typically already enable tabulated coulomb interactions for this case,
++ * so this is mostly a safe-guard to make sure we call the generic kernel if the
++ * tables are disabled.
++ */
++ if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) &&
++ (nl->ivdw != GMX_NBKERNEL_VDW_NONE) && (nl->ivdwmod == eintmodPOTSWITCH) &&
++ bElecAndVdwSwitchDiffers)
++ {
++ nl->kernelptr_vf = NULL;
++ nl->kernelptr_f = NULL;
++ }
++
++ /* Give up, pick a generic one instead.
++ * We only do this for particle-particle kernels; by leaving the water-optimized kernel
++ * pointers to NULL, the water optimization will automatically be disabled for this interaction.
+ */
+ if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
+ {
+ nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
+ nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
+ nl->simd_padding_width = 1; /* the generic kernel is used with padding width 1 */
+ if (debug)
+ {
+ fprintf(debug,
+ "WARNING - Slow generic NB kernel used for neighborlist with\n"
+ " Elec: '%s', Modifier: '%s'\n"
-
++ " Vdw: '%s', Modifier: '%s'\n",
++ elec, elec_mod, vdw, vdw_mod);
+ }
+ }
+ }
+ return;
+}
+
+void do_nonbonded(t_forcerec *fr,
+ rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
+ gmx_grppairener_t *grppener,
+ t_nrnb *nrnb, real *lambda, real *dvdl,
+ int nls, int eNL, int flags)
+{ /* Run the kernels attached to the neighbour lists stored in fr->nblists.
+ *
+ * nls index of the list set to process, or negative for all
+ * fr->nnblists sets
+ * eNL index of the list inside a set, or negative for all eNL_NR
+ * flags GMX_NONBONDED_DO_SR / GMX_NONBONDED_DO_LR enable the short- and
+ * long-range pass; GMX_NONBONDED_DO_POTENTIAL selects kernelptr_vf
+ * instead of kernelptr_f; GMX_NONBONDED_DO_FOREIGNLAMBDA skips
+ * every list that is not of free-energy type
+ * x coordinates handed to the kernels
+ * f_shortrange, f_longrange
+ * force arrays handed to the kernels in the short- and
+ * long-range pass respectively
+ * excl, lambda, dvdl
+ * stored in the kernel data passed to every kernel
+ * grppener energy-group arrays; the entries used depend on the pass and on
+ * fr->bBHAM
+ *
+ * A non-empty list without a kernel pointer is a fatal error.
+ */
+ t_nblist * nlist;
+ int n, n0, n1, i, i0, i1, sz, range;
+ t_nblists * nblists;
+ nb_kernel_data_t kernel_data;
+ nb_kernel_t * kernelptr = NULL;
+ rvec * f;
+
+ kernel_data.flags = flags;
+ kernel_data.exclusions = excl;
+ kernel_data.lambda = lambda;
+ kernel_data.dvdl = dvdl;
+
+ if (fr->bAllvsAll)
+ {
+ gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
+ return;
+ }
+
+ if (eNL >= 0)
+ {
+ i0 = eNL;
+ i1 = i0+1;
+ }
+ else
+ {
+ i0 = 0;
+ i1 = eNL_NR;
+ }
+
+ if (nls >= 0)
+ {
+ n0 = nls;
+ n1 = nls+1;
+ }
+ else
+ {
+ n0 = 0;
+ n1 = fr->nnblists;
+ }
+
+ for (n = n0; (n < n1); n++)
+ {
+ nblists = &fr->nblists[n];
+
+ kernel_data.table_elec = &nblists->table_elec;
+ kernel_data.table_vdw = &nblists->table_vdw;
+ kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
+
+ for (range = 0; range < 2; range++) /* range 0 is the short-range pass, range 1 the long-range pass */
+ {
+ /* Are we doing short/long-range? */
+ if (range == 0)
+ {
+ /* Short-range */
+ if (!(flags & GMX_NONBONDED_DO_SR))
+ {
+ continue;
+ }
+ kernel_data.energygrp_elec = grppener->ener[egCOULSR];
+ kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
+ kernel_data.energygrp_polarization = grppener->ener[egGB];
+ nlist = nblists->nlist_sr;
+ f = f_shortrange;
+ }
+ else if (range == 1)
+ {
+ /* Long-range */
+ if (!(flags & GMX_NONBONDED_DO_LR))
+ {
+ continue;
+ }
+ kernel_data.energygrp_elec = grppener->ener[egCOULLR];
+ kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
+ kernel_data.energygrp_polarization = grppener->ener[egGB];
+ nlist = nblists->nlist_lr;
+ f = f_longrange;
+ }
+
+ for (i = i0; (i < i1); i++)
+ {
+ if (nlist[i].nri > 0) /* empty lists are skipped without looking at their kernel pointers */
+ {
+ if (flags & GMX_NONBONDED_DO_POTENTIAL)
+ {
+ /* Potential and force */
+ kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
+ }
+ else
+ {
+ /* Force only, no potential */
+ kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
+ }
+
+ if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
+ {
+ /* We don't need the non-perturbed interactions */
+ continue;
+ }
+ /* Neighborlists whose kernelptr==NULL will always be empty */
+ if (kernelptr != NULL)
+ {
+ (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
+ }
++ else
++ {
++ gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned.");
++ }
+ }
+ }
+ }
+ }
+}
+
+static void
+nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
+{ /* Report, through gmx_warning(), a listed pair interaction whose distance
+ * r is larger than the table limit rlimit.
+ *
+ * x coordinates, indexed by ai and aj (only read for the debug
+ * output)
+ * ai, aj indices of the two atoms; they are passed through glatnr()
+ * together with global_atom_index before being printed
+ * r, rlimit distance and limit printed in the message
+ *
+ * When the debug stream is set, the coordinates of both atoms are written
+ * to it as well. This function keeps no state of its own, so showing the
+ * message only once is up to the caller.
+ */
+ gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
+ "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
+ "This is likely either a 1,4 interaction, or a listed interaction inside\n"
+ "a smaller molecule you are decoupling during a free energy calculation.\n"
+ "Since interactions at distances beyond the table cannot be computed,\n"
+ "they are skipped until they are inside the table limit again. You will\n"
+ "only see this message once, even if it occurs for several interactions.\n\n"
+ "IMPORTANT: This should not happen in a stable simulation, so there is\n"
+ "probably something wrong with your system. Only change the table-extension\n"
+ "distance in the mdp file if you are really sure that is the reason.\n",
+ glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
+
+ if (debug)
+ {
+ fprintf(debug,
+ "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
+ x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
+ glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
+ }
+}
+
+
+
+/* This might logically belong better in the nb_generic.c module, but it is only
+ * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
+ * extra function call for every single pair listed in the topology.
+ */
+static real
+nb_evaluate_single(real r2, real tabscale, real *vftab,
+ real qq, real c6, real c12, real *velec, real *vvdw)
+{ /* Evaluate one tabulated pair interaction at squared distance r2.
+ *
+ * r2 squared distance between the two atoms
+ * tabscale table points per unit length; r*tabscale is the table
+ * coordinate
+ * vftab table with 12 reals per point: entries 0-3 are used for
+ * electrostatics, 4-7 for dispersion and 8-11 for repulsion
+ * qq, c6, c12 prefactors of the three table functions
+ * velec, vvdw outputs: qq*VVe and c6*VVd + c12*VVr
+ *
+ * Returns -(qq*FFe + c6*FFd + c12*FFr)*tabscale/r. Presumably this is the
+ * force divided by r, to be multiplied by the displacement vector by the
+ * caller - confirm at the call site.
+ *
+ * No range check is made on the table index here.
+ */
+ real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
+ int ntab;
+
+ /* Do the tabulated interactions - first table lookup */
+ rinv = gmx_invsqrt(r2);
+ r = r2*rinv;
+ rtab = r*tabscale;
+ ntab = rtab; /* truncation gives the table point below r */
+ eps = rtab-ntab; /* fractional position between two table points */
+ eps2 = eps*eps;
+ ntab = 12*ntab; /* 12 reals per table point */
+ /* Electrostatics */
+ Y = vftab[ntab];
+ F = vftab[ntab+1];
+ Geps = eps*vftab[ntab+2];
+ Heps2 = eps2*vftab[ntab+3];
+ Fp = F+Geps+Heps2;
+ VVe = Y+eps*Fp; /* Y + F*eps + G*eps^2 + H*eps^3 */
+ FFe = Fp+Geps+2.0*Heps2; /* derivative of the line above with respect to eps */
+ /* Dispersion */
+ Y = vftab[ntab+4];
+ F = vftab[ntab+5];
+ Geps = eps*vftab[ntab+6];
+ Heps2 = eps2*vftab[ntab+7];
+ Fp = F+Geps+Heps2;
+ VVd = Y+eps*Fp;
+ FFd = Fp+Geps+2.0*Heps2;
+ /* Repulsion */
+ Y = vftab[ntab+8];
+ F = vftab[ntab+9];
+ Geps = eps*vftab[ntab+10];
+ Heps2 = eps2*vftab[ntab+11];
+ Fp = F+Geps+Heps2;
+ VVr = Y+eps*Fp;
+ FFr = Fp+Geps+2.0*Heps2;
+
+ *velec = qq*VVe;
+ *vvdw = c6*VVd+c12*VVr;
+
+ fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
+
+ return fscal;
+}
+
+
+real
+do_nonbonded_listed(int ftype, int nbonds,
+ const t_iatom iatoms[], const t_iparams iparams[],
+ const rvec x[], rvec f[], rvec fshift[],
+ const t_pbc *pbc, const t_graph *g,
+ real *lambda, real *dvdl,
+ const t_mdatoms *md,
+ const t_forcerec *fr, gmx_grppairener_t *grppener,
+ int *global_atom_index)
+{
+ int ielec, ivdw;
+ real qq, c6, c12;
+ rvec dx;
+ ivec dt;
+ int i, j, itype, ai, aj, gid;
+ int fshift_index;
+ real r2, rinv;
+ real fscal, velec, vvdw;
+ real * energygrp_elec;
+ real * energygrp_vdw;
+ static gmx_bool warned_rlimit = FALSE;
+ /* Free energy stuff */
+ gmx_bool bFreeEnergy;
+ real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
+ real qqB, c6B, c12B, sigma2_def, sigma2_min;
+
+
+ switch (ftype)
+ {
+ case F_LJ14:
+ case F_LJC14_Q:
+ energygrp_elec = grppener->ener[egCOUL14];
+ energygrp_vdw = grppener->ener[egLJ14];
+ break;
+ case F_LJC_PAIRS_NB:
+ energygrp_elec = grppener->ener[egCOULSR];
+ energygrp_vdw = grppener->ener[egLJSR];
+ break;
+ default:
+ energygrp_elec = NULL; /* Keep compiler happy */
+ energygrp_vdw = NULL; /* Keep compiler happy */
+ gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
+ break;
+ }
+
+ if (fr->efep != efepNO)
+ {
+ /* Lambda factor for state A=1-lambda and B=lambda */
+ LFC[0] = 1.0 - lambda[efptCOUL];
+ LFV[0] = 1.0 - lambda[efptVDW];
+ LFC[1] = lambda[efptCOUL];
+ LFV[1] = lambda[efptVDW];
+
+ /*derivative of the lambda factor for state A and B */
+ DLF[0] = -1;
+ DLF[1] = 1;
+
+ /* precalculate */
+ sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
+ sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
+
+ for (i = 0; i < 2; i++)
+ {
+ lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
+ dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
+ lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
+ dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
+ }
+ }
+ else
+ {
+ sigma2_min = sigma2_def = 0;
+ }
+
+ bFreeEnergy = FALSE;
+ for (i = 0; (i < nbonds); )
+ {
+ itype = iatoms[i++];
+ ai = iatoms[i++];
+ aj = iatoms[i++];
+ gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
+
+ /* Get parameters */
+ switch (ftype)
+ {
+ case F_LJ14:
+ bFreeEnergy =
+ (fr->efep != efepNO &&
+ ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
+ iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
+ iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
+ qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
+ c6 = iparams[itype].lj14.c6A;
+ c12 = iparams[itype].lj14.c12A;
+ break;
+ case F_LJC14_Q:
+ qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
+ c6 = iparams[itype].ljc14.c6;
+ c12 = iparams[itype].ljc14.c12;
+ break;
+ case F_LJC_PAIRS_NB:
+ qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
+ c6 = iparams[itype].ljcnb.c6;
+ c12 = iparams[itype].ljcnb.c12;
+ break;
+ default:
+ /* Cannot happen since we called gmx_fatal() above in this case */
+ qq = c6 = c12 = 0; /* Keep compiler happy */
+ break;
+ }
+
+ /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
+ * included in the general nfbp array now. This means the tables are scaled down by the
+ * same factor, so when we use the original c6/c12 parameters from iparams[] they must
+ * be scaled up.
+ */
+ c6 *= 6.0;
+ c12 *= 12.0;
+
+ /* Do we need to apply full periodic boundary conditions? */
+ if (fr->bMolPBC == TRUE)
+ {
+ fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
+ }
+ else
+ {
+ fshift_index = CENTRAL;
+ rvec_sub(x[ai], x[aj], dx);
+ }
+ r2 = norm2(dx);
+
+ if (r2 >= fr->tab14.r*fr->tab14.r)
+ {
+ if (warned_rlimit == FALSE)
+ {
+ nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
+ warned_rlimit = TRUE;
+ }
+ continue;
+ }
+
+ if (bFreeEnergy)
+ {
+ /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
+ qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
+ c6B = iparams[itype].lj14.c6B*6.0;
+ c12B = iparams[itype].lj14.c12B*12.0;
+
+ fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
+ fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
+ LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
+ fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
+ }
+ else
+ {
+ /* Evaluate tabulated interaction without free energy */
+ fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
+ }
+
+ energygrp_elec[gid] += velec;
+ energygrp_vdw[gid] += vvdw;
+ svmul(fscal, dx, dx);
+
+ /* Add the forces */
+ rvec_inc(f[ai], dx);
+ rvec_dec(f[aj], dx);
+
+ if (g)
+ {
+ /* Correct the shift forces using the graph */
+ ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
+ fshift_index = IVEC2IS(dt);
+ }
+ if (fshift_index != CENTRAL)
+ {
+ rvec_inc(fshift[fshift_index], dx);
+ rvec_dec(fshift[CENTRAL], dx);
+ }
+ }
+ return 0.0;
+}
--- /dev/null
- sprintf(warn_buf, "The switching range for should be 5%% or less (currently %.2f%% using a switching range of %4f-%4f) for accurate electrostatic energies, energy conservation will be good regardless, since ewald_rtol = %g.",
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "sysstuff.h"
+#include "gromacs/utility/smalloc.h"
+#include "typedefs.h"
+#include "physics.h"
+#include "names.h"
+#include "gmx_fatal.h"
+#include "macros.h"
+#include "index.h"
+#include "symtab.h"
+#include "gromacs/utility/cstringutil.h"
+#include "readinp.h"
+#include "warninp.h"
+#include "readir.h"
+#include "toputil.h"
+#include "index.h"
+#include "network.h"
+#include "vec.h"
+#include "pbc.h"
+#include "mtop_util.h"
+#include "chargegroup.h"
+#include "inputrec.h"
+#include "calc_verletbuf.h"
+
+#define MAXPTR 254
+#define NOGID 255
+
+/* Resource parameters
+ * Do not change any of these until you read the instruction
+ * in readinp.h. Some cpp's do not take spaces after the backslash
+ * (like the c-shell), which will give you a very weird compiler
+ * message.
+ */
+
+typedef struct t_inputrec_strings
+{
+ char tcgrps[STRLEN], tau_t[STRLEN], ref_t[STRLEN],
+ acc[STRLEN], accgrps[STRLEN], freeze[STRLEN], frdim[STRLEN],
+ energy[STRLEN], user1[STRLEN], user2[STRLEN], vcm[STRLEN], x_compressed_groups[STRLEN],
+ couple_moltype[STRLEN], orirefitgrp[STRLEN], egptable[STRLEN], egpexcl[STRLEN],
+ wall_atomtype[STRLEN], wall_density[STRLEN], deform[STRLEN], QMMM[STRLEN],
+ imd_grp[STRLEN];
+ char fep_lambda[efptNR][STRLEN];
+ char lambda_weights[STRLEN];
+ char **pull_grp;
+ char **rot_grp;
+ char anneal[STRLEN], anneal_npoints[STRLEN],
+ anneal_time[STRLEN], anneal_temp[STRLEN];
+ char QMmethod[STRLEN], QMbasis[STRLEN], QMcharge[STRLEN], QMmult[STRLEN],
+ bSH[STRLEN], CASorbitals[STRLEN], CASelectrons[STRLEN], SAon[STRLEN],
+ SAoff[STRLEN], SAsteps[STRLEN], bTS[STRLEN], bOPT[STRLEN];
+ char efield_x[STRLEN], efield_xt[STRLEN], efield_y[STRLEN],
+ efield_yt[STRLEN], efield_z[STRLEN], efield_zt[STRLEN];
+
+} gmx_inputrec_strings;
+
+static gmx_inputrec_strings *is = NULL;
+
+void init_inputrec_strings()
+{
+ if (is)
+ {
+ gmx_incons("Attempted to call init_inputrec_strings before calling done_inputrec_strings. Only one inputrec (i.e. .mdp file) can be parsed at a time.");
+ }
+ snew(is, 1);
+}
+
+void done_inputrec_strings()
+{
+ sfree(is);
+ is = NULL;
+}
+
+static char swapgrp[STRLEN], splitgrp0[STRLEN], splitgrp1[STRLEN], solgrp[STRLEN];
+
/* Ways in which particles may be distributed over groups */
enum {
    /* All particles have to be a member of a group. */
    egrptpALL = 0,
    /* A rest group with name is generated for particles
     * that are not part of any group.
     */
    egrptpALL_GENREST = 1,
    /* As egrptpALL_GENREST, but no name is generated
     * for the rest group.
     */
    egrptpPART = 2,
    /* Merge all selected groups into one group,
     * make a rest group for the remaining particles.
     */
    egrptpONE = 3
};
+
+static const char *constraints[eshNR+1] = {
+ "none", "h-bonds", "all-bonds", "h-angles", "all-angles", NULL
+};
+
+static const char *couple_lam[ecouplamNR+1] = {
+ "vdw-q", "vdw", "q", "none", NULL
+};
+
+void init_ir(t_inputrec *ir, t_gromppopts *opts)
+{
+ snew(opts->include, STRLEN);
+ snew(opts->define, STRLEN);
+ snew(ir->fepvals, 1);
+ snew(ir->expandedvals, 1);
+ snew(ir->simtempvals, 1);
+}
+
+static void GetSimTemps(int ntemps, t_simtemp *simtemp, double *temperature_lambdas)
+{
+
+ int i;
+
+ for (i = 0; i < ntemps; i++)
+ {
+ /* simple linear scaling -- allows more control */
+ if (simtemp->eSimTempScale == esimtempLINEAR)
+ {
+ simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*temperature_lambdas[i];
+ }
+ else if (simtemp->eSimTempScale == esimtempGEOMETRIC) /* should give roughly equal acceptance for constant heat capacity . . . */
+ {
+ simtemp->temperatures[i] = simtemp->simtemp_low * pow(simtemp->simtemp_high/simtemp->simtemp_low, (1.0*i)/(ntemps-1));
+ }
+ else if (simtemp->eSimTempScale == esimtempEXPONENTIAL)
+ {
+ simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1));
+ }
+ else
+ {
+ char errorstr[128];
+ sprintf(errorstr, "eSimTempScale=%d not defined", simtemp->eSimTempScale);
+ gmx_fatal(FARGS, errorstr);
+ }
+ }
+}
+
+
+
+static void _low_check(gmx_bool b, char *s, warninp_t wi)
+{
+ if (b)
+ {
+ warning_error(wi, s);
+ }
+}
+
+static void check_nst(const char *desc_nst, int nst,
+ const char *desc_p, int *p,
+ warninp_t wi)
+{
+ char buf[STRLEN];
+
+ if (*p > 0 && *p % nst != 0)
+ {
+ /* Round up to the next multiple of nst */
+ *p = ((*p)/nst + 1)*nst;
+ sprintf(buf, "%s should be a multiple of %s, changing %s to %d\n",
+ desc_p, desc_nst, desc_p, *p);
+ warning(wi, buf);
+ }
+}
+
+static gmx_bool ir_NVE(const t_inputrec *ir)
+{
+ return ((ir->eI == eiMD || EI_VV(ir->eI)) && ir->etc == etcNO);
+}
+
/* Return the largest integer that divides both n1 and n2, i.e. their
 * greatest common divisor (despite what the name may suggest).
 *
 * If either argument is smaller than 1 the result is 1.
 *
 * Uses Euclid's algorithm, so the cost grows with the number of digits of
 * the arguments rather than with their magnitude.
 */
static int lcd(int n1, int n2)
{
    int rem;

    if (n1 < 1 || n2 < 1)
    {
        return 1;
    }

    /* Invariant: the common divisors of (n1, n2) stay the same each round */
    while (n2 != 0)
    {
        rem = n1 % n2;
        n1  = n2;
        n2  = rem;
    }

    return n1;
}
+
+static void process_interaction_modifier(const t_inputrec *ir, int *eintmod)
+{
+ if (*eintmod == eintmodPOTSHIFT_VERLET)
+ {
+ if (ir->cutoff_scheme == ecutsVERLET)
+ {
+ *eintmod = eintmodPOTSHIFT;
+ }
+ else
+ {
+ *eintmod = eintmodNONE;
+ }
+ }
+}
+
+void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts,
+ warninp_t wi)
+/* Check internal consistency.
+ * NOTE: index groups are not set here yet, don't check things
+ * like temperature coupling group options here, but in triple_check
+ */
+{
+ /* Strange macro: first one fills the err_buf, and then one can check
+ * the condition, which will print the message and increase the error
+ * counter.
+ */
+#define CHECK(b) _low_check(b, err_buf, wi)
+ char err_buf[256], warn_buf[STRLEN];
+ int i, j;
+ int ns_type = 0;
+ real dt_coupl = 0;
+ real dt_pcoupl;
+ int nstcmin;
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+
+ set_warning_line(wi, mdparin, -1);
+
+ /* BASIC CUT-OFF STUFF */
+ if (ir->rcoulomb < 0)
+ {
+ warning_error(wi, "rcoulomb should be >= 0");
+ }
+ if (ir->rvdw < 0)
+ {
+ warning_error(wi, "rvdw should be >= 0");
+ }
+ if (ir->rlist < 0 &&
+ !(ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_tol > 0))
+ {
+ warning_error(wi, "rlist should be >= 0");
+ }
+
+ process_interaction_modifier(ir, &ir->coulomb_modifier);
+ process_interaction_modifier(ir, &ir->vdw_modifier);
+
+ if (ir->cutoff_scheme == ecutsGROUP)
+ {
+ warning_note(wi,
+ "The group cutoff scheme is deprecated in Gromacs 5.0 and will be removed in a future "
+ "release when all interaction forms are supported for the verlet scheme. The verlet "
+ "scheme already scales better, and it is compatible with GPUs and other accelerators.");
+
+ /* BASIC CUT-OFF STUFF */
+ if (ir->rlist == 0 ||
+ !((ir_coulomb_might_be_zero_at_cutoff(ir) && ir->rcoulomb > ir->rlist) ||
+ (ir_vdw_might_be_zero_at_cutoff(ir) && ir->rvdw > ir->rlist)))
+ {
+ /* No switched potential and/or no twin-range:
+ * we can set the long-range cut-off to the maximum of the other cut-offs.
+ */
+ ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
+ }
+ else if (ir->rlistlong < 0)
+ {
+ ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
+ sprintf(warn_buf, "rlistlong was not set, setting it to %g (no buffer)",
+ ir->rlistlong);
+ warning(wi, warn_buf);
+ }
+ if (ir->rlistlong == 0 && ir->ePBC != epbcNONE)
+ {
+ warning_error(wi, "Can not have an infinite cut-off with PBC");
+ }
+ if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist))
+ {
+ warning_error(wi, "rlistlong can not be shorter than rlist");
+ }
+ if (IR_TWINRANGE(*ir) && ir->nstlist <= 0)
+ {
+ warning_error(wi, "Can not have nstlist<=0 with twin-range interactions");
+ }
+ }
+
+ if (ir->rlistlong == ir->rlist)
+ {
+ ir->nstcalclr = 0;
+ }
+ else if (ir->rlistlong > ir->rlist && ir->nstcalclr == 0)
+ {
+ warning_error(wi, "With different cutoffs for electrostatics and VdW, nstcalclr must be -1 or a positive number");
+ }
+
+ if (ir->cutoff_scheme == ecutsVERLET)
+ {
+ real rc_max;
+
+ /* Normal Verlet type neighbor-list, currently only limited feature support */
+ if (inputrec2nboundeddim(ir) < 3)
+ {
+ warning_error(wi, "With Verlet lists only full pbc or pbc=xy with walls is supported");
+ }
+ if (ir->rcoulomb != ir->rvdw)
+ {
+ warning_error(wi, "With Verlet lists rcoulomb!=rvdw is not supported");
+ }
+ if (ir->vdwtype == evdwSHIFT || ir->vdwtype == evdwSWITCH)
+ {
+ if (ir->vdw_modifier == eintmodNONE ||
+ ir->vdw_modifier == eintmodPOTSHIFT)
+ {
+ ir->vdw_modifier = (ir->vdwtype == evdwSHIFT ? eintmodFORCESWITCH : eintmodPOTSWITCH);
+
+ sprintf(warn_buf, "Replacing vdwtype=%s by the equivalent combination of vdwtype=%s and vdw_modifier=%s", evdw_names[ir->vdwtype], evdw_names[evdwCUT], eintmod_names[ir->vdw_modifier]);
+ warning_note(wi, warn_buf);
+
+ ir->vdwtype = evdwCUT;
+ }
+ else
+ {
+ sprintf(warn_buf, "Unsupported combination of vdwtype=%s and vdw_modifier=%s", evdw_names[ir->vdwtype], eintmod_names[ir->vdw_modifier]);
+ warning_error(wi, warn_buf);
+ }
+ }
+
+ if (!(ir->vdwtype == evdwCUT || ir->vdwtype == evdwPME))
+ {
+ warning_error(wi, "With Verlet lists only cut-off and PME LJ interactions are supported");
+ }
+ if (!(ir->coulombtype == eelCUT ||
+ (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC) ||
+ EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD))
+ {
+ warning_error(wi, "With Verlet lists only cut-off, reaction-field, PME and Ewald electrostatics are supported");
+ }
+ if (!(ir->coulomb_modifier == eintmodNONE ||
+ ir->coulomb_modifier == eintmodPOTSHIFT))
+ {
+ sprintf(warn_buf, "coulomb_modifier=%s is not supported with the Verlet cut-off scheme", eintmod_names[ir->coulomb_modifier]);
+ warning_error(wi, warn_buf);
+ }
+
+ if (ir->nstlist <= 0)
+ {
+ warning_error(wi, "With Verlet lists nstlist should be larger than 0");
+ }
+
+ if (ir->nstlist < 10)
+ {
+ warning_note(wi, "With Verlet lists the optimal nstlist is >= 10, with GPUs >= 20. Note that with the Verlet scheme, nstlist has no effect on the accuracy of your simulation.");
+ }
+
+ rc_max = max(ir->rvdw, ir->rcoulomb);
+
+ if (ir->verletbuf_tol <= 0)
+ {
+ if (ir->verletbuf_tol == 0)
+ {
+ warning_error(wi, "Can not have Verlet buffer tolerance of exactly 0");
+ }
+
+ if (ir->rlist < rc_max)
+ {
+ warning_error(wi, "With verlet lists rlist can not be smaller than rvdw or rcoulomb");
+ }
+
+ if (ir->rlist == rc_max && ir->nstlist > 1)
+ {
+ warning_note(wi, "rlist is equal to rvdw and/or rcoulomb: there is no explicit Verlet buffer. The cluster pair list does have a buffering effect, but choosing a larger rlist might be necessary for good energy conservation.");
+ }
+ }
+ else
+ {
+ if (ir->rlist > rc_max)
+ {
+ warning_note(wi, "You have set rlist larger than the interaction cut-off, but you also have verlet-buffer-tolerance > 0. Will set rlist using verlet-buffer-tolerance.");
+ }
+
+ if (ir->nstlist == 1)
+ {
+ /* No buffer required */
+ ir->rlist = rc_max;
+ }
+ else
+ {
+ if (EI_DYNAMICS(ir->eI))
+ {
+ if (inputrec2nboundeddim(ir) < 3)
+ {
+ warning_error(wi, "The box volume is required for calculating rlist from the energy drift with verlet-buffer-tolerance > 0. You are using at least one unbounded dimension, so no volume can be computed. Either use a finite box, or set rlist yourself together with verlet-buffer-tolerance = -1.");
+ }
+ /* Set rlist temporarily so we can continue processing */
+ ir->rlist = rc_max;
+ }
+ else
+ {
+ /* Set the buffer to 5% of the cut-off */
+ ir->rlist = (1.0 + verlet_buffer_ratio_nodynamics)*rc_max;
+ }
+ }
+ }
+
+ /* No twin-range calculations with Verlet lists */
+ ir->rlistlong = ir->rlist;
+ }
+
+ if (ir->nstcalclr == -1)
+ {
+ /* if rlist=rlistlong, this will later be changed to nstcalclr=0 */
+ ir->nstcalclr = ir->nstlist;
+ }
+ else if (ir->nstcalclr > 0)
+ {
+ if (ir->nstlist > 0 && (ir->nstlist % ir->nstcalclr != 0))
+ {
+ warning_error(wi, "nstlist must be evenly divisible by nstcalclr. Use nstcalclr = -1 to automatically follow nstlist");
+ }
+ }
+ else if (ir->nstcalclr < -1)
+ {
+ warning_error(wi, "nstcalclr must be a positive number (divisor of nstcalclr), or -1 to follow nstlist.");
+ }
+
+ if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr > 1)
+ {
+ warning_error(wi, "When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)");
+ }
+
+ /* GENERAL INTEGRATOR STUFF */
+ if (!(ir->eI == eiMD || EI_VV(ir->eI)))
+ {
+ ir->etc = etcNO;
+ }
+ if (ir->eI == eiVVAK)
+ {
+ sprintf(warn_buf, "Integrator method %s is implemented primarily for validation purposes; for molecular dynamics, you should probably be using %s or %s", ei_names[eiVVAK], ei_names[eiMD], ei_names[eiVV]);
+ warning_note(wi, warn_buf);
+ }
+ if (!EI_DYNAMICS(ir->eI))
+ {
+ ir->epc = epcNO;
+ }
+ if (EI_DYNAMICS(ir->eI))
+ {
+ if (ir->nstcalcenergy < 0)
+ {
+ ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir);
+ if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy)
+ {
+ /* nstcalcenergy larger than nstener does not make sense.
+ * We ideally want nstcalcenergy=nstener.
+ */
+ if (ir->nstlist > 0)
+ {
+ ir->nstcalcenergy = lcd(ir->nstenergy, ir->nstlist);
+ }
+ else
+ {
+ ir->nstcalcenergy = ir->nstenergy;
+ }
+ }
+ }
+ else if ( (ir->nstenergy > 0 && ir->nstcalcenergy > ir->nstenergy) ||
+ (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 &&
+ (ir->nstcalcenergy > ir->fepvals->nstdhdl) ) )
+
+ {
+ const char *nsten = "nstenergy";
+ const char *nstdh = "nstdhdl";
+ const char *min_name = nsten;
+ int min_nst = ir->nstenergy;
+
+ /* find the smallest of ( nstenergy, nstdhdl ) */
+ if (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 &&
+ (ir->nstenergy == 0 || ir->fepvals->nstdhdl < ir->nstenergy))
+ {
+ min_nst = ir->fepvals->nstdhdl;
+ min_name = nstdh;
+ }
+ /* If the user sets nstenergy small, we should respect that */
+ sprintf(warn_buf,
+ "Setting nstcalcenergy (%d) equal to %s (%d)",
+ ir->nstcalcenergy, min_name, min_nst);
+ warning_note(wi, warn_buf);
+ ir->nstcalcenergy = min_nst;
+ }
+
+ if (ir->epc != epcNO)
+ {
+ if (ir->nstpcouple < 0)
+ {
+ ir->nstpcouple = ir_optimal_nstpcouple(ir);
+ }
+ }
+ if (IR_TWINRANGE(*ir))
+ {
+ check_nst("nstlist", ir->nstlist,
+ "nstcalcenergy", &ir->nstcalcenergy, wi);
+ if (ir->epc != epcNO)
+ {
+ check_nst("nstlist", ir->nstlist,
+ "nstpcouple", &ir->nstpcouple, wi);
+ }
+ }
+
+ if (ir->nstcalcenergy > 0)
+ {
+ if (ir->efep != efepNO)
+ {
+ /* nstdhdl should be a multiple of nstcalcenergy */
+ check_nst("nstcalcenergy", ir->nstcalcenergy,
+ "nstdhdl", &ir->fepvals->nstdhdl, wi);
+ /* nstexpanded should be a multiple of nstcalcenergy */
+ check_nst("nstcalcenergy", ir->nstcalcenergy,
+ "nstexpanded", &ir->expandedvals->nstexpanded, wi);
+ }
+ /* for storing exact averages nstenergy should be
+ * a multiple of nstcalcenergy
+ */
+ check_nst("nstcalcenergy", ir->nstcalcenergy,
+ "nstenergy", &ir->nstenergy, wi);
+ }
+ }
+
+ if (ir->nsteps == 0 && !ir->bContinuation)
+ {
+ warning_note(wi, "For a correct single-point energy evaluation with nsteps = 0, use continuation = yes to avoid constraining the input coordinates.");
+ }
+
+ /* LD STUFF */
+ if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
+ ir->bContinuation && ir->ld_seed != -1)
+ {
+ warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
+ }
+
+ /* TPI STUFF */
+ if (EI_TPI(ir->eI))
+ {
+ sprintf(err_buf, "TPI only works with pbc = %s", epbc_names[epbcXYZ]);
+ CHECK(ir->ePBC != epbcXYZ);
+ sprintf(err_buf, "TPI only works with ns = %s", ens_names[ensGRID]);
+ CHECK(ir->ns_type != ensGRID);
+ sprintf(err_buf, "with TPI nstlist should be larger than zero");
+ CHECK(ir->nstlist <= 0);
+ sprintf(err_buf, "TPI does not work with full electrostatics other than PME");
+ CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype));
+ }
+
+ /* SHAKE / LINCS */
+ if ( (opts->nshake > 0) && (opts->bMorse) )
+ {
+ sprintf(warn_buf,
+ "Using morse bond-potentials while constraining bonds is useless");
+ warning(wi, warn_buf);
+ }
+
+ if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
+ ir->bContinuation && ir->ld_seed != -1)
+ {
+ warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
+ }
+ /* verify simulated tempering options */
+
+ if (ir->bSimTemp)
+ {
+ gmx_bool bAllTempZero = TRUE;
+ for (i = 0; i < fep->n_lambda; i++)
+ {
+ sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[efptTEMPERATURE], fep->all_lambda[efptTEMPERATURE][i]);
+ CHECK((fep->all_lambda[efptTEMPERATURE][i] < 0) || (fep->all_lambda[efptTEMPERATURE][i] > 1));
+ if (fep->all_lambda[efptTEMPERATURE][i] > 0)
+ {
+ bAllTempZero = FALSE;
+ }
+ }
+ sprintf(err_buf, "if simulated tempering is on, temperature-lambdas may not be all zero");
+ CHECK(bAllTempZero == TRUE);
+
+ sprintf(err_buf, "Simulated tempering is currently only compatible with md-vv");
+ CHECK(ir->eI != eiVV);
+
+ /* check compatability of the temperature coupling with simulated tempering */
+
+ if (ir->etc == etcNOSEHOOVER)
+ {
+ sprintf(warn_buf, "Nose-Hoover based temperature control such as [%s] my not be entirelyconsistent with simulated tempering", etcoupl_names[ir->etc]);
+ warning_note(wi, warn_buf);
+ }
+
+ /* check that the temperatures make sense */
+
+ sprintf(err_buf, "Higher simulated tempering temperature (%g) must be >= than the simulated tempering lower temperature (%g)", ir->simtempvals->simtemp_high, ir->simtempvals->simtemp_low);
+ CHECK(ir->simtempvals->simtemp_high <= ir->simtempvals->simtemp_low);
+
+ sprintf(err_buf, "Higher simulated tempering temperature (%g) must be >= zero", ir->simtempvals->simtemp_high);
+ CHECK(ir->simtempvals->simtemp_high <= 0);
+
+ sprintf(err_buf, "Lower simulated tempering temperature (%g) must be >= zero", ir->simtempvals->simtemp_low);
+ CHECK(ir->simtempvals->simtemp_low <= 0);
+ }
+
+ /* verify free energy options */
+
+ if (ir->efep != efepNO)
+ {
+ fep = ir->fepvals;
+ sprintf(err_buf, "The soft-core power is %d and can only be 1 or 2",
+ fep->sc_power);
+ CHECK(fep->sc_alpha != 0 && fep->sc_power != 1 && fep->sc_power != 2);
+
+ sprintf(err_buf, "The soft-core sc-r-power is %d and can only be 6 or 48",
+ (int)fep->sc_r_power);
+ CHECK(fep->sc_alpha != 0 && fep->sc_r_power != 6.0 && fep->sc_r_power != 48.0);
+
+ sprintf(err_buf, "Can't use postive delta-lambda (%g) if initial state/lambda does not start at zero", fep->delta_lambda);
+ CHECK(fep->delta_lambda > 0 && ((fep->init_fep_state > 0) || (fep->init_lambda > 0)));
+
+ sprintf(err_buf, "Can't use postive delta-lambda (%g) with expanded ensemble simulations", fep->delta_lambda);
+ CHECK(fep->delta_lambda > 0 && (ir->efep == efepEXPANDED));
+
+ sprintf(err_buf, "Can only use expanded ensemble with md-vv for now; should be supported for other integrators in 5.0");
+ CHECK(!(EI_VV(ir->eI)) && (ir->efep == efepEXPANDED));
+
+ sprintf(err_buf, "Free-energy not implemented for Ewald");
+ CHECK(ir->coulombtype == eelEWALD);
+
+ /* check validty of lambda inputs */
+ if (fep->n_lambda == 0)
+ {
+ /* Clear output in case of no states:*/
+ sprintf(err_buf, "init-lambda-state set to %d: no lambda states are defined.", fep->init_fep_state);
+ CHECK((fep->init_fep_state >= 0) && (fep->n_lambda == 0));
+ }
+ else
+ {
+ sprintf(err_buf, "initial thermodynamic state %d does not exist, only goes to %d", fep->init_fep_state, fep->n_lambda-1);
+ CHECK((fep->init_fep_state >= fep->n_lambda));
+ }
+
+ sprintf(err_buf, "Lambda state must be set, either with init-lambda-state or with init-lambda");
+ CHECK((fep->init_fep_state < 0) && (fep->init_lambda < 0));
+
+ sprintf(err_buf, "init-lambda=%g while init-lambda-state=%d. Lambda state must be set either with init-lambda-state or with init-lambda, but not both",
+ fep->init_lambda, fep->init_fep_state);
+ CHECK((fep->init_fep_state >= 0) && (fep->init_lambda >= 0));
+
+
+
+ if ((fep->init_lambda >= 0) && (fep->delta_lambda == 0))
+ {
+ int n_lambda_terms;
+ n_lambda_terms = 0;
+ for (i = 0; i < efptNR; i++)
+ {
+ if (fep->separate_dvdl[i])
+ {
+ n_lambda_terms++;
+ }
+ }
+ if (n_lambda_terms > 1)
+ {
+ sprintf(warn_buf, "If lambda vector states (fep-lambdas, coul-lambdas etc.) are set, don't use init-lambda to set lambda state (except for slow growth). Use init-lambda-state instead.");
+ warning(wi, warn_buf);
+ }
+
+ if (n_lambda_terms < 2 && fep->n_lambda > 0)
+ {
+ warning_note(wi,
+ "init-lambda is deprecated for setting lambda state (except for slow growth). Use init-lambda-state instead.");
+ }
+ }
+
+ for (j = 0; j < efptNR; j++)
+ {
+ for (i = 0; i < fep->n_lambda; i++)
+ {
+ sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[j], fep->all_lambda[j][i]);
+ CHECK((fep->all_lambda[j][i] < 0) || (fep->all_lambda[j][i] > 1));
+ }
+ }
+
+ if ((fep->sc_alpha > 0) && (!fep->bScCoul))
+ {
+ for (i = 0; i < fep->n_lambda; i++)
+ {
+ sprintf(err_buf, "For state %d, vdw-lambdas (%f) is changing with vdw softcore, while coul-lambdas (%f) is nonzero without coulomb softcore: this will lead to crashes, and is not supported.", i, fep->all_lambda[efptVDW][i],
+ fep->all_lambda[efptCOUL][i]);
+ CHECK((fep->sc_alpha > 0) &&
+ (((fep->all_lambda[efptCOUL][i] > 0.0) &&
+ (fep->all_lambda[efptCOUL][i] < 1.0)) &&
+ ((fep->all_lambda[efptVDW][i] > 0.0) &&
+ (fep->all_lambda[efptVDW][i] < 1.0))));
+ }
+ }
+
+ if ((fep->bScCoul) && (EEL_PME(ir->coulombtype)))
+ {
+ real sigma, lambda, r_sc;
+
+ sigma = 0.34;
+ /* Maximum estimate for A and B charges equal with lambda power 1 */
+ lambda = 0.5;
+ r_sc = pow(lambda*fep->sc_alpha*pow(sigma/ir->rcoulomb, fep->sc_r_power) + 1.0, 1.0/fep->sc_r_power);
+ sprintf(warn_buf, "With PME there is a minor soft core effect present at the cut-off, proportional to (LJsigma/rcoulomb)^%g. This could have a minor effect on energy conservation, but usually other effects dominate. With a common sigma value of %g nm the fraction of the particle-particle potential at the cut-off at lambda=%g is around %.1e, while ewald-rtol is %.1e.",
+ fep->sc_r_power,
+ sigma, lambda, r_sc - 1.0, ir->ewald_rtol);
+ warning_note(wi, warn_buf);
+ }
+
+ /* Free Energy Checks -- In an ideal world, slow growth and FEP would
+ be treated differently, but that's the next step */
+
+ for (i = 0; i < efptNR; i++)
+ {
+ for (j = 0; j < fep->n_lambda; j++)
+ {
+ sprintf(err_buf, "%s[%d] must be between 0 and 1", efpt_names[i], j);
+ CHECK((fep->all_lambda[i][j] < 0) || (fep->all_lambda[i][j] > 1));
+ }
+ }
+ }
+
+ if ((ir->bSimTemp) || (ir->efep == efepEXPANDED))
+ {
+ fep = ir->fepvals;
+ expand = ir->expandedvals;
+
+ /* checking equilibration of weights inputs for validity */
+
+ sprintf(err_buf, "weight-equil-number-all-lambda (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]);
+ CHECK((expand->equil_n_at_lam > 0) && (expand->elmceq != elmceqNUMATLAM));
+
+ sprintf(err_buf, "weight-equil-number-samples (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_samples, elmceq_names[elmceqSAMPLES]);
+ CHECK((expand->equil_samples > 0) && (expand->elmceq != elmceqSAMPLES));
+
+ sprintf(err_buf, "weight-equil-number-steps (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_steps, elmceq_names[elmceqSTEPS]);
+ CHECK((expand->equil_steps > 0) && (expand->elmceq != elmceqSTEPS));
+
+ sprintf(err_buf, "weight-equil-wl-delta (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_samples, elmceq_names[elmceqWLDELTA]);
+ CHECK((expand->equil_wl_delta > 0) && (expand->elmceq != elmceqWLDELTA));
+
+ sprintf(err_buf, "weight-equil-count-ratio (%f) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_ratio, elmceq_names[elmceqRATIO]);
+ CHECK((expand->equil_ratio > 0) && (expand->elmceq != elmceqRATIO));
+
+ sprintf(err_buf, "weight-equil-number-all-lambda (%d) must be a positive integer if lmc-weights-equil=%s",
+ expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]);
+ CHECK((expand->equil_n_at_lam <= 0) && (expand->elmceq == elmceqNUMATLAM));
+
+ sprintf(err_buf, "weight-equil-number-samples (%d) must be a positive integer if lmc-weights-equil=%s",
+ expand->equil_samples, elmceq_names[elmceqSAMPLES]);
+ CHECK((expand->equil_samples <= 0) && (expand->elmceq == elmceqSAMPLES));
+
+ sprintf(err_buf, "weight-equil-number-steps (%d) must be a positive integer if lmc-weights-equil=%s",
+ expand->equil_steps, elmceq_names[elmceqSTEPS]);
+ CHECK((expand->equil_steps <= 0) && (expand->elmceq == elmceqSTEPS));
+
+ sprintf(err_buf, "weight-equil-wl-delta (%f) must be > 0 if lmc-weights-equil=%s",
+ expand->equil_wl_delta, elmceq_names[elmceqWLDELTA]);
+ CHECK((expand->equil_wl_delta <= 0) && (expand->elmceq == elmceqWLDELTA));
+
+ sprintf(err_buf, "weight-equil-count-ratio (%f) must be > 0 if lmc-weights-equil=%s",
+ expand->equil_ratio, elmceq_names[elmceqRATIO]);
+ CHECK((expand->equil_ratio <= 0) && (expand->elmceq == elmceqRATIO));
+
+ sprintf(err_buf, "lmc-weights-equil=%s only possible when lmc-stats = %s or lmc-stats %s",
+ elmceq_names[elmceqWLDELTA], elamstats_names[elamstatsWL], elamstats_names[elamstatsWWL]);
+ CHECK((expand->elmceq == elmceqWLDELTA) && (!EWL(expand->elamstats)));
+
+ sprintf(err_buf, "lmc-repeats (%d) must be greater than 0", expand->lmc_repeats);
+ CHECK((expand->lmc_repeats <= 0));
+ sprintf(err_buf, "minimum-var-min (%d) must be greater than 0", expand->minvarmin);
+ CHECK((expand->minvarmin <= 0));
+ sprintf(err_buf, "weight-c-range (%d) must be greater or equal to 0", expand->c_range);
+ CHECK((expand->c_range < 0));
+ sprintf(err_buf, "init-lambda-state (%d) must be zero if lmc-forced-nstart (%d)> 0 and lmc-move != 'no'",
+ fep->init_fep_state, expand->lmc_forced_nstart);
+ CHECK((fep->init_fep_state != 0) && (expand->lmc_forced_nstart > 0) && (expand->elmcmove != elmcmoveNO));
+ sprintf(err_buf, "lmc-forced-nstart (%d) must not be negative", expand->lmc_forced_nstart);
+ CHECK((expand->lmc_forced_nstart < 0));
+ sprintf(err_buf, "init-lambda-state (%d) must be in the interval [0,number of lambdas)", fep->init_fep_state);
+ CHECK((fep->init_fep_state < 0) || (fep->init_fep_state >= fep->n_lambda));
+
+ sprintf(err_buf, "init-wl-delta (%f) must be greater than or equal to 0", expand->init_wl_delta);
+ CHECK((expand->init_wl_delta < 0));
+ sprintf(err_buf, "wl-ratio (%f) must be between 0 and 1", expand->wl_ratio);
+ CHECK((expand->wl_ratio <= 0) || (expand->wl_ratio >= 1));
+ sprintf(err_buf, "wl-scale (%f) must be between 0 and 1", expand->wl_scale);
+ CHECK((expand->wl_scale <= 0) || (expand->wl_scale >= 1));
+
+ /* if there is no temperature control, we need to specify an MC temperature */
+ sprintf(err_buf, "If there is no temperature control, and lmc-mcmove!= 'no',mc_temperature must be set to a positive number");
+ if (expand->nstTij > 0)
+ {
+ sprintf(err_buf, "nst-transition-matrix (%d) must be an integer multiple of nstlog (%d)",
+ expand->nstTij, ir->nstlog);
+ CHECK((mod(expand->nstTij, ir->nstlog) != 0));
+ }
+ }
+
+ /* PBC/WALLS */
+ sprintf(err_buf, "walls only work with pbc=%s", epbc_names[epbcXY]);
+ CHECK(ir->nwall && ir->ePBC != epbcXY);
+
+ /* VACUUM STUFF */
+ if (ir->ePBC != epbcXYZ && ir->nwall != 2)
+ {
+ if (ir->ePBC == epbcNONE)
+ {
+ if (ir->epc != epcNO)
+ {
+ warning(wi, "Turning off pressure coupling for vacuum system");
+ ir->epc = epcNO;
+ }
+ }
+ else
+ {
+ sprintf(err_buf, "Can not have pressure coupling with pbc=%s",
+ epbc_names[ir->ePBC]);
+ CHECK(ir->epc != epcNO);
+ }
+ sprintf(err_buf, "Can not have Ewald with pbc=%s", epbc_names[ir->ePBC]);
+ CHECK(EEL_FULL(ir->coulombtype));
+
+ sprintf(err_buf, "Can not have dispersion correction with pbc=%s",
+ epbc_names[ir->ePBC]);
+ CHECK(ir->eDispCorr != edispcNO);
+ }
+
+ if (ir->rlist == 0.0)
+ {
+ sprintf(err_buf, "can only have neighborlist cut-off zero (=infinite)\n"
+ "with coulombtype = %s or coulombtype = %s\n"
+ "without periodic boundary conditions (pbc = %s) and\n"
+ "rcoulomb and rvdw set to zero",
+ eel_names[eelCUT], eel_names[eelUSER], epbc_names[epbcNONE]);
+ CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) ||
+ (ir->ePBC != epbcNONE) ||
+ (ir->rcoulomb != 0.0) || (ir->rvdw != 0.0));
+
+ if (ir->nstlist < 0)
+ {
+ warning_error(wi, "Can not have heuristic neighborlist updates without cut-off");
+ }
+ if (ir->nstlist > 0)
+ {
+ warning_note(wi, "Simulating without cut-offs can be (slightly) faster with nstlist=0, nstype=simple and only one MPI rank");
+ }
+ }
+
+ /* COMM STUFF */
+ if (ir->nstcomm == 0)
+ {
+ ir->comm_mode = ecmNO;
+ }
+ if (ir->comm_mode != ecmNO)
+ {
+ if (ir->nstcomm < 0)
+ {
+ warning(wi, "If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. nstcomm is modified to its absolute value");
+ ir->nstcomm = abs(ir->nstcomm);
+ }
+
+ if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy)
+ {
+ warning_note(wi, "nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy");
+ ir->nstcomm = ir->nstcalcenergy;
+ }
+
+ if (ir->comm_mode == ecmANGULAR)
+ {
+ sprintf(err_buf, "Can not remove the rotation around the center of mass with periodic molecules");
+ CHECK(ir->bPeriodicMols);
+ if (ir->ePBC != epbcNONE)
+ {
+ warning(wi, "Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule).");
+ }
+ }
+ }
+
+ if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR)
+ {
+ warning_note(wi, "Tumbling and or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR.");
+ }
+
+ sprintf(err_buf, "Twin-range neighbour searching (NS) with simple NS"
+ " algorithm not implemented");
+ CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist))
+ && (ir->ns_type == ensSIMPLE));
+
+ /* TEMPERATURE COUPLING */
+ if (ir->etc == etcYES)
+ {
+ ir->etc = etcBERENDSEN;
+ warning_note(wi, "Old option for temperature coupling given: "
+ "changing \"yes\" to \"Berendsen\"\n");
+ }
+
+ if ((ir->etc == etcNOSEHOOVER) || (ir->epc == epcMTTK))
+ {
+ if (ir->opts.nhchainlength < 1)
+ {
+ sprintf(warn_buf, "number of Nose-Hoover chains (currently %d) cannot be less than 1,reset to 1\n", ir->opts.nhchainlength);
+ ir->opts.nhchainlength = 1;
+ warning(wi, warn_buf);
+ }
+
+ if (ir->etc == etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1)
+ {
+ warning_note(wi, "leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1");
+ ir->opts.nhchainlength = 1;
+ }
+ }
+ else
+ {
+ ir->opts.nhchainlength = 0;
+ }
+
+ if (ir->eI == eiVVAK)
+ {
+ sprintf(err_buf, "%s implemented primarily for validation, and requires nsttcouple = 1 and nstpcouple = 1.",
+ ei_names[eiVVAK]);
+ CHECK((ir->nsttcouple != 1) || (ir->nstpcouple != 1));
+ }
+
+ if (ETC_ANDERSEN(ir->etc))
+ {
+ sprintf(err_buf, "%s temperature control not supported for integrator %s.", etcoupl_names[ir->etc], ei_names[ir->eI]);
+ CHECK(!(EI_VV(ir->eI)));
+
+ if (ir->nstcomm > 0 && (ir->etc == etcANDERSEN))
+ {
+ sprintf(warn_buf, "Center of mass removal not necessary for %s. All velocities of coupled groups are rerandomized periodically, so flying ice cube errors will not occur.", etcoupl_names[ir->etc]);
+ warning_note(wi, warn_buf);
+ }
+
+ sprintf(err_buf, "nstcomm must be 1, not %d for %s, as velocities of atoms in coupled groups are randomized every time step", ir->nstcomm, etcoupl_names[ir->etc]);
+ CHECK(ir->nstcomm > 1 && (ir->etc == etcANDERSEN));
+ }
+
+ if (ir->etc == etcBERENDSEN)
+ {
+ sprintf(warn_buf, "The %s thermostat does not generate the correct kinetic energy distribution. You might want to consider using the %s thermostat.",
+ ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE));
+ warning_note(wi, warn_buf);
+ }
+
+ if ((ir->etc == etcNOSEHOOVER || ETC_ANDERSEN(ir->etc))
+ && ir->epc == epcBERENDSEN)
+ {
+ sprintf(warn_buf, "Using Berendsen pressure coupling invalidates the "
+ "true ensemble for the thermostat");
+ warning(wi, warn_buf);
+ }
+
+ /* PRESSURE COUPLING */
+ if (ir->epc == epcISOTROPIC)
+ {
+ ir->epc = epcBERENDSEN;
+ warning_note(wi, "Old option for pressure coupling given: "
+ "changing \"Isotropic\" to \"Berendsen\"\n");
+ }
+
+ if (ir->epc != epcNO)
+ {
+ dt_pcoupl = ir->nstpcouple*ir->delta_t;
+
+ sprintf(err_buf, "tau-p must be > 0 instead of %g\n", ir->tau_p);
+ CHECK(ir->tau_p <= 0);
+
+ if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
+ {
+ sprintf(warn_buf, "For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
+ EPCOUPLTYPE(ir->epc), ir->tau_p, pcouple_min_integration_steps(ir->epc), dt_pcoupl);
+ warning(wi, warn_buf);
+ }
+
+ sprintf(err_buf, "compressibility must be > 0 when using pressure"
+ " coupling %s\n", EPCOUPLTYPE(ir->epc));
+ CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 ||
+ ir->compress[ZZ][ZZ] < 0 ||
+ (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 &&
+ ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0));
+
+ if (epcPARRINELLORAHMAN == ir->epc && opts->bGenVel)
+ {
+ sprintf(warn_buf,
+ "You are generating velocities so I am assuming you "
+ "are equilibrating a system. You are using "
+ "%s pressure coupling, but this can be "
+ "unstable for equilibration. If your system crashes, try "
+ "equilibrating first with Berendsen pressure coupling. If "
+ "you are not equilibrating the system, you can probably "
+ "ignore this warning.",
+ epcoupl_names[ir->epc]);
+ warning(wi, warn_buf);
+ }
+ }
+
+ if (EI_VV(ir->eI))
+ {
+ if (ir->epc > epcNO)
+ {
+ if ((ir->epc != epcBERENDSEN) && (ir->epc != epcMTTK))
+ {
+ warning_error(wi, "for md-vv and md-vv-avek, can only use Berendsen and Martyna-Tuckerman-Tobias-Klein (MTTK) equations for pressure control; MTTK is equivalent to Parrinello-Rahman.");
+ }
+ }
+ }
+ else
+ {
+ if (ir->epc == epcMTTK)
+ {
+ warning_error(wi, "MTTK pressure coupling requires a Velocity-verlet integrator");
+ }
+ }
+
+ /* ELECTROSTATICS */
+ /* More checks are in triple check (grompp.c) */
+
+ if (ir->coulombtype == eelSWITCH)
+ {
+ sprintf(warn_buf, "coulombtype = %s is only for testing purposes and can lead to serious "
+ "artifacts, advice: use coulombtype = %s",
+ eel_names[ir->coulombtype],
+ eel_names[eelRF_ZERO]);
+ warning(wi, warn_buf);
+ }
+
+ if (ir->epsilon_r != 1 && ir->implicit_solvent == eisGBSA)
+ {
+ sprintf(warn_buf, "epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric", ir->epsilon_r);
+ warning_note(wi, warn_buf);
+ }
+
+ if (EEL_RF(ir->coulombtype) && ir->epsilon_rf == 1 && ir->epsilon_r != 1)
+ {
+ sprintf(warn_buf, "epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf", ir->epsilon_r);
+ warning(wi, warn_buf);
+ ir->epsilon_rf = ir->epsilon_r;
+ ir->epsilon_r = 1.0;
+ }
+
+ if (getenv("GMX_DO_GALACTIC_DYNAMICS") == NULL)
+ {
+ sprintf(err_buf, "epsilon-r must be >= 0 instead of %g\n", ir->epsilon_r);
+ CHECK(ir->epsilon_r < 0);
+ }
+
+ if (EEL_RF(ir->coulombtype))
+ {
+ /* reaction field (at the cut-off) */
+
+ if (ir->coulombtype == eelRF_ZERO)
+ {
+ sprintf(warn_buf, "With coulombtype = %s, epsilon-rf must be 0, assuming you meant epsilon_rf=0",
+ eel_names[ir->coulombtype]);
+ CHECK(ir->epsilon_rf != 0);
+ ir->epsilon_rf = 0.0;
+ }
+
+ sprintf(err_buf, "epsilon-rf must be >= epsilon-r");
+ CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) ||
+ (ir->epsilon_r == 0));
+ if (ir->epsilon_rf == ir->epsilon_r)
+ {
+ sprintf(warn_buf, "Using epsilon-rf = epsilon-r with %s does not make sense",
+ eel_names[ir->coulombtype]);
+ warning(wi, warn_buf);
+ }
+ }
+ /* Allow rlist>rcoulomb for tabulated long range stuff. This just
+ * means the interaction is zero outside rcoulomb, but it helps to
+ * provide accurate energy conservation.
+ */
+ if (ir_coulomb_might_be_zero_at_cutoff(ir))
+ {
+ if (ir_coulomb_switched(ir))
+ {
+ sprintf(err_buf,
+ "With coulombtype = %s rcoulomb_switch must be < rcoulomb. Or, better: Use the potential modifier options!",
+ eel_names[ir->coulombtype]);
+ CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
+ }
+ }
+ else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype))
+ {
+ if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE)
+ {
+ sprintf(err_buf, "With coulombtype = %s, rcoulomb should be >= rlist unless you use a potential modifier",
+ eel_names[ir->coulombtype]);
+ CHECK(ir->rlist > ir->rcoulomb);
+ }
+ }
+
++ if (ir->coulombtype == eelSWITCH || ir->coulombtype == eelSHIFT)
++ {
++ sprintf(err_buf,
++ "Explicit switch/shift coulomb interactions cannot be used in combination with a secondary coulomb-modifier.");
++ CHECK( ir->coulomb_modifier != eintmodNONE);
++ }
++ if (ir->vdwtype == evdwSWITCH || ir->vdwtype == evdwSHIFT)
++ {
++ sprintf(err_buf,
++ "Explicit switch/shift vdw interactions cannot be used in combination with a secondary vdw-modifier.");
++ CHECK( ir->vdw_modifier != eintmodNONE);
++ }
++
+ if (ir->coulombtype == eelSWITCH || ir->coulombtype == eelSHIFT ||
+ ir->vdwtype == evdwSWITCH || ir->vdwtype == evdwSHIFT)
+ {
+ sprintf(warn_buf,
+ "The switch/shift interaction settings are just for compatibility; you will get better "
+ "performance from applying potential modifiers to your interactions!\n");
+ warning_note(wi, warn_buf);
+ }
+
+ if (ir->coulombtype == eelPMESWITCH || ir->coulomb_modifier == eintmodPOTSWITCH)
+ {
+ if (ir->rcoulomb_switch/ir->rcoulomb < 0.9499)
+ {
+ real percentage = 100*(ir->rcoulomb-ir->rcoulomb_switch)/ir->rcoulomb;
++ sprintf(warn_buf, "The switching range should be 5%% or less (currently %.2f%% using a switching range of %4f-%4f) for accurate electrostatic energies, energy conservation will be good regardless, since ewald_rtol = %g.",
+ percentage, ir->rcoulomb_switch, ir->rcoulomb, ir->ewald_rtol);
+ warning(wi, warn_buf);
+ }
+ }
+
+ if (ir->vdwtype == evdwSWITCH || ir->vdw_modifier == eintmodPOTSWITCH)
+ {
+ if (ir->rvdw_switch == 0)
+ {
+ sprintf(warn_buf, "rvdw-switch is equal 0 even though you are using a switched Lennard-Jones potential. This suggests it was not set in the mdp, which can lead to large energy errors. In GROMACS, 0.05 to 0.1 nm is often a reasonable vdw switching range.");
+ warning(wi, warn_buf);
+ }
+ }
+
+ if (EEL_FULL(ir->coulombtype))
+ {
+ if (ir->coulombtype == eelPMESWITCH || ir->coulombtype == eelPMEUSER ||
+ ir->coulombtype == eelPMEUSERSWITCH)
+ {
+ sprintf(err_buf, "With coulombtype = %s, rcoulomb must be <= rlist",
+ eel_names[ir->coulombtype]);
+ CHECK(ir->rcoulomb > ir->rlist);
+ }
+ else if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE)
+ {
+ if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD)
+ {
+ sprintf(err_buf,
+ "With coulombtype = %s (without modifier), rcoulomb must be equal to rlist,\n"
+ "or rlistlong if nstcalclr=1. For optimal energy conservation,consider using\n"
+ "a potential modifier.", eel_names[ir->coulombtype]);
+ if (ir->nstcalclr == 1)
+ {
+ CHECK(ir->rcoulomb != ir->rlist && ir->rcoulomb != ir->rlistlong);
+ }
+ else
+ {
+ CHECK(ir->rcoulomb != ir->rlist);
+ }
+ }
+ }
+ }
+
+ if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
+ {
+ if (ir->pme_order < 3)
+ {
+ warning_error(wi, "pme-order can not be smaller than 3");
+ }
+ }
+
+ if (ir->nwall == 2 && EEL_FULL(ir->coulombtype))
+ {
+ if (ir->ewald_geometry == eewg3D)
+ {
+ sprintf(warn_buf, "With pbc=%s you should use ewald-geometry=%s",
+ epbc_names[ir->ePBC], eewg_names[eewg3DC]);
+ warning(wi, warn_buf);
+ }
+ /* This check avoids extra pbc coding for exclusion corrections */
+ sprintf(err_buf, "wall-ewald-zfac should be >= 2");
+ CHECK(ir->wall_ewald_zfac < 2);
+ }
+
+ if (ir_vdw_switched(ir))
+ {
+ sprintf(err_buf, "With switched vdw forces or potentials, rvdw-switch must be < rvdw");
+ CHECK(ir->rvdw_switch >= ir->rvdw);
+
+ if (ir->rvdw_switch < 0.5*ir->rvdw)
+ {
+ sprintf(warn_buf, "You are applying a switch function to vdw forces or potentials from %g to %g nm, which is more than half the interaction range, whereas switch functions are intended to act only close to the cut-off.",
+ ir->rvdw_switch, ir->rvdw);
+ warning_note(wi, warn_buf);
+ }
+ }
+ else if (ir->vdwtype == evdwCUT || ir->vdwtype == evdwPME)
+ {
+ if (ir->cutoff_scheme == ecutsGROUP && ir->vdw_modifier == eintmodNONE)
+ {
+ sprintf(err_buf, "With vdwtype = %s, rvdw must be >= rlist unless you use a potential modifier", evdw_names[ir->vdwtype]);
+ CHECK(ir->rlist > ir->rvdw);
+ }
+ }
+
+ if (ir->vdwtype == evdwPME)
+ {
+ if (!(ir->vdw_modifier == eintmodNONE || ir->vdw_modifier == eintmodPOTSHIFT))
+ {
+ sprintf(err_buf, "With vdwtype = %s, the only supported modifiers are %s a\
+nd %s",
+ evdw_names[ir->vdwtype],
+ eintmod_names[eintmodPOTSHIFT],
+ eintmod_names[eintmodNONE]);
+ }
+ }
+
+ if (ir->cutoff_scheme == ecutsGROUP)
+ {
+ if (((ir->coulomb_modifier != eintmodNONE && ir->rcoulomb == ir->rlist) ||
+ (ir->vdw_modifier != eintmodNONE && ir->rvdw == ir->rlist)) &&
+ ir->nstlist != 1)
+ {
+ warning_note(wi, "With exact cut-offs, rlist should be "
+ "larger than rcoulomb and rvdw, so that there "
+ "is a buffer region for particle motion "
+ "between neighborsearch steps");
+ }
+
+ if (ir_coulomb_is_zero_at_cutoff(ir) && ir->rlistlong <= ir->rcoulomb)
+ {
+ sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.",
+ IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
+ warning_note(wi, warn_buf);
+ }
+ if (ir_vdw_switched(ir) && (ir->rlistlong <= ir->rvdw))
+ {
+ sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.",
+ IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
+ warning_note(wi, warn_buf);
+ }
+ }
+
+ if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO)
+ {
+ warning_note(wi, "You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
+ }
+
+ if (ir->nstlist == -1)
+ {
+ sprintf(err_buf, "With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
+ CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
+ }
+ sprintf(err_buf, "nstlist can not be smaller than -1");
+ CHECK(ir->nstlist < -1);
+
+ if (ir->eI == eiLBFGS && (ir->coulombtype == eelCUT || ir->vdwtype == evdwCUT)
+ && ir->rvdw != 0)
+ {
+ warning(wi, "For efficient BFGS minimization, use switch/shift/pme instead of cut-off.");
+ }
+
+ if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0)
+ {
+ warning(wi, "Using L-BFGS with nbfgscorr<=0 just gets you steepest descent.");
+ }
+
+ /* ENERGY CONSERVATION */
+ if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP)
+ {
+ if (!ir_vdw_might_be_zero_at_cutoff(ir) && ir->rvdw > 0 && ir->vdw_modifier == eintmodNONE)
+ {
+ sprintf(warn_buf, "You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
+ evdw_names[evdwSHIFT]);
+ warning_note(wi, warn_buf);
+ }
+ if (!ir_coulomb_might_be_zero_at_cutoff(ir) && ir->rcoulomb > 0)
+ {
+ sprintf(warn_buf, "You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
+ eel_names[eelPMESWITCH], eel_names[eelRF_ZERO]);
+ warning_note(wi, warn_buf);
+ }
+ }
+
+ if (EI_VV(ir->eI) && IR_TWINRANGE(*ir) && ir->nstlist > 1)
+ {
+ sprintf(warn_buf, "Twin-range multiple time stepping does not work with integrator %s.", ei_names[ir->eI]);
+ warning_error(wi, warn_buf);
+ }
+
+ /* IMPLICIT SOLVENT */
+ if (ir->coulombtype == eelGB_NOTUSED)
+ {
+ ir->coulombtype = eelCUT;
+ ir->implicit_solvent = eisGBSA;
+ fprintf(stderr, "Note: Old option for generalized born electrostatics given:\n"
+ "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n"
+ "setting implicit-solvent value to \"GBSA\" in input section.\n");
+ }
+
+ if (ir->sa_algorithm == esaSTILL)
+ {
+ sprintf(err_buf, "Still SA algorithm not available yet, use %s or %s instead\n", esa_names[esaAPPROX], esa_names[esaNO]);
+ CHECK(ir->sa_algorithm == esaSTILL);
+ }
+
+ if (ir->implicit_solvent == eisGBSA)
+ {
+ sprintf(err_buf, "With GBSA implicit solvent, rgbradii must be equal to rlist.");
+ CHECK(ir->rgbradii != ir->rlist);
+
+ if (ir->coulombtype != eelCUT)
+ {
+ sprintf(err_buf, "With GBSA, coulombtype must be equal to %s\n", eel_names[eelCUT]);
+ CHECK(ir->coulombtype != eelCUT);
+ }
+ if (ir->vdwtype != evdwCUT)
+ {
+ sprintf(err_buf, "With GBSA, vdw-type must be equal to %s\n", evdw_names[evdwCUT]);
+ CHECK(ir->vdwtype != evdwCUT);
+ }
+ if (ir->nstgbradii < 1)
+ {
+ sprintf(warn_buf, "Using GBSA with nstgbradii<1, setting nstgbradii=1");
+ warning_note(wi, warn_buf);
+ ir->nstgbradii = 1;
+ }
+ if (ir->sa_algorithm == esaNO)
+ {
+ sprintf(warn_buf, "No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n");
+ warning_note(wi, warn_buf);
+ }
+ if (ir->sa_surface_tension < 0 && ir->sa_algorithm != esaNO)
+ {
+ sprintf(warn_buf, "Value of sa_surface_tension is < 0. Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
+ warning_note(wi, warn_buf);
+
+ if (ir->gb_algorithm == egbSTILL)
+ {
+ ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
+ }
+ else
+ {
+ ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
+ }
+ }
+ if (ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO)
+ {
+ sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
+ CHECK(ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO);
+ }
+
+ }
+
+ if (ir->bAdress)
+ {
+ if (ir->cutoff_scheme != ecutsGROUP)
+ {
+ warning_error(wi, "AdresS simulation supports only cutoff-scheme=group");
+ }
+ if (!EI_SD(ir->eI))
+ {
+ warning_error(wi, "AdresS simulation supports only stochastic dynamics");
+ }
+ if (ir->epc != epcNO)
+ {
+ warning_error(wi, "AdresS simulation does not support pressure coupling");
+ }
+ if (EEL_FULL(ir->coulombtype))
+ {
+ warning_error(wi, "AdresS simulation does not support long-range electrostatics");
+ }
+ }
+}
+
+/* Count the whitespace-separated text elements in a string.
+ *   str    = the input string; it is not written to, a private copy made
+ *            with strdup is split up instead
+ *   maxptr = the maximum number of allowed elements; finding more than
+ *            this results in a call to gmx_fatal
+ *   ptr    = output array of pointers to the first character of each
+ *            element; may be NULL when only the count is wanted
+ *   returns: the number of elements.
+ *
+ * The pointers stored in ptr point into the private copy, which is
+ * therefore only released here when ptr is NULL.
+ */
+int str_nelem(const char *str, int maxptr, char *ptr[])
+{
+    int   np = 0;
+    char *copy0, *copy;
+
+    copy0 = strdup(str);
+    if (copy0 == NULL)
+    {
+        /* strdup reports allocation failure with NULL; do not dereference it */
+        gmx_fatal(FARGS, "Not enough memory to copy line: '%s'", str);
+    }
+    copy = copy0;
+    ltrim(copy);
+    while (*copy != '\0')
+    {
+        if (np >= maxptr)
+        {
+            gmx_fatal(FARGS, "Too many groups on line: '%s' (max is %d)",
+                      str, maxptr);
+        }
+        if (ptr)
+        {
+            ptr[np] = copy;
+        }
+        np++;
+        /* The <ctype.h> functions need a value representable as unsigned
+         * char (or EOF); a plain char with the high bit set could otherwise
+         * be passed as a negative int.
+         */
+        while ((*copy != '\0') && !isspace((unsigned char)*copy))
+        {
+            copy++;
+        }
+        if (*copy != '\0')
+        {
+            /* terminate the current element inside the copy */
+            *copy = '\0';
+            copy++;
+        }
+        ltrim(copy);
+    }
+    if (ptr == NULL)
+    {
+        /* nobody holds pointers into the copy, so it can be released */
+        sfree(copy0);
+    }
+
+    return np;
+}
+
+/* Split a string into whitespace-separated elements, convert each element
+ * with strtod and store the results in an array allocated here.
+ *   str = the input string
+ *   n   = output: the number of values read (as counted by str_nelem,
+ *         which is called with a limit of MAXPTR elements)
+ *   r   = output: the array of n reals allocated with snew
+ */
+static void parse_n_real(char *str, int *n, real **r)
+{
+    char *elem[MAXPTR];
+    int   k;
+
+    *n = str_nelem(str, MAXPTR, elem);
+
+    snew(*r, *n);
+    k = 0;
+    while (k < *n)
+    {
+        (*r)[k] = strtod(elem[k], NULL);
+        k++;
+    }
+}
+
+/* Process the free-energy lambda arrays and the expanded-ensemble weights.
+ ir = the input record; its fepvals and expandedvals members are filled in
+ fep_lambda = one string of whitespace-separated lambda values per lambda
+ component (efptNR strings are read)
+ weights = string of whitespace-separated initial lambda weights
+ Every component must supply either zero values or the same number of values
+ as the first non-empty component, and the number of weights must be zero or
+ equal to the number of lambdas; anything else ends in gmx_fatal. */
+static void do_fep_params(t_inputrec *ir, char fep_lambda[][STRLEN], char weights[STRLEN])
+{
+
+ int i, j, max_n_lambda, nweights, nfep[efptNR];
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+ real **count_fep_lambdas;
+ gmx_bool bOneLambda = TRUE;
+
+ snew(count_fep_lambdas, efptNR);
+
+ /* FEP input processing */
+ /* first, identify the number of lambda values for each type.
+ All that are nonzero must have the same number */
+
+ for (i = 0; i < efptNR; i++)
+ {
+ parse_n_real(fep_lambda[i], &(nfep[i]), &(count_fep_lambdas[i]));
+ }
+
+ /* now, determine the number of components. All must be either zero, or equal. */
+
+ max_n_lambda = 0;
+ for (i = 0; i < efptNR; i++)
+ {
+ if (nfep[i] > max_n_lambda)
+ {
+ max_n_lambda = nfep[i]; /* here's a nonzero one. All of them
+ must have the same number if it's not zero,
+ so the first nonzero count is taken. */
+ break;
+ }
+ }
+
+ /* any component whose count is neither zero nor max_n_lambda is a fatal error */
+ for (i = 0; i < efptNR; i++)
+ {
+ if (nfep[i] == 0)
+ {
+ ir->fepvals->separate_dvdl[i] = FALSE;
+ }
+ else if (nfep[i] == max_n_lambda)
+ {
+ if (i != efptTEMPERATURE) /* we treat this differently -- not really a reason to compute the derivative with
+ respect to the temperature currently */
+ {
+ ir->fepvals->separate_dvdl[i] = TRUE;
+ }
+ }
+ else
+ {
+ gmx_fatal(FARGS, "Number of lambdas (%d) for FEP type %s not equal to number of other types (%d)",
+ nfep[i], efpt_names[i], max_n_lambda);
+ }
+ }
+ /* we don't print out dhdl if the temperature is changing, since we can't correctly define dhdl in this case */
+ ir->fepvals->separate_dvdl[efptTEMPERATURE] = FALSE;
+
+ /* the number of lambdas is the number we've read in, which is either zero
+ or the same for all */
+ fep->n_lambda = max_n_lambda;
+
+ /* allocate space for the array of lambda values */
+ snew(fep->all_lambda, efptNR);
+ /* if init_lambda is positive and no lambda arrays were given, set
+ separate_dvdl for the efptFEP component */
+ if ((fep->init_lambda > 0) && (fep->n_lambda == 0))
+ {
+ ir->fepvals->separate_dvdl[efptFEP] = TRUE;
+ }
+ /* allocate the space for all of the lambdas, and transfer the data */
+ for (i = 0; i < efptNR; i++)
+ {
+ snew(fep->all_lambda[i], fep->n_lambda);
+ if (nfep[i] > 0) /* if it's zero, then the count_fep_lambda arrays
+ are zero */
+ {
+ for (j = 0; j < fep->n_lambda; j++)
+ {
+ fep->all_lambda[i][j] = (double)count_fep_lambdas[i][j];
+ }
+ /* the temporary array is only released for components that had values */
+ sfree(count_fep_lambdas[i]);
+ }
+ }
+ sfree(count_fep_lambdas);
+
+ /* "fep-vals" is either zero or the full number. If zero, we'll need to define fep-lambdas for internal
+ bookkeeping -- for now, init_lambda */
+
+ if ((nfep[efptFEP] == 0) && (fep->init_lambda >= 0))
+ {
+ for (i = 0; i < fep->n_lambda; i++)
+ {
+ fep->all_lambda[efptFEP][i] = fep->init_lambda;
+ }
+ }
+
+ /* check to see if only a single component lambda is defined, and soft core is defined.
+ In this case, turn on coulomb soft core */
+
+ if (max_n_lambda == 0)
+ {
+ bOneLambda = TRUE;
+ }
+ else
+ {
+ /* any component other than efptFEP with values clears the flag */
+ for (i = 0; i < efptNR; i++)
+ {
+ if ((nfep[i] != 0) && (i != efptFEP))
+ {
+ bOneLambda = FALSE;
+ }
+ }
+ }
+ if ((bOneLambda) && (fep->sc_alpha > 0))
+ {
+ fep->bScCoul = TRUE;
+ }
+
+ /* Fill in the others with the efptFEP if they are not explicitly
+ specified (i.e. nfep[i] == 0). This means if fep is not defined,
+ they are all zero. */
+
+ for (i = 0; i < efptNR; i++)
+ {
+ if ((nfep[i] == 0) && (i != efptFEP))
+ {
+ for (j = 0; j < fep->n_lambda; j++)
+ {
+ fep->all_lambda[i][j] = fep->all_lambda[efptFEP][j];
+ }
+ }
+ }
+
+
+ /* with sc_r_power = 48, an sc_alpha above 0.1 is rejected with a fatal
+ error; no value is rescaled here */
+ if (fep->sc_r_power == 48)
+ {
+ if (fep->sc_alpha > 0.1)
+ {
+ gmx_fatal(FARGS, "sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004", fep->sc_alpha);
+ }
+ }
+
+ /* same value as assigned at the declaration of expand above */
+ expand = ir->expandedvals;
+ /* now read in the weights */
+ parse_n_real(weights, &nweights, &(expand->init_lambda_weights));
+ if (nweights == 0)
+ {
+ snew(expand->init_lambda_weights, fep->n_lambda); /* initialize to zero */
+ }
+ else if (nweights != fep->n_lambda)
+ {
+ gmx_fatal(FARGS, "Number of weights (%d) is not equal to number of lambda values (%d)",
+ nweights, fep->n_lambda);
+ }
+ if ((expand->nstexpanded < 0) && (ir->efep != efepNO))
+ {
+ expand->nstexpanded = fep->nstdhdl;
+ /* if you don't specify nstexpanded when doing expanded ensemble free energy calcs, it is set to nstdhdl */
+ }
+ if ((expand->nstexpanded < 0) && ir->bSimTemp)
+ {
+ /* only reached when the previous branch left nstexpanded negative;
+ uses tau_t of the first entry in ir->opts.tau_t */
+ expand->nstexpanded = 2*(int)(ir->opts.tau_t[0]/ir->delta_t);
+ /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to
+ 2*tau_t just to be careful so it's not too frequent */
+ }
+}
+
+
+/* Allocate one temperature entry per lambda state in ir->simtempvals and
+ * pass the efptTEMPERATURE lambda array to GetSimTemps (presumably to fill
+ * in those temperatures -- GetSimTemps is defined elsewhere).
+ */
+static void do_simtemp_params(t_inputrec *ir)
+{
+    int nlambda = ir->fepvals->n_lambda;
+
+    snew(ir->simtempvals->temperatures, nlambda);
+    GetSimTemps(nlambda, ir->simtempvals,
+                ir->fepvals->all_lambda[efptTEMPERATURE]);
+}
+
+/* Parse the wall atom types and wall densities given as strings.
+ *   ir            = input record; nwall and wall_type are read, the first
+ *                   two entries of wall_atomtype and wall_density are reset
+ *                   and wall_density is filled in from the input
+ *   wall_atomtype = whitespace-separated atom type names, one per wall
+ *   wall_density  = whitespace-separated densities, one per wall; only
+ *                   read for wall types ewt93 and ewt104
+ *   opts          = receives a strdup'ed copy of each wall atom type name
+ * A wrong element count, or a density that is not a number or not positive,
+ * results in a call to gmx_fatal.
+ */
+static void do_wall_params(t_inputrec *ir,
+                           char *wall_atomtype, char *wall_density,
+                           t_gromppopts *opts)
+{
+    int    nstr, i;
+    char  *names[MAXPTR];
+    double dbl;
+
+    opts->wall_atomtype[0] = NULL;
+    opts->wall_atomtype[1] = NULL;
+
+    ir->wall_atomtype[0] = -1;
+    ir->wall_atomtype[1] = -1;
+    ir->wall_density[0]  = 0;
+    ir->wall_density[1]  = 0;
+
+    if (ir->nwall > 0)
+    {
+        nstr = str_nelem(wall_atomtype, MAXPTR, names);
+        if (nstr != ir->nwall)
+        {
+            gmx_fatal(FARGS, "Expected %d elements for wall_atomtype, found %d",
+                      ir->nwall, nstr);
+        }
+        for (i = 0; i < ir->nwall; i++)
+        {
+            opts->wall_atomtype[i] = strdup(names[i]);
+        }
+
+        if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
+        {
+            nstr = str_nelem(wall_density, MAXPTR, names);
+            if (nstr != ir->nwall)
+            {
+                gmx_fatal(FARGS, "Expected %d elements for wall-density, found %d", ir->nwall, nstr);
+            }
+            for (i = 0; i < ir->nwall; i++)
+            {
+                /* Without this check a non-numeric element would leave dbl
+                 * uninitialized before it is compared and stored below.
+                 */
+                if (sscanf(names[i], "%lf", &dbl) != 1)
+                {
+                    gmx_fatal(FARGS, "Could not read a number from wall-density[%d] = '%s'", i, names[i]);
+                }
+                if (dbl <= 0)
+                {
+                    gmx_fatal(FARGS, "wall-density[%d] = %f\n", i, dbl);
+                }
+                ir->wall_density[i] = dbl;
+            }
+        }
+    }
+}
+
+/* Append one extra group per wall to the egcENER group type.
+ * For every wall w the name "wall<w>" is put in the symbol table, stored as
+ * a new entry at the end of groups->grpname, and the index of that entry is
+ * appended to the nm_ind list of groups->grps[egcENER].
+ * Nothing is done when nwall is not positive.
+ */
+static void add_wall_energrps(gmx_groups_t *groups, int nwall, t_symtab *symtab)
+{
+    int     w;
+    t_grps *grps;
+    char    wallname[STRLEN];
+
+    if (nwall <= 0)
+    {
+        return;
+    }
+
+    /* make room for nwall more names and nwall more name indices */
+    srenew(groups->grpname, groups->ngrpname+nwall);
+    grps = &(groups->grps[egcENER]);
+    srenew(grps->nm_ind, grps->nr+nwall);
+
+    for (w = 0; w < nwall; w++)
+    {
+        sprintf(wallname, "wall%d", w);
+        groups->grpname[groups->ngrpname] = put_symtab(symtab, wallname);
+        grps->nm_ind[grps->nr]            = groups->ngrpname;
+        grps->nr++;
+        groups->ngrpname++;
+    }
+}
+
+/* Read the expanded ensemble options into expand.
+ ninp_p = in/out: pointer to the number of input entries
+ inp_p = in/out: pointer to the array of input entries
+ expand = the expanded ensemble parameters that are filled in
+ wi = warning handle
+ The entry count and array are copied into the locals ninp and inp, the
+ option macros are applied, and both locals are written back on exit.
+ NOTE(review): ninp, inp, nerror and wi are not referenced explicitly in
+ the body; presumably the CCTYPE/ITYPE/RTYPE/EETYPE macros (defined
+ elsewhere) use them by name -- confirm before renaming any of them. */
+void read_expandedparams(int *ninp_p, t_inpfile **inp_p,
+ t_expanded *expand, warninp_t wi)
+{
+ int ninp, nerror = 0;
+ t_inpfile *inp;
+
+ ninp = *ninp_p;
+ inp = *inp_p;
+
+ /* read expanded ensemble parameters */
+ CCTYPE ("expanded ensemble variables");
+ ITYPE ("nstexpanded", expand->nstexpanded, -1);
+ EETYPE("lmc-stats", expand->elamstats, elamstats_names);
+ EETYPE("lmc-move", expand->elmcmove, elmcmove_names);
+ EETYPE("lmc-weights-equil", expand->elmceq, elmceq_names);
+ ITYPE ("weight-equil-number-all-lambda", expand->equil_n_at_lam, -1);
+ ITYPE ("weight-equil-number-samples", expand->equil_samples, -1);
+ ITYPE ("weight-equil-number-steps", expand->equil_steps, -1);
+ RTYPE ("weight-equil-wl-delta", expand->equil_wl_delta, -1);
+ RTYPE ("weight-equil-count-ratio", expand->equil_ratio, -1);
+ CCTYPE("Seed for Monte Carlo in lambda space");
+ ITYPE ("lmc-seed", expand->lmc_seed, -1);
+ RTYPE ("mc-temperature", expand->mc_temp, -1);
+ ITYPE ("lmc-repeats", expand->lmc_repeats, 1);
+ ITYPE ("lmc-gibbsdelta", expand->gibbsdeltalam, -1);
+ ITYPE ("lmc-forced-nstart", expand->lmc_forced_nstart, 0);
+ EETYPE("symmetrized-transition-matrix", expand->bSymmetrizedTMatrix, yesno_names);
+ ITYPE("nst-transition-matrix", expand->nstTij, -1);
+ /* NOTE(review): the key below is spelled "mininum-var-min"; it is the
+ name looked up in the input, so confirm whether the spelling is
+ intentional before changing it */
+ ITYPE ("mininum-var-min", expand->minvarmin, 100); /*default is reasonable */
+ ITYPE ("weight-c-range", expand->c_range, 0); /* default is just C=0 */
+ RTYPE ("wl-scale", expand->wl_scale, 0.8);
+ RTYPE ("wl-ratio", expand->wl_ratio, 0.8);
+ RTYPE ("init-wl-delta", expand->init_wl_delta, 1.0);
+ EETYPE("wl-oneovert", expand->bWLoneovert, yesno_names);
+
+ /* hand the (possibly updated) entry count and array back to the caller */
+ *ninp_p = ninp;
+ *inp_p = inp;
+
+ return;
+}
+
+void get_ir(const char *mdparin, const char *mdparout,
+ t_inputrec *ir, t_gromppopts *opts,
+ warninp_t wi)
+{
+ char *dumstr[2];
+ double dumdub[2][6];
+ t_inpfile *inp;
+ const char *tmp;
+ int i, j, m, ninp;
+ char warn_buf[STRLEN];
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+
+ init_inputrec_strings();
+ inp = read_inpfile(mdparin, &ninp, wi);
+
+ snew(dumstr[0], STRLEN);
+ snew(dumstr[1], STRLEN);
+
+ if (-1 == search_einp(ninp, inp, "cutoff-scheme"))
+ {
+ sprintf(warn_buf,
+ "%s did not specify a value for the .mdp option "
+ "\"cutoff-scheme\". Probably it was first intended for use "
+ "with GROMACS before 4.6. In 4.6, the Verlet scheme was "
+ "introduced, but the group scheme was still the default. "
+ "The default is now the Verlet scheme, so you will observe "
+ "different behaviour.", mdparin);
+ warning_note(wi, warn_buf);
+ }
+
+ /* ignore the following deprecated commands */
+ REM_TYPE("title");
+ REM_TYPE("cpp");
+ REM_TYPE("domain-decomposition");
+ REM_TYPE("andersen-seed");
+ REM_TYPE("dihre");
+ REM_TYPE("dihre-fc");
+ REM_TYPE("dihre-tau");
+ REM_TYPE("nstdihreout");
+ REM_TYPE("nstcheckpoint");
+ REM_TYPE("optimize-fft");
+
+ /* replace the following commands with the clearer new versions*/
+ REPL_TYPE("unconstrained-start", "continuation");
+ REPL_TYPE("foreign-lambda", "fep-lambdas");
+ REPL_TYPE("verlet-buffer-drift", "verlet-buffer-tolerance");
+ REPL_TYPE("nstxtcout", "nstxout-compressed");
+ REPL_TYPE("xtc-grps", "compressed-x-grps");
+ REPL_TYPE("xtc-precision", "compressed-x-precision");
+
+ CCTYPE ("VARIOUS PREPROCESSING OPTIONS");
+ CTYPE ("Preprocessor information: use cpp syntax.");
+ CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe");
+ STYPE ("include", opts->include, NULL);
+ CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)");
+ STYPE ("define", opts->define, NULL);
+
+ CCTYPE ("RUN CONTROL PARAMETERS");
+ EETYPE("integrator", ir->eI, ei_names);
+ CTYPE ("Start time and timestep in ps");
+ RTYPE ("tinit", ir->init_t, 0.0);
+ RTYPE ("dt", ir->delta_t, 0.001);
+ STEPTYPE ("nsteps", ir->nsteps, 0);
+ CTYPE ("For exact run continuation or redoing part of a run");
+ STEPTYPE ("init-step", ir->init_step, 0);
+ CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)");
+ ITYPE ("simulation-part", ir->simulation_part, 1);
+ CTYPE ("mode for center of mass motion removal");
+ EETYPE("comm-mode", ir->comm_mode, ecm_names);
+ CTYPE ("number of steps for center of mass motion removal");
+ ITYPE ("nstcomm", ir->nstcomm, 100);
+ CTYPE ("group(s) for center of mass motion removal");
+ STYPE ("comm-grps", is->vcm, NULL);
+
+ CCTYPE ("LANGEVIN DYNAMICS OPTIONS");
+ CTYPE ("Friction coefficient (amu/ps) and random seed");
+ RTYPE ("bd-fric", ir->bd_fric, 0.0);
+ STEPTYPE ("ld-seed", ir->ld_seed, -1);
+
+ /* Em stuff */
+ CCTYPE ("ENERGY MINIMIZATION OPTIONS");
+ CTYPE ("Force tolerance and initial step-size");
+ RTYPE ("emtol", ir->em_tol, 10.0);
+ RTYPE ("emstep", ir->em_stepsize, 0.01);
+ CTYPE ("Max number of iterations in relax-shells");
+ ITYPE ("niter", ir->niter, 20);
+ CTYPE ("Step size (ps^2) for minimization of flexible constraints");
+ RTYPE ("fcstep", ir->fc_stepsize, 0);
+ CTYPE ("Frequency of steepest descents steps when doing CG");
+ ITYPE ("nstcgsteep", ir->nstcgsteep, 1000);
+ ITYPE ("nbfgscorr", ir->nbfgscorr, 10);
+
+ CCTYPE ("TEST PARTICLE INSERTION OPTIONS");
+ RTYPE ("rtpi", ir->rtpi, 0.05);
+
+ /* Output options */
+ CCTYPE ("OUTPUT CONTROL OPTIONS");
+ CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)");
+ ITYPE ("nstxout", ir->nstxout, 0);
+ ITYPE ("nstvout", ir->nstvout, 0);
+ ITYPE ("nstfout", ir->nstfout, 0);
+ CTYPE ("Output frequency for energies to log file and energy file");
+ ITYPE ("nstlog", ir->nstlog, 1000);
+ ITYPE ("nstcalcenergy", ir->nstcalcenergy, 100);
+ ITYPE ("nstenergy", ir->nstenergy, 1000);
+ CTYPE ("Output frequency and precision for .xtc file");
+ ITYPE ("nstxout-compressed", ir->nstxout_compressed, 0);
+ RTYPE ("compressed-x-precision", ir->x_compression_precision, 1000.0);
+ CTYPE ("This selects the subset of atoms for the compressed");
+ CTYPE ("trajectory file. You can select multiple groups. By");
+ CTYPE ("default, all atoms will be written.");
+ STYPE ("compressed-x-grps", is->x_compressed_groups, NULL);
+ CTYPE ("Selection of energy groups");
+ STYPE ("energygrps", is->energy, NULL);
+
+ /* Neighbor searching */
+ CCTYPE ("NEIGHBORSEARCHING PARAMETERS");
+ CTYPE ("cut-off scheme (Verlet: particle based cut-offs, group: using charge groups)");
+ EETYPE("cutoff-scheme", ir->cutoff_scheme, ecutscheme_names);
+ CTYPE ("nblist update frequency");
+ ITYPE ("nstlist", ir->nstlist, 10);
+ CTYPE ("ns algorithm (simple or grid)");
+ EETYPE("ns-type", ir->ns_type, ens_names);
+ CTYPE ("Periodic boundary conditions: xyz, no, xy");
+ EETYPE("pbc", ir->ePBC, epbc_names);
+ EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names);
+ CTYPE ("Allowed energy error due to the Verlet buffer in kJ/mol/ps per atom,");
+ CTYPE ("a value of -1 means: use rlist");
+ RTYPE("verlet-buffer-tolerance", ir->verletbuf_tol, 0.005);
+ CTYPE ("nblist cut-off");
+ RTYPE ("rlist", ir->rlist, 1.0);
+ CTYPE ("long-range cut-off for switched potentials");
+ RTYPE ("rlistlong", ir->rlistlong, -1);
+ ITYPE ("nstcalclr", ir->nstcalclr, -1);
+
+ /* Electrostatics */
+ CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
+ CTYPE ("Method for doing electrostatics");
+ EETYPE("coulombtype", ir->coulombtype, eel_names);
+ EETYPE("coulomb-modifier", ir->coulomb_modifier, eintmod_names);
+ CTYPE ("cut-off lengths");
+ RTYPE ("rcoulomb-switch", ir->rcoulomb_switch, 0.0);
+ RTYPE ("rcoulomb", ir->rcoulomb, 1.0);
+ CTYPE ("Relative dielectric constant for the medium and the reaction field");
+ RTYPE ("epsilon-r", ir->epsilon_r, 1.0);
+ RTYPE ("epsilon-rf", ir->epsilon_rf, 0.0);
+ CTYPE ("Method for doing Van der Waals");
+ EETYPE("vdw-type", ir->vdwtype, evdw_names);
+ EETYPE("vdw-modifier", ir->vdw_modifier, eintmod_names);
+ CTYPE ("cut-off lengths");
+ RTYPE ("rvdw-switch", ir->rvdw_switch, 0.0);
+ RTYPE ("rvdw", ir->rvdw, 1.0);
+ CTYPE ("Apply long range dispersion corrections for Energy and Pressure");
+ EETYPE("DispCorr", ir->eDispCorr, edispc_names);
+ CTYPE ("Extension of the potential lookup tables beyond the cut-off");
+ RTYPE ("table-extension", ir->tabext, 1.0);
+ CTYPE ("Separate tables between energy group pairs");
+ STYPE ("energygrp-table", is->egptable, NULL);
+ CTYPE ("Spacing for the PME/PPPM FFT grid");
+ RTYPE ("fourierspacing", ir->fourier_spacing, 0.12);
+ CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used");
+ ITYPE ("fourier-nx", ir->nkx, 0);
+ ITYPE ("fourier-ny", ir->nky, 0);
+ ITYPE ("fourier-nz", ir->nkz, 0);
+ CTYPE ("EWALD/PME/PPPM parameters");
+ ITYPE ("pme-order", ir->pme_order, 4);
+ RTYPE ("ewald-rtol", ir->ewald_rtol, 0.00001);
+ RTYPE ("ewald-rtol-lj", ir->ewald_rtol_lj, 0.001);
+ EETYPE("lj-pme-comb-rule", ir->ljpme_combination_rule, eljpme_names);
+ EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names);
+ RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0);
+
+ CCTYPE("IMPLICIT SOLVENT ALGORITHM");
+ EETYPE("implicit-solvent", ir->implicit_solvent, eis_names);
+
+ CCTYPE ("GENERALIZED BORN ELECTROSTATICS");
+ CTYPE ("Algorithm for calculating Born radii");
+ EETYPE("gb-algorithm", ir->gb_algorithm, egb_names);
+ CTYPE ("Frequency of calculating the Born radii inside rlist");
+ ITYPE ("nstgbradii", ir->nstgbradii, 1);
+ CTYPE ("Cutoff for Born radii calculation; the contribution from atoms");
+ CTYPE ("between rlist and rgbradii is updated every nstlist steps");
+ RTYPE ("rgbradii", ir->rgbradii, 1.0);
+ CTYPE ("Dielectric coefficient of the implicit solvent");
+ RTYPE ("gb-epsilon-solvent", ir->gb_epsilon_solvent, 80.0);
+ CTYPE ("Salt concentration in M for Generalized Born models");
+ RTYPE ("gb-saltconc", ir->gb_saltconc, 0.0);
+ CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)");
+ RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0);
+ RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8);
+ RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85);
+ RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009);
+ EETYPE("sa-algorithm", ir->sa_algorithm, esa_names);
+ CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA");
+ CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models.");
+ RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1);
+
+ /* Coupling stuff */
+ CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS");
+ CTYPE ("Temperature coupling");
+ EETYPE("tcoupl", ir->etc, etcoupl_names);
+ ITYPE ("nsttcouple", ir->nsttcouple, -1);
+ ITYPE("nh-chain-length", ir->opts.nhchainlength, 10);
+ EETYPE("print-nose-hoover-chain-variables", ir->bPrintNHChains, yesno_names);
+ CTYPE ("Groups to couple separately");
+ STYPE ("tc-grps", is->tcgrps, NULL);
+ CTYPE ("Time constant (ps) and reference temperature (K)");
+ STYPE ("tau-t", is->tau_t, NULL);
+ STYPE ("ref-t", is->ref_t, NULL);
+ CTYPE ("pressure coupling");
+ EETYPE("pcoupl", ir->epc, epcoupl_names);
+ EETYPE("pcoupltype", ir->epct, epcoupltype_names);
+ ITYPE ("nstpcouple", ir->nstpcouple, -1);
+ CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)");
+ RTYPE ("tau-p", ir->tau_p, 1.0);
+ STYPE ("compressibility", dumstr[0], NULL);
+ STYPE ("ref-p", dumstr[1], NULL);
+ CTYPE ("Scaling of reference coordinates, No, All or COM");
+ EETYPE ("refcoord-scaling", ir->refcoord_scaling, erefscaling_names);
+
+ /* QMMM */
+ CCTYPE ("OPTIONS FOR QMMM calculations");
+ EETYPE("QMMM", ir->bQMMM, yesno_names);
+ CTYPE ("Groups treated Quantum Mechanically");
+ STYPE ("QMMM-grps", is->QMMM, NULL);
+ CTYPE ("QM method");
+ STYPE("QMmethod", is->QMmethod, NULL);
+ CTYPE ("QMMM scheme");
+ EETYPE("QMMMscheme", ir->QMMMscheme, eQMMMscheme_names);
+ CTYPE ("QM basisset");
+ STYPE("QMbasis", is->QMbasis, NULL);
+ CTYPE ("QM charge");
+ STYPE ("QMcharge", is->QMcharge, NULL);
+ CTYPE ("QM multiplicity");
+ STYPE ("QMmult", is->QMmult, NULL);
+ CTYPE ("Surface Hopping");
+ STYPE ("SH", is->bSH, NULL);
+ CTYPE ("CAS space options");
+ STYPE ("CASorbitals", is->CASorbitals, NULL);
+ STYPE ("CASelectrons", is->CASelectrons, NULL);
+ STYPE ("SAon", is->SAon, NULL);
+ STYPE ("SAoff", is->SAoff, NULL);
+ STYPE ("SAsteps", is->SAsteps, NULL);
+ CTYPE ("Scale factor for MM charges");
+ RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0);
+ CTYPE ("Optimization of QM subsystem");
+ STYPE ("bOPT", is->bOPT, NULL);
+ STYPE ("bTS", is->bTS, NULL);
+
+ /* Simulated annealing */
+ CCTYPE("SIMULATED ANNEALING");
+ CTYPE ("Type of annealing for each temperature group (no/single/periodic)");
+ STYPE ("annealing", is->anneal, NULL);
+ CTYPE ("Number of time points to use for specifying annealing in each group");
+ STYPE ("annealing-npoints", is->anneal_npoints, NULL);
+ CTYPE ("List of times at the annealing points for each group");
+ STYPE ("annealing-time", is->anneal_time, NULL);
+ CTYPE ("Temp. at each annealing point, for each group.");
+ STYPE ("annealing-temp", is->anneal_temp, NULL);
+
+ /* Startup run */
+ CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN");
+ EETYPE("gen-vel", opts->bGenVel, yesno_names);
+ RTYPE ("gen-temp", opts->tempi, 300.0);
+ ITYPE ("gen-seed", opts->seed, -1);
+
+ /* Shake stuff */
+ CCTYPE ("OPTIONS FOR BONDS");
+ EETYPE("constraints", opts->nshake, constraints);
+ CTYPE ("Type of constraint algorithm");
+ EETYPE("constraint-algorithm", ir->eConstrAlg, econstr_names);
+ CTYPE ("Do not constrain the start configuration");
+ EETYPE("continuation", ir->bContinuation, yesno_names);
+ CTYPE ("Use successive overrelaxation to reduce the number of shake iterations");
+ EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names);
+ CTYPE ("Relative tolerance of shake");
+ RTYPE ("shake-tol", ir->shake_tol, 0.0001);
+ CTYPE ("Highest order in the expansion of the constraint coupling matrix");
+ ITYPE ("lincs-order", ir->nProjOrder, 4);
+ CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for");
+ CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs.");
+ CTYPE ("For energy minimization with constraints it should be 4 to 8.");
+ ITYPE ("lincs-iter", ir->nLincsIter, 1);
+ CTYPE ("Lincs will write a warning to the stderr if in one step a bond");
+ CTYPE ("rotates over more degrees than");
+ RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0);
+ CTYPE ("Convert harmonic bonds to morse potentials");
+ EETYPE("morse", opts->bMorse, yesno_names);
+
+ /* Energy group exclusions */
+ CCTYPE ("ENERGY GROUP EXCLUSIONS");
+ CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded");
+ STYPE ("energygrp-excl", is->egpexcl, NULL);
+
+ /* Walls */
+ CCTYPE ("WALLS");
+ CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald");
+ ITYPE ("nwall", ir->nwall, 0);
+ EETYPE("wall-type", ir->wall_type, ewt_names);
+ RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1);
+ STYPE ("wall-atomtype", is->wall_atomtype, NULL);
+ STYPE ("wall-density", is->wall_density, NULL);
+ RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3);
+
+ /* COM pulling */
+ CCTYPE("COM PULLING");
+ CTYPE("Pull type: no, umbrella, constraint or constant-force");
+ EETYPE("pull", ir->ePull, epull_names);
+ if (ir->ePull != epullNO)
+ {
+ snew(ir->pull, 1);
+ is->pull_grp = read_pullparams(&ninp, &inp, ir->pull, &opts->pull_start, wi);
+ }
+
+ /* Enforced rotation */
+ CCTYPE("ENFORCED ROTATION");
+ CTYPE("Enforced rotation: No or Yes");
+ EETYPE("rotation", ir->bRot, yesno_names);
+ if (ir->bRot)
+ {
+ snew(ir->rot, 1);
+ is->rot_grp = read_rotparams(&ninp, &inp, ir->rot, wi);
+ }
+
+ /* Interactive MD */
+ ir->bIMD = FALSE;
+ CCTYPE("Group to display and/or manipulate in interactive MD session");
+ STYPE ("IMD-group", is->imd_grp, NULL);
+ if (is->imd_grp[0] != '\0')
+ {
+ snew(ir->imd, 1);
+ ir->bIMD = TRUE;
+ }
+
+ /* Refinement */
+ CCTYPE("NMR refinement stuff");
+ CTYPE ("Distance restraints type: No, Simple or Ensemble");
+ EETYPE("disre", ir->eDisre, edisre_names);
+ CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal");
+ EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names);
+ CTYPE ("Use sqrt of the time averaged times the instantaneous violation");
+ EETYPE("disre-mixed", ir->bDisreMixed, yesno_names);
+ RTYPE ("disre-fc", ir->dr_fc, 1000.0);
+ RTYPE ("disre-tau", ir->dr_tau, 0.0);
+ CTYPE ("Output frequency for pair distances to energy file");
+ ITYPE ("nstdisreout", ir->nstdisreout, 100);
+ CTYPE ("Orientation restraints: No or Yes");
+ EETYPE("orire", opts->bOrire, yesno_names);
+ CTYPE ("Orientation restraints force constant and tau for time averaging");
+ RTYPE ("orire-fc", ir->orires_fc, 0.0);
+ RTYPE ("orire-tau", ir->orires_tau, 0.0);
+ STYPE ("orire-fitgrp", is->orirefitgrp, NULL);
+ CTYPE ("Output frequency for trace(SD) and S to energy file");
+ ITYPE ("nstorireout", ir->nstorireout, 100);
+
+ /* free energy variables */
+ CCTYPE ("Free energy variables");
+ EETYPE("free-energy", ir->efep, efep_names);
+ STYPE ("couple-moltype", is->couple_moltype, NULL);
+ EETYPE("couple-lambda0", opts->couple_lam0, couple_lam);
+ EETYPE("couple-lambda1", opts->couple_lam1, couple_lam);
+ EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names);
+
+ RTYPE ("init-lambda", fep->init_lambda, -1); /* start with -1 so
+ we can recognize if
+ it was not entered */
+ ITYPE ("init-lambda-state", fep->init_fep_state, -1);
+ RTYPE ("delta-lambda", fep->delta_lambda, 0.0);
+ ITYPE ("nstdhdl", fep->nstdhdl, 50);
+ STYPE ("fep-lambdas", is->fep_lambda[efptFEP], NULL);
+ STYPE ("mass-lambdas", is->fep_lambda[efptMASS], NULL);
+ STYPE ("coul-lambdas", is->fep_lambda[efptCOUL], NULL);
+ STYPE ("vdw-lambdas", is->fep_lambda[efptVDW], NULL);
+ STYPE ("bonded-lambdas", is->fep_lambda[efptBONDED], NULL);
+ STYPE ("restraint-lambdas", is->fep_lambda[efptRESTRAINT], NULL);
+ STYPE ("temperature-lambdas", is->fep_lambda[efptTEMPERATURE], NULL);
+ ITYPE ("calc-lambda-neighbors", fep->lambda_neighbors, 1);
+ STYPE ("init-lambda-weights", is->lambda_weights, NULL);
+ EETYPE("dhdl-print-energy", fep->bPrintEnergy, yesno_names);
+ RTYPE ("sc-alpha", fep->sc_alpha, 0.0);
+ ITYPE ("sc-power", fep->sc_power, 1);
+ RTYPE ("sc-r-power", fep->sc_r_power, 6.0);
+ RTYPE ("sc-sigma", fep->sc_sigma, 0.3);
+ EETYPE("sc-coul", fep->bScCoul, yesno_names);
+ ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
+ RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
+ EETYPE("separate-dhdl-file", fep->separate_dhdl_file,
+ separate_dhdl_file_names);
+ EETYPE("dhdl-derivatives", fep->dhdl_derivatives, dhdl_derivatives_names);
+ ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
+ RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
+
+ /* Non-equilibrium MD stuff */
+ CCTYPE("Non-equilibrium MD stuff");
+ STYPE ("acc-grps", is->accgrps, NULL);
+ STYPE ("accelerate", is->acc, NULL);
+ STYPE ("freezegrps", is->freeze, NULL);
+ STYPE ("freezedim", is->frdim, NULL);
+ RTYPE ("cos-acceleration", ir->cos_accel, 0);
+ STYPE ("deform", is->deform, NULL);
+
+ /* simulated tempering variables */
+ CCTYPE("simulated tempering variables");
+ EETYPE("simulated-tempering", ir->bSimTemp, yesno_names);
+ EETYPE("simulated-tempering-scaling", ir->simtempvals->eSimTempScale, esimtemp_names);
+ RTYPE("sim-temp-low", ir->simtempvals->simtemp_low, 300.0);
+ RTYPE("sim-temp-high", ir->simtempvals->simtemp_high, 300.0);
+
+ /* expanded ensemble variables */
+ if (ir->efep == efepEXPANDED || ir->bSimTemp)
+ {
+ read_expandedparams(&ninp, &inp, expand, wi);
+ }
+
+ /* Electric fields */
+ CCTYPE("Electric fields");
+ CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
+ CTYPE ("and a phase angle (real)");
+ STYPE ("E-x", is->efield_x, NULL);
+ STYPE ("E-xt", is->efield_xt, NULL);
+ STYPE ("E-y", is->efield_y, NULL);
+ STYPE ("E-yt", is->efield_yt, NULL);
+ STYPE ("E-z", is->efield_z, NULL);
+ STYPE ("E-zt", is->efield_zt, NULL);
+
+ CCTYPE("Ion/water position swapping for computational electrophysiology setups");
+ CTYPE("Swap positions along direction: no, X, Y, Z");
+ EETYPE("swapcoords", ir->eSwapCoords, eSwapTypes_names);
+ if (ir->eSwapCoords != eswapNO)
+ {
+ snew(ir->swap, 1);
+ CTYPE("Swap attempt frequency");
+ ITYPE("swap-frequency", ir->swap->nstswap, 1);
+ CTYPE("Two index groups that contain the compartment-partitioning atoms");
+ STYPE("split-group0", splitgrp0, NULL);
+ STYPE("split-group1", splitgrp1, NULL);
+ CTYPE("Use center of mass of split groups (yes/no), otherwise center of geometry is used");
+ EETYPE("massw-split0", ir->swap->massw_split[0], yesno_names);
+ EETYPE("massw-split1", ir->swap->massw_split[1], yesno_names);
+
+ CTYPE("Group name of ions that can be exchanged with solvent molecules");
+ STYPE("swap-group", swapgrp, NULL);
+ CTYPE("Group name of solvent molecules");
+ STYPE("solvent-group", solgrp, NULL);
+
+ CTYPE("Split cylinder: radius, upper and lower extension (nm) (this will define the channels)");
+ CTYPE("Note that the split cylinder settings do not have an influence on the swapping protocol,");
+ CTYPE("however, if correctly defined, the ion permeation events are counted per channel");
+ RTYPE("cyl0-r", ir->swap->cyl0r, 2.0);
+ RTYPE("cyl0-up", ir->swap->cyl0u, 1.0);
+ RTYPE("cyl0-down", ir->swap->cyl0l, 1.0);
+ RTYPE("cyl1-r", ir->swap->cyl1r, 2.0);
+ RTYPE("cyl1-up", ir->swap->cyl1u, 1.0);
+ RTYPE("cyl1-down", ir->swap->cyl1l, 1.0);
+
+ CTYPE("Average the number of ions per compartment over these many swap attempt steps");
+ ITYPE("coupl-steps", ir->swap->nAverage, 10);
+ CTYPE("Requested number of anions and cations for each of the two compartments");
+ CTYPE("-1 means fix the numbers as found in time step 0");
+ ITYPE("anionsA", ir->swap->nanions[0], -1);
+ ITYPE("cationsA", ir->swap->ncations[0], -1);
+ ITYPE("anionsB", ir->swap->nanions[1], -1);
+ ITYPE("cationsB", ir->swap->ncations[1], -1);
+ CTYPE("Start to swap ions if threshold difference to requested count is reached");
+ RTYPE("threshold", ir->swap->threshold, 1.0);
+ }
+
+ /* AdResS defined thingies */
+ CCTYPE ("AdResS parameters");
+ EETYPE("adress", ir->bAdress, yesno_names);
+ if (ir->bAdress)
+ {
+ snew(ir->adress, 1);
+ read_adressparams(&ninp, &inp, ir->adress, wi);
+ }
+
+ /* User defined thingies */
+ CCTYPE ("User defined thingies");
+ STYPE ("user1-grps", is->user1, NULL);
+ STYPE ("user2-grps", is->user2, NULL);
+ ITYPE ("userint1", ir->userint1, 0);
+ ITYPE ("userint2", ir->userint2, 0);
+ ITYPE ("userint3", ir->userint3, 0);
+ ITYPE ("userint4", ir->userint4, 0);
+ RTYPE ("userreal1", ir->userreal1, 0);
+ RTYPE ("userreal2", ir->userreal2, 0);
+ RTYPE ("userreal3", ir->userreal3, 0);
+ RTYPE ("userreal4", ir->userreal4, 0);
+#undef CTYPE
+
+ write_inpfile(mdparout, ninp, inp, FALSE, wi);
+ for (i = 0; (i < ninp); i++)
+ {
+ sfree(inp[i].name);
+ sfree(inp[i].value);
+ }
+ sfree(inp);
+
+ /* Process options if necessary */
+ for (m = 0; m < 2; m++)
+ {
+ for (i = 0; i < 2*DIM; i++)
+ {
+ dumdub[m][i] = 0.0;
+ }
+ if (ir->epc)
+ {
+ switch (ir->epct)
+ {
+ case epctISOTROPIC:
+ if (sscanf(dumstr[m], "%lf", &(dumdub[m][XX])) != 1)
+ {
+ warning_error(wi, "Pressure coupling not enough values (I need 1)");
+ }
+ dumdub[m][YY] = dumdub[m][ZZ] = dumdub[m][XX];
+ break;
+ case epctSEMIISOTROPIC:
+ case epctSURFACETENSION:
+ if (sscanf(dumstr[m], "%lf%lf",
+ &(dumdub[m][XX]), &(dumdub[m][ZZ])) != 2)
+ {
+ warning_error(wi, "Pressure coupling not enough values (I need 2)");
+ }
+ dumdub[m][YY] = dumdub[m][XX];
+ break;
+ case epctANISOTROPIC:
+ if (sscanf(dumstr[m], "%lf%lf%lf%lf%lf%lf",
+ &(dumdub[m][XX]), &(dumdub[m][YY]), &(dumdub[m][ZZ]),
+ &(dumdub[m][3]), &(dumdub[m][4]), &(dumdub[m][5])) != 6)
+ {
+ warning_error(wi, "Pressure coupling not enough values (I need 6)");
+ }
+ break;
+ default:
+ gmx_fatal(FARGS, "Pressure coupling type %s not implemented yet",
+ epcoupltype_names[ir->epct]);
+ }
+ }
+ }
+ clear_mat(ir->ref_p);
+ clear_mat(ir->compress);
+ for (i = 0; i < DIM; i++)
+ {
+ ir->ref_p[i][i] = dumdub[1][i];
+ ir->compress[i][i] = dumdub[0][i];
+ }
+ if (ir->epct == epctANISOTROPIC)
+ {
+ ir->ref_p[XX][YY] = dumdub[1][3];
+ ir->ref_p[XX][ZZ] = dumdub[1][4];
+ ir->ref_p[YY][ZZ] = dumdub[1][5];
+ if (ir->ref_p[XX][YY] != 0 && ir->ref_p[XX][ZZ] != 0 && ir->ref_p[YY][ZZ] != 0)
+ {
+ warning(wi, "All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n");
+ }
+ ir->compress[XX][YY] = dumdub[0][3];
+ ir->compress[XX][ZZ] = dumdub[0][4];
+ ir->compress[YY][ZZ] = dumdub[0][5];
+ for (i = 0; i < DIM; i++)
+ {
+ for (m = 0; m < i; m++)
+ {
+ ir->ref_p[i][m] = ir->ref_p[m][i];
+ ir->compress[i][m] = ir->compress[m][i];
+ }
+ }
+ }
+
+ if (ir->comm_mode == ecmNO)
+ {
+ ir->nstcomm = 0;
+ }
+
+ opts->couple_moltype = NULL;
+ if (strlen(is->couple_moltype) > 0)
+ {
+ if (ir->efep != efepNO)
+ {
+ opts->couple_moltype = strdup(is->couple_moltype);
+ if (opts->couple_lam0 == opts->couple_lam1)
+ {
+ warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
+ }
+ if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE ||
+ opts->couple_lam1 == ecouplamNONE))
+ {
+ warning(wi, "For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used");
+ }
+ }
+ else
+ {
+ warning(wi, "Can not couple a molecule with free_energy = no");
+ }
+ }
+ /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */
+ if (ir->efep != efepNO)
+ {
+ if (fep->delta_lambda > 0)
+ {
+ ir->efep = efepSLOWGROWTH;
+ }
+ }
+
+ if (ir->bSimTemp)
+ {
+ fep->bPrintEnergy = TRUE;
+ /* always print out the energy to dhdl if we are doing expanded ensemble, since we need the total energy
+ if the temperature is changing. */
+ }
+
+ if ((ir->efep != efepNO) || ir->bSimTemp)
+ {
+ ir->bExpanded = FALSE;
+ if ((ir->efep == efepEXPANDED) || ir->bSimTemp)
+ {
+ ir->bExpanded = TRUE;
+ }
+ do_fep_params(ir, is->fep_lambda, is->lambda_weights);
+ if (ir->bSimTemp) /* done after fep params */
+ {
+ do_simtemp_params(ir);
+ }
+ }
+ else
+ {
+ ir->fepvals->n_lambda = 0;
+ }
+
+ /* WALL PARAMETERS */
+
+ do_wall_params(ir, is->wall_atomtype, is->wall_density, opts);
+
+ /* ORIENTATION RESTRAINT PARAMETERS */
+
+ if (opts->bOrire && str_nelem(is->orirefitgrp, MAXPTR, NULL) != 1)
+ {
+ warning_error(wi, "ERROR: Need one orientation restraint fit group\n");
+ }
+
+ /* DEFORMATION PARAMETERS */
+
+ clear_mat(ir->deform);
+ for (i = 0; i < 6; i++)
+ {
+ dumdub[0][i] = 0;
+ }
+ m = sscanf(is->deform, "%lf %lf %lf %lf %lf %lf",
+ &(dumdub[0][0]), &(dumdub[0][1]), &(dumdub[0][2]),
+ &(dumdub[0][3]), &(dumdub[0][4]), &(dumdub[0][5]));
+ for (i = 0; i < 3; i++)
+ {
+ ir->deform[i][i] = dumdub[0][i];
+ }
+ ir->deform[YY][XX] = dumdub[0][3];
+ ir->deform[ZZ][XX] = dumdub[0][4];
+ ir->deform[ZZ][YY] = dumdub[0][5];
+ if (ir->epc != epcNO)
+ {
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j <= i; j++)
+ {
+ if (ir->deform[i][j] != 0 && ir->compress[i][j] != 0)
+ {
+ warning_error(wi, "A box element has deform set and compressibility > 0");
+ }
+ }
+ }
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < i; j++)
+ {
+ if (ir->deform[i][j] != 0)
+ {
+ for (m = j; m < DIM; m++)
+ {
+ if (ir->compress[m][j] != 0)
+ {
+ sprintf(warn_buf, "An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
+ warning(wi, warn_buf);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* Ion/water position swapping checks */
+ if (ir->eSwapCoords != eswapNO)
+ {
+ if (ir->swap->nstswap < 1)
+ {
+ warning_error(wi, "swap_frequency must be 1 or larger when ion swapping is requested");
+ }
+ if (ir->swap->nAverage < 1)
+ {
+ warning_error(wi, "coupl_steps must be 1 or larger.\n");
+ }
+ if (ir->swap->threshold < 1.0)
+ {
+ warning_error(wi, "Ion count threshold must be at least 1.\n");
+ }
+ }
+
+ sfree(dumstr[0]);
+ sfree(dumstr[1]);
+}
+
+/* Look up the QM method or basis-set name s in the table gn[0..ng-1].
+ *
+ * Entries are compared with gmx_strcasecmp(); the index of the first entry
+ * that compares equal is returned. When nothing matches, gmx_fatal() is
+ * called with the offending name.
+ *
+ * s  - name to look for
+ * ng - number of entries in gn
+ * gn - table of known names
+ */
+static int search_QMstring(const char *s, int ng, const char *gn[])
+{
+    int i;
+
+    for (i = 0; (i < ng); i++)
+    {
+        if (gmx_strcasecmp(s, gn[i]) == 0)
+        {
+            return i;
+        }
+    }
+
+    /* Keep the '!' on the same line as the message text */
+    gmx_fatal(FARGS, "this QM method or basisset (%s) is not implemented!", s);
+
+    /* Only reached if gmx_fatal() returns (presumably it does not);
+     * keeps the function well-formed for the compiler.
+     */
+    return -1;
+
+} /* search_QMstring */
+
+/* Find the group name s in gn[0..ng-1] and return its index.
+ *
+ * Entries are compared with gmx_strcasecmp(); the first entry that compares
+ * equal wins. When no entry matches, gmx_fatal() is called with a message
+ * that names the missing group.
+ *
+ * gn would ideally be const as well, but C has no implicit conversion
+ * from char ** to const char **, so callers could not pass their arrays.
+ */
+int search_string(const char *s, int ng, char *gn[])
+{
+    int idx = 0;
+
+    while (idx < ng)
+    {
+        if (0 == gmx_strcasecmp(s, gn[idx]))
+        {
+            return idx;
+        }
+        idx++;
+    }
+
+    gmx_fatal(FARGS,
+              "Group %s referenced in the .mdp file was not found in the index file.\n"
+              "Group names must match either [moleculetype] names or custom index group\n"
+              "names, in which case you must supply an index file to the '-n' option\n"
+              "of grompp.",
+              s);
+
+    /* Only reached if gmx_fatal() returns (presumably it does not) */
+    return -1;
+}
+
+/* Assign a group number of type gtype to every atom.
+ *
+ * The ng names in ptrs[] are looked up in gnames[] with search_string();
+ * the atoms of each matching block entry receive the position of that name
+ * in ptrs[] as group number (or 0 for every name when grptp is egrptpONE).
+ * An atom that occurs in more than one of the listed groups, or an atom
+ * index outside [0, natoms), is a fatal error.
+ *
+ * Atoms not covered by any listed group:
+ *   egrptpALL  - fatal error
+ *   egrptpPART - a note is issued; the atoms get number grps->nr but no
+ *                group entry is added for them
+ *   otherwise  - a rest group with name index restnm is appended and the
+ *                atoms are put in it
+ *
+ * natoms   - number of atoms
+ * groups   - receives nm_ind/nr for gtype and the per-atom grpnr[gtype]
+ *            array (left NULL when a single group covers all or no atoms)
+ * ng, ptrs - number and names of the requested groups
+ * block    - atom lists per index group; gnames holds the matching names
+ * gtype    - group type to fill; also selects the title used in messages
+ * restnm   - name index stored for an appended rest group
+ * grptp    - coverage policy, see above
+ * bVerbose - report creation of a rest group on stderr
+ * wi       - warning collector
+ *
+ * Returns TRUE when grptp is egrptpPART and some atoms were left without
+ * a listed group, FALSE otherwise.
+ *
+ * NOTE(review): grps->nr is incremented from its incoming value and never
+ * reset here - presumably it is zero on entry; confirm against callers.
+ */
+static gmx_bool do_numbering(int natoms, gmx_groups_t *groups, int ng, char *ptrs[],
+                             t_blocka *block, char *gnames[],
+                             int gtype, int restnm,
+                             int grptp, gmx_bool bVerbose,
+                             warninp_t wi)
+{
+    unsigned short *cbuf;
+    t_grps         *grps = &(groups->grps[gtype]);
+    int             i, j, gid, aj, ognr, ntot = 0;
+    const char     *title;
+    gmx_bool        bRest;
+    char            warn_buf[STRLEN];
+
+    if (debug)
+    {
+        fprintf(debug, "Starting numbering %d groups of type %d\n", ng, gtype);
+    }
+
+    title = gtypes[gtype];
+
+    snew(cbuf, natoms);
+    /* Mark all id's as not set */
+    for (i = 0; (i < natoms); i++)
+    {
+        cbuf[i] = NOGID;
+    }
+
+    snew(grps->nm_ind, ng+1); /* +1 for possible rest group */
+    for (i = 0; (i < ng); i++)
+    {
+        /* Lookup the group name in the block structure */
+        gid = search_string(ptrs[i], block->nr, gnames);
+        /* With egrptpONE only the first name is recorded */
+        if ((grptp != egrptpONE) || (i == 0))
+        {
+            grps->nm_ind[grps->nr++] = gid;
+        }
+        if (debug)
+        {
+            fprintf(debug, "Found gid %d for group %s\n", gid, ptrs[i]);
+        }
+
+        /* Now go over the atoms in the group */
+        for (j = block->index[gid]; (j < block->index[gid+1]); j++)
+        {
+
+            aj = block->a[j];
+
+            /* Range checking */
+            if ((aj < 0) || (aj >= natoms))
+            {
+                gmx_fatal(FARGS, "Invalid atom number %d in indexfile", aj);
+            }
+            /* Lookup up the old group number */
+            ognr = cbuf[aj];
+            if (ognr != NOGID)
+            {
+                gmx_fatal(FARGS, "Atom %d in multiple %s groups (%d and %d)",
+                          aj+1, title, ognr+1, i+1);
+            }
+            else
+            {
+                /* Store the group number in buffer */
+                if (grptp == egrptpONE)
+                {
+                    cbuf[aj] = 0;
+                }
+                else
+                {
+                    cbuf[aj] = i;
+                }
+                ntot++;
+            }
+        }
+    }
+
+    /* Now check whether we have done all atoms */
+    bRest = FALSE;
+    if (ntot != natoms)
+    {
+        if (grptp == egrptpALL)
+        {
+            gmx_fatal(FARGS, "%d atoms are not part of any of the %s groups",
+                      natoms-ntot, title);
+        }
+        else if (grptp == egrptpPART)
+        {
+            sprintf(warn_buf, "%d atoms are not part of any of the %s groups",
+                    natoms-ntot, title);
+            warning_note(wi, warn_buf);
+        }
+        /* Assign all atoms currently unassigned to a rest group */
+        for (j = 0; (j < natoms); j++)
+        {
+            if (cbuf[j] == NOGID)
+            {
+                cbuf[j] = grps->nr;
+                bRest   = TRUE;
+            }
+        }
+        if (grptp != egrptpPART)
+        {
+            if (bVerbose)
+            {
+                fprintf(stderr,
+                        "Making dummy/rest group for %s containing %d elements\n",
+                        title, natoms-ntot);
+            }
+            /* Add group name "rest". The loop above has already given
+             * every unassigned atom the number grps->nr, so only the name
+             * entry and the group count need updating here.
+             */
+            grps->nm_ind[grps->nr] = restnm;
+            grps->nr++;
+        }
+    }
+
+    if (grps->nr == 1 && (ntot == 0 || ntot == natoms))
+    {
+        /* All atoms are part of one (or no) group, no index required */
+        groups->ngrpnr[gtype] = 0;
+        groups->grpnr[gtype]  = NULL;
+    }
+    else
+    {
+        groups->ngrpnr[gtype] = natoms;
+        snew(groups->grpnr[gtype], natoms);
+        for (j = 0; (j < natoms); j++)
+        {
+            groups->grpnr[gtype][j] = cbuf[j];
+        }
+    }
+
+    sfree(cbuf);
+
+    return (bRest && grptp == egrptpPART);
+}
+
+static void calc_nrdf(gmx_mtop_t *mtop, t_inputrec *ir, char **gnames)
+{
+ t_grpopts *opts;
+ gmx_groups_t *groups;
+ t_pull *pull;
+ int natoms, ai, aj, i, j, d, g, imin, jmin;
+ t_iatom *ia;
+ int *nrdf2, *na_vcm, na_tot;
+ double *nrdf_tc, *nrdf_vcm, nrdf_uc, n_sub = 0;
+ gmx_mtop_atomloop_all_t aloop;
+ t_atom *atom;
+ int mb, mol, ftype, as;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+
+ /* Calculate nrdf: the number of degrees of freedom (dof) of every
+ * temperature-coupling (TC) group, stored in ir->opts.nrdf[] and
+ * printed to stderr.
+ *
+ * mtop   - supplies the atoms, the group assignments and the
+ *          constraint/SETTLE lists of every molecule type
+ * ir     - opts.nFreeze, ePull/pull, nstcomm and comm_mode are read;
+ *          opts.nrdf[] is written
+ * gnames - group names, indexed through the TC nm_ind[] table and used
+ *          only in messages
+ *
+ * Steps:
+ * First calc 3xnr-atoms for each group (one dof per non-frozen
+ * dimension of each atom),
+ * then subtract half a degree of freedom for each constraint
+ * from each of the two atoms involved and one per atom for a SETTLE,
+ * then correct for COM pull constraints and for removal of the
+ * center of mass motion of the VCM groups.
+ *
+ * Only atoms and nuclei contribute to the degrees of freedom...
+ */
+
+ opts = &ir->opts;
+
+ groups = &mtop->groups;
+ natoms = mtop->natoms;
+
+ /* Allocate one more for a possible rest group */
+ /* We need to sum degrees of freedom into doubles,
+ * since floats give too low nrdf's above 3 million atoms.
+ */
+ snew(nrdf_tc, groups->grps[egcTC].nr+1);
+ snew(nrdf_vcm, groups->grps[egcVCM].nr+1);
+ snew(na_vcm, groups->grps[egcVCM].nr+1);
+
+ for (i = 0; i < groups->grps[egcTC].nr; i++)
+ {
+ nrdf_tc[i] = 0;
+ }
+ for (i = 0; i < groups->grps[egcVCM].nr+1; i++)
+ {
+ nrdf_vcm[i] = 0;
+ }
+
+ snew(nrdf2, natoms); /* nrdf2[i] holds twice the dof of atom i, so that
+ * half degrees of freedom can be removed with
+ * integer arithmetic */
+ aloop = gmx_mtop_atomloop_all_init(mtop);
+ while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
+ {
+ nrdf2[i] = 0;
+ if (atom->ptype == eptAtom || atom->ptype == eptNucleus)
+ {
+ g = ggrpnr(groups, egcFREEZE, i);
+ /* Double count nrdf for particle i */
+ for (d = 0; d < DIM; d++)
+ {
+ if (opts->nFreeze[g][d] == 0)
+ {
+ nrdf2[i] += 2;
+ }
+ }
+ nrdf_tc [ggrpnr(groups, egcTC, i)] += 0.5*nrdf2[i];
+ nrdf_vcm[ggrpnr(groups, egcVCM, i)] += 0.5*nrdf2[i];
+ }
+ }
+
+ as = 0; /* global index of the first atom of the current molecule */
+ for (mb = 0; mb < mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+ atom = molt->atoms.atom;
+ for (mol = 0; mol < molb->nmol; mol++)
+ {
+ for (ftype = F_CONSTR; ftype <= F_CONSTRNC; ftype++)
+ {
+ ia = molt->ilist[ftype].iatoms;
+ for (i = 0; i < molt->ilist[ftype].nr; )
+ {
+ /* Subtract degrees of freedom for the constraints,
+ * if the particles still have degrees of freedom left.
+ * If one of the particles is a vsite or a shell, then all
+ * constraint motion will go there, but since they do not
+ * contribute to the constraints the degrees of freedom do not
+ * change.
+ */
+ ai = as + ia[1]; /* ia[1] and ia[2] are molecule-local atom indices */
+ aj = as + ia[2];
+ if (((atom[ia[1]].ptype == eptNucleus) ||
+ (atom[ia[1]].ptype == eptAtom)) &&
+ ((atom[ia[2]].ptype == eptNucleus) ||
+ (atom[ia[2]].ptype == eptAtom)))
+ {
+ /* jmin is what aj gives up and depends on ai (and vice
+ * versa for imin), both in units of nrdf2, i.e. half dof.
+ */
+ if (nrdf2[ai] > 0)
+ {
+ jmin = 1; /* ai still has dof: aj gives up half a dof */
+ }
+ else
+ {
+ jmin = 2; /* ai has none left: aj carries the whole constraint */
+ }
+ if (nrdf2[aj] > 0)
+ {
+ imin = 1;
+ }
+ else
+ {
+ imin = 2;
+ }
+ imin = min(imin, nrdf2[ai]); /* never remove more than the atom has left */
+ jmin = min(jmin, nrdf2[aj]);
+ nrdf2[ai] -= imin;
+ nrdf2[aj] -= jmin;
+ nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin;
+ nrdf_tc [ggrpnr(groups, egcTC, aj)] -= 0.5*jmin;
+ nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin;
+ nrdf_vcm[ggrpnr(groups, egcVCM, aj)] -= 0.5*jmin;
+ }
+ /* Advance to the next entry of this interaction list */
+ ia += interaction_function[ftype].nratoms+1;
+ i += interaction_function[ftype].nratoms+1;
+ }
+ }
+ ia = molt->ilist[F_SETTLE].iatoms;
+ for (i = 0; i < molt->ilist[F_SETTLE].nr; )
+ {
+ /* Subtract 1 dof from every atom in the SETTLE */
+ for (j = 0; j < 3; j++)
+ {
+ ai = as + ia[1+j];
+ imin = min(2, nrdf2[ai]); /* 2 units of nrdf2 == 1 dof, capped at what is left */
+ nrdf2[ai] -= imin;
+ nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin;
+ nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin;
+ }
+ ia += 4; /* each SETTLE entry occupies 4 ints: ia[0] plus three atoms */
+ i += 4;
+ }
+ as += molt->atoms.nr;
+ }
+ }
+
+ if (ir->ePull == epullCONSTRAINT)
+ {
+ /* Correct nrdf for the COM constraints.
+ * We correct using the TC and VCM group of the first atom
+ * in the reference and pull group. If atoms in one pull group
+ * belong to different TC or VCM groups it is anyhow difficult
+ * to determine the optimal nrdf assignment.
+ */
+ pull = ir->pull;
+
+ for (i = 0; i < pull->ncoord; i++)
+ {
+ imin = 1; /* in half-dof units: 0.5*imin is subtracted per group below */
+
+ for (j = 0; j < 2; j++)
+ {
+ const t_pull_group *pgrp;
+
+ pgrp = &pull->group[pull->coord[i].group[j]];
+
+ if (pgrp->nat > 0)
+ {
+ /* Subtract 1/2 dof from each group */
+ ai = pgrp->ind[0];
+ nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin;
+ nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin;
+ if (nrdf_tc[ggrpnr(groups, egcTC, ai)] < 0)
+ {
+ gmx_fatal(FARGS, "Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative", gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups, egcTC, ai)]]);
+ }
+ }
+ else
+ {
+ /* We need to subtract the whole DOF from group j=1 */
+ imin += 1;
+ }
+ }
+ }
+ }
+
+ if (ir->nstcomm != 0)
+ {
+ /* Subtract ndof_com(ir) from the number of degrees of freedom in
+ * each vcm group when com translation is removed and 6 when
+ * rotation is removed as well.
+ */
+ switch (ir->comm_mode)
+ {
+ case ecmLINEAR:
+ n_sub = ndof_com(ir);
+ break;
+ case ecmANGULAR:
+ n_sub = 6;
+ break;
+ default:
+ n_sub = 0;
+ gmx_incons("Checking comm_mode");
+ }
+
+ for (i = 0; i < groups->grps[egcTC].nr; i++)
+ {
+ /* Count the number of atoms of TC group i for every VCM group */
+ for (j = 0; j < groups->grps[egcVCM].nr+1; j++)
+ {
+ na_vcm[j] = 0;
+ }
+ na_tot = 0;
+ for (ai = 0; ai < natoms; ai++)
+ {
+ if (ggrpnr(groups, egcTC, ai) == i)
+ {
+ na_vcm[ggrpnr(groups, egcVCM, ai)]++;
+ na_tot++;
+ }
+ }
+ /* Correct for VCM removal according to the fraction of each VCM
+ * group present in this TC group.
+ * NOTE(review): if no atom maps to TC group i, na_tot stays 0 and
+ * the division below is 0/0 - confirm that an empty TC group
+ * cannot reach this point.
+ */
+ nrdf_uc = nrdf_tc[i]; /* uncorrected dof of TC group i */
+ if (debug)
+ {
+ fprintf(debug, "T-group[%d] nrdf_uc = %g, n_sub = %g\n",
+ i, nrdf_uc, n_sub);
+ }
+ nrdf_tc[i] = 0;
+ for (j = 0; j < groups->grps[egcVCM].nr+1; j++)
+ {
+ /* A VCM group with no more than n_sub dof contributes nothing */
+ if (nrdf_vcm[j] > n_sub)
+ {
+ nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)*
+ (nrdf_vcm[j] - n_sub)/nrdf_vcm[j];
+ }
+ if (debug)
+ {
+ fprintf(debug, " nrdf_vcm[%d] = %g, nrdf = %g\n",
+ j, nrdf_vcm[j], nrdf_tc[i]);
+ }
+ }
+ }
+ }
+ for (i = 0; (i < groups->grps[egcTC].nr); i++)
+ {
+ opts->nrdf[i] = nrdf_tc[i];
+ if (opts->nrdf[i] < 0) /* clamp: a negative count is stored as zero */
+ {
+ opts->nrdf[i] = 0;
+ }
+ fprintf(stderr,
+ "Number of degrees of freedom in T-Coupling group %s is %.2f\n",
+ gnames[groups->grps[egcTC].nm_ind[i]], opts->nrdf[i]);
+ }
+
+ sfree(nrdf2);
+ sfree(nrdf_tc);
+ sfree(nrdf_vcm);
+ sfree(na_vcm);
+}
+
+static void decode_cos(char *s, t_cosines *cosine)
+{
+ char *t;
+ char format[STRLEN], f1[STRLEN];
+ double a, phi;
+ int i;
+
+ t = strdup(s);
+ trim(t);
+
+ cosine->n = 0;
+ cosine->a = NULL;
+ cosine->phi = NULL;
+ if (strlen(t))
+ {
+ sscanf(t, "%d", &(cosine->n));
+ if (cosine->n <= 0)
+ {
+ cosine->n = 0;
+ }
+ else
+ {
+ snew(cosine->a, cosine->n);
+ snew(cosine->phi, cosine->n);
+
+ sprintf(format, "%%*d");
+ for (i = 0; (i < cosine->n); i++)
+ {
+ strcpy(f1, format);
+ strcat(f1, "%lf%lf");
+ if (sscanf(t, f1, &a, &phi) < 2)
+ {
+ gmx_fatal(FARGS, "Invalid input for electric field shift: '%s'", t);
+ }
+ cosine->a[i] = a;
+ cosine->phi[i] = phi;
+ strcat(format, "%*lf%*lf");
+ }
+ }
+ }
+ sfree(t);
+}
+
+static gmx_bool do_egp_flag(t_inputrec *ir, gmx_groups_t *groups,
+ const char *option, const char *val, int flag)
+{
+ /* The maximum number of energy group pairs would be MAXPTR*(MAXPTR+1)/2.
+ * But since this is much larger than STRLEN, such a line can not be parsed.
+ * The real maximum is the number of names that fit in a string: STRLEN/2.
+ */
+#define EGP_MAX (STRLEN/2)
+ int nelem, i, j, k, nr;
+ char *names[EGP_MAX];
+ char ***gnames;
+ gmx_bool bSet;
+
+ gnames = groups->grpname;
+
+ nelem = str_nelem(val, EGP_MAX, names);
+ if (nelem % 2 != 0)
+ {
+ gmx_fatal(FARGS, "The number of groups for %s is odd", option);
+ }
+ nr = groups->grps[egcENER].nr;
+ bSet = FALSE;
+ for (i = 0; i < nelem/2; i++)
+ {
+ j = 0;
+ while ((j < nr) &&
+ gmx_strcasecmp(names[2*i], *(gnames[groups->grps[egcENER].nm_ind[j]])))
+ {
+ j++;
+ }
+ if (j == nr)
+ {
+ gmx_fatal(FARGS, "%s in %s is not an energy group\n",
+ names[2*i], option);
+ }
+ k = 0;
+ while ((k < nr) &&
+ gmx_strcasecmp(names[2*i+1], *(gnames[groups->grps[egcENER].nm_ind[k]])))
+ {
+ k++;
+ }
+ if (k == nr)
+ {
+ gmx_fatal(FARGS, "%s in %s is not an energy group\n",
+ names[2*i+1], option);
+ }
+ if ((j < nr) && (k < nr))
+ {
+ ir->opts.egp_flags[nr*j+k] |= flag;
+ ir->opts.egp_flags[nr*k+j] |= flag;
+ bSet = TRUE;
+ }
+ }
+
+ return bSet;
+}
+
+
+static void make_swap_groups(
+ t_swapcoords *swap,
+ char *swapgname,
+ char *splitg0name,
+ char *splitg1name,
+ char *solgname,
+ t_blocka *grps,
+ char **gnames)
+{
+ int ig = -1, i = 0, j;
+ char *splitg;
+
+
+ /* Just a quick check here, more thorough checks are in mdrun */
+ if (strcmp(splitg0name, splitg1name) == 0)
+ {
+ gmx_fatal(FARGS, "The split groups can not both be '%s'.", splitg0name);
+ }
+
+ /* First get the swap group index atoms */
+ ig = search_string(swapgname, grps->nr, gnames);
+ swap->nat = grps->index[ig+1] - grps->index[ig];
+ if (swap->nat > 0)
+ {
+ fprintf(stderr, "Swap group '%s' contains %d atoms.\n", swapgname, swap->nat);
+ snew(swap->ind, swap->nat);
+ for (i = 0; i < swap->nat; i++)
+ {
+ swap->ind[i] = grps->a[grps->index[ig]+i];
+ }
+ }
+ else
+ {
+ gmx_fatal(FARGS, "You defined an empty group of atoms for swapping.");
+ }
+
+ /* Now do so for the split groups */
+ for (j = 0; j < 2; j++)
+ {
+ if (j == 0)
+ {
+ splitg = splitg0name;
+ }
+ else
+ {
+ splitg = splitg1name;
+ }
+
+ ig = search_string(splitg, grps->nr, gnames);
+ swap->nat_split[j] = grps->index[ig+1] - grps->index[ig];
+ if (swap->nat_split[j] > 0)
+ {
+ fprintf(stderr, "Split group %d '%s' contains %d atom%s.\n",
+ j, splitg, swap->nat_split[j], (swap->nat_split[j] > 1) ? "s" : "");
+ snew(swap->ind_split[j], swap->nat_split[j]);
+ for (i = 0; i < swap->nat_split[j]; i++)
+ {
+ swap->ind_split[j][i] = grps->a[grps->index[ig]+i];
+ }
+ }
+ else
+ {
+ gmx_fatal(FARGS, "Split group %d has to contain at least 1 atom!", j);
+ }
+ }
+
+ /* Now get the solvent group index atoms */
+ ig = search_string(solgname, grps->nr, gnames);
+ swap->nat_sol = grps->index[ig+1] - grps->index[ig];
+ if (swap->nat_sol > 0)
+ {
+ fprintf(stderr, "Solvent group '%s' contains %d atoms.\n", solgname, swap->nat_sol);
+ snew(swap->ind_sol, swap->nat_sol);
+ for (i = 0; i < swap->nat_sol; i++)
+ {
+ swap->ind_sol[i] = grps->a[grps->index[ig]+i];
+ }
+ }
+ else
+ {
+ gmx_fatal(FARGS, "You defined an empty group of solvent. Cannot exchange ions.");
+ }
+}
+
+
+void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames)
+{
+ int ig = -1, i;
+
+
+ ig = search_string(IMDgname, grps->nr, gnames);
+ IMDgroup->nat = grps->index[ig+1] - grps->index[ig];
+
+ if (IMDgroup->nat > 0)
+ {
+ fprintf(stderr, "Group '%s' with %d atoms can be activated for interactive molecular dynamics (IMD).\n",
+ IMDgname, IMDgroup->nat);
+ snew(IMDgroup->ind, IMDgroup->nat);
+ for (i = 0; i < IMDgroup->nat; i++)
+ {
+ IMDgroup->ind[i] = grps->a[grps->index[ig]+i];
+ }
+ }
+}
+
+
+void do_index(const char* mdparin, const char *ndx,
+ gmx_mtop_t *mtop,
+ gmx_bool bVerbose,
+ t_inputrec *ir, rvec *v,
+ warninp_t wi)
+{
+ t_blocka *grps;
+ gmx_groups_t *groups;
+ int natoms;
+ t_symtab *symtab;
+ t_atoms atoms_all;
+ char warnbuf[STRLEN], **gnames;
+ int nr, ntcg, ntau_t, nref_t, nacc, nofg, nSA, nSA_points, nSA_time, nSA_temp;
+ real tau_min;
+ int nstcmin;
+ int nacg, nfreeze, nfrdim, nenergy, nvcm, nuser;
+ char *ptr1[MAXPTR], *ptr2[MAXPTR], *ptr3[MAXPTR];
+ int i, j, k, restnm;
+ real SAtime;
+ gmx_bool bExcl, bTable, bSetTCpar, bAnneal, bRest;
+ int nQMmethod, nQMbasis, nQMcharge, nQMmult, nbSH, nCASorb, nCASelec,
+ nSAon, nSAoff, nSAsteps, nQMg, nbOPT, nbTS;
+ char warn_buf[STRLEN];
+
+ if (bVerbose)
+ {
+ fprintf(stderr, "processing index file...\n");
+ }
+ debug_gmx();
+ if (ndx == NULL)
+ {
+ snew(grps, 1);
+ snew(grps->index, 1);
+ snew(gnames, 1);
+ atoms_all = gmx_mtop_global_atoms(mtop);
+ analyse(&atoms_all, grps, &gnames, FALSE, TRUE);
+ free_t_atoms(&atoms_all, FALSE);
+ }
+ else
+ {
+ grps = init_index(ndx, &gnames);
+ }
+
+ groups = &mtop->groups;
+ natoms = mtop->natoms;
+ symtab = &mtop->symtab;
+
+ snew(groups->grpname, grps->nr+1);
+
+ for (i = 0; (i < grps->nr); i++)
+ {
+ groups->grpname[i] = put_symtab(symtab, gnames[i]);
+ }
+ groups->grpname[i] = put_symtab(symtab, "rest");
+ restnm = i;
+ srenew(gnames, grps->nr+1);
+ gnames[restnm] = *(groups->grpname[i]);
+ groups->ngrpname = grps->nr+1;
+
+ set_warning_line(wi, mdparin, -1);
+
+ ntau_t = str_nelem(is->tau_t, MAXPTR, ptr1);
+ nref_t = str_nelem(is->ref_t, MAXPTR, ptr2);
+ ntcg = str_nelem(is->tcgrps, MAXPTR, ptr3);
+ if ((ntau_t != ntcg) || (nref_t != ntcg))
+ {
+ gmx_fatal(FARGS, "Invalid T coupling input: %d groups, %d ref-t values and "
+ "%d tau-t values", ntcg, nref_t, ntau_t);
+ }
+
+ bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI == eiBD || EI_TPI(ir->eI));
+ do_numbering(natoms, groups, ntcg, ptr3, grps, gnames, egcTC,
+ restnm, bSetTCpar ? egrptpALL : egrptpALL_GENREST, bVerbose, wi);
+ nr = groups->grps[egcTC].nr;
+ ir->opts.ngtc = nr;
+ snew(ir->opts.nrdf, nr);
+ snew(ir->opts.tau_t, nr);
+ snew(ir->opts.ref_t, nr);
+ if (ir->eI == eiBD && ir->bd_fric == 0)
+ {
+ fprintf(stderr, "bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n");
+ }
+
+ if (bSetTCpar)
+ {
+ if (nr != nref_t)
+ {
+ gmx_fatal(FARGS, "Not enough ref-t and tau-t values!");
+ }
+
+ tau_min = 1e20;
+ for (i = 0; (i < nr); i++)
+ {
+ ir->opts.tau_t[i] = strtod(ptr1[i], NULL);
+ if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0)
+ {
+ sprintf(warn_buf, "With integrator %s tau-t should be larger than 0", ei_names[ir->eI]);
+ warning_error(wi, warn_buf);
+ }
+
+ if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0)
+ {
+ warning_note(wi, "tau-t = -1 is the value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
+ }
+
+ if (ir->opts.tau_t[i] >= 0)
+ {
+ tau_min = min(tau_min, ir->opts.tau_t[i]);
+ }
+ }
+ if (ir->etc != etcNO && ir->nsttcouple == -1)
+ {
+ ir->nsttcouple = ir_optimal_nsttcouple(ir);
+ }
+
+ if (EI_VV(ir->eI))
+ {
+ if ((ir->etc == etcNOSEHOOVER) && (ir->epc == epcBERENDSEN))
+ {
+ gmx_fatal(FARGS, "Cannot do Nose-Hoover temperature with Berendsen pressure control with md-vv; use either vrescale temperature with berendsen pressure or Nose-Hoover temperature with MTTK pressure");
+ }
+ if ((ir->epc == epcMTTK) && (ir->etc > etcNO))
+ {
+ if (ir->nstpcouple != ir->nsttcouple)
+ {
+ int mincouple = min(ir->nstpcouple, ir->nsttcouple);
+ ir->nstpcouple = ir->nsttcouple = mincouple;
+ sprintf(warn_buf, "for current Trotter decomposition methods with vv, nsttcouple and nstpcouple must be equal. Both have been reset to min(nsttcouple,nstpcouple) = %d", mincouple);
+ warning_note(wi, warn_buf);
+ }
+ }
+ }
+ /* velocity verlet with averaged kinetic energy KE = 0.5*(v(t+1/2) - v(t-1/2)) is implemented
+ primarily for testing purposes, and does not work with temperature coupling other than 1 */
+
+ if (ETC_ANDERSEN(ir->etc))
+ {
+ if (ir->nsttcouple != 1)
+ {
+ ir->nsttcouple = 1;
+ sprintf(warn_buf, "Andersen temperature control methods assume nsttcouple = 1; there is no need for larger nsttcouple > 1, since no global parameters are computed. nsttcouple has been reset to 1");
+ warning_note(wi, warn_buf);
+ }
+ }
+ nstcmin = tcouple_min_integration_steps(ir->etc);
+ if (nstcmin > 1)
+ {
+ if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
+ {
+ sprintf(warn_buf, "For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
+ ETCOUPLTYPE(ir->etc),
+ tau_min, nstcmin,
+ ir->nsttcouple*ir->delta_t);
+ warning(wi, warn_buf);
+ }
+ }
+ for (i = 0; (i < nr); i++)
+ {
+ ir->opts.ref_t[i] = strtod(ptr2[i], NULL);
+ if (ir->opts.ref_t[i] < 0)
+ {
+ gmx_fatal(FARGS, "ref-t for group %d negative", i);
+ }
+ }
+ /* set the lambda mc temperature to the md integrator temperature (which should be defined
+ if we are in this conditional) if mc_temp is negative */
+ if (ir->expandedvals->mc_temp < 0)
+ {
+ ir->expandedvals->mc_temp = ir->opts.ref_t[0]; /*for now, set to the first reft */
+ }
+ }
+
+ /* Simulated annealing for each group. There are nr groups */
+ nSA = str_nelem(is->anneal, MAXPTR, ptr1);
+ if (nSA == 1 && (ptr1[0][0] == 'n' || ptr1[0][0] == 'N'))
+ {
+ nSA = 0;
+ }
+ if (nSA > 0 && nSA != nr)
+ {
+ gmx_fatal(FARGS, "Not enough annealing values: %d (for %d groups)\n", nSA, nr);
+ }
+ else
+ {
+ snew(ir->opts.annealing, nr);
+ snew(ir->opts.anneal_npoints, nr);
+ snew(ir->opts.anneal_time, nr);
+ snew(ir->opts.anneal_temp, nr);
+ for (i = 0; i < nr; i++)
+ {
+ ir->opts.annealing[i] = eannNO;
+ ir->opts.anneal_npoints[i] = 0;
+ ir->opts.anneal_time[i] = NULL;
+ ir->opts.anneal_temp[i] = NULL;
+ }
+ if (nSA > 0)
+ {
+ bAnneal = FALSE;
+ for (i = 0; i < nr; i++)
+ {
+ if (ptr1[i][0] == 'n' || ptr1[i][0] == 'N')
+ {
+ ir->opts.annealing[i] = eannNO;
+ }
+ else if (ptr1[i][0] == 's' || ptr1[i][0] == 'S')
+ {
+ ir->opts.annealing[i] = eannSINGLE;
+ bAnneal = TRUE;
+ }
+ else if (ptr1[i][0] == 'p' || ptr1[i][0] == 'P')
+ {
+ ir->opts.annealing[i] = eannPERIODIC;
+ bAnneal = TRUE;
+ }
+ }
+ if (bAnneal)
+ {
+ /* Read the other fields too */
+ nSA_points = str_nelem(is->anneal_npoints, MAXPTR, ptr1);
+ if (nSA_points != nSA)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-npoints values for %d groups\n", nSA_points, nSA);
+ }
+ for (k = 0, i = 0; i < nr; i++)
+ {
+ ir->opts.anneal_npoints[i] = strtol(ptr1[i], NULL, 10);
+ if (ir->opts.anneal_npoints[i] == 1)
+ {
+ gmx_fatal(FARGS, "Please specify at least a start and an end point for annealing\n");
+ }
+ snew(ir->opts.anneal_time[i], ir->opts.anneal_npoints[i]);
+ snew(ir->opts.anneal_temp[i], ir->opts.anneal_npoints[i]);
+ k += ir->opts.anneal_npoints[i];
+ }
+
+ nSA_time = str_nelem(is->anneal_time, MAXPTR, ptr1);
+ if (nSA_time != k)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-time values, wanter %d\n", nSA_time, k);
+ }
+ nSA_temp = str_nelem(is->anneal_temp, MAXPTR, ptr2);
+ if (nSA_temp != k)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-temp values, wanted %d\n", nSA_temp, k);
+ }
+
+ for (i = 0, k = 0; i < nr; i++)
+ {
+
+ for (j = 0; j < ir->opts.anneal_npoints[i]; j++)
+ {
+ ir->opts.anneal_time[i][j] = strtod(ptr1[k], NULL);
+ ir->opts.anneal_temp[i][j] = strtod(ptr2[k], NULL);
+ if (j == 0)
+ {
+ if (ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
+ {
+ gmx_fatal(FARGS, "First time point for annealing > init_t.\n");
+ }
+ }
+ else
+ {
+ /* j>0 */
+ if (ir->opts.anneal_time[i][j] < ir->opts.anneal_time[i][j-1])
+ {
+ gmx_fatal(FARGS, "Annealing timepoints out of order: t=%f comes after t=%f\n",
+ ir->opts.anneal_time[i][j], ir->opts.anneal_time[i][j-1]);
+ }
+ }
+ if (ir->opts.anneal_temp[i][j] < 0)
+ {
+ gmx_fatal(FARGS, "Found negative temperature in annealing: %f\n", ir->opts.anneal_temp[i][j]);
+ }
+ k++;
+ }
+ }
+ /* Print out some summary information, to make sure we got it right */
+ for (i = 0, k = 0; i < nr; i++)
+ {
+ if (ir->opts.annealing[i] != eannNO)
+ {
+ j = groups->grps[egcTC].nm_ind[i];
+ fprintf(stderr, "Simulated annealing for group %s: %s, %d timepoints\n",
+ *(groups->grpname[j]), eann_names[ir->opts.annealing[i]],
+ ir->opts.anneal_npoints[i]);
+ fprintf(stderr, "Time (ps) Temperature (K)\n");
+ /* All terms except the last one */
+ for (j = 0; j < (ir->opts.anneal_npoints[i]-1); j++)
+ {
+ fprintf(stderr, "%9.1f %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
+ }
+
+ /* Finally the last one */
+ j = ir->opts.anneal_npoints[i]-1;
+ if (ir->opts.annealing[i] == eannSINGLE)
+ {
+ fprintf(stderr, "%9.1f- %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
+ }
+ else
+ {
+ fprintf(stderr, "%9.1f %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
+ if (fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0]) > GMX_REAL_EPS)
+ {
+ warning_note(wi, "There is a temperature jump when your annealing loops back.\n");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (ir->ePull != epullNO)
+ {
+ make_pull_groups(ir->pull, is->pull_grp, grps, gnames);
+
+ make_pull_coords(ir->pull);
+ }
+
+ if (ir->bRot)
+ {
+ make_rotation_groups(ir->rot, is->rot_grp, grps, gnames);
+ }
+
+ if (ir->eSwapCoords != eswapNO)
+ {
+ make_swap_groups(ir->swap, swapgrp, splitgrp0, splitgrp1, solgrp, grps, gnames);
+ }
+
+ /* Make indices for IMD session */
+ if (ir->bIMD)
+ {
+ make_IMD_group(ir->imd, is->imd_grp, grps, gnames);
+ }
+
+ nacc = str_nelem(is->acc, MAXPTR, ptr1);
+ nacg = str_nelem(is->accgrps, MAXPTR, ptr2);
+ if (nacg*DIM != nacc)
+ {
+ gmx_fatal(FARGS, "Invalid Acceleration input: %d groups and %d acc. values",
+ nacg, nacc);
+ }
+ do_numbering(natoms, groups, nacg, ptr2, grps, gnames, egcACC,
+ restnm, egrptpALL_GENREST, bVerbose, wi);
+ nr = groups->grps[egcACC].nr;
+ snew(ir->opts.acc, nr);
+ ir->opts.ngacc = nr;
+
+ for (i = k = 0; (i < nacg); i++)
+ {
+ for (j = 0; (j < DIM); j++, k++)
+ {
+ ir->opts.acc[i][j] = strtod(ptr1[k], NULL);
+ }
+ }
+ for (; (i < nr); i++)
+ {
+ for (j = 0; (j < DIM); j++)
+ {
+ ir->opts.acc[i][j] = 0;
+ }
+ }
+
+ nfrdim = str_nelem(is->frdim, MAXPTR, ptr1);
+ nfreeze = str_nelem(is->freeze, MAXPTR, ptr2);
+ if (nfrdim != DIM*nfreeze)
+ {
+ gmx_fatal(FARGS, "Invalid Freezing input: %d groups and %d freeze values",
+ nfreeze, nfrdim);
+ }
+ do_numbering(natoms, groups, nfreeze, ptr2, grps, gnames, egcFREEZE,
+ restnm, egrptpALL_GENREST, bVerbose, wi);
+ nr = groups->grps[egcFREEZE].nr;
+ ir->opts.ngfrz = nr;
+ snew(ir->opts.nFreeze, nr);
+ for (i = k = 0; (i < nfreeze); i++)
+ {
+ for (j = 0; (j < DIM); j++, k++)
+ {
+ ir->opts.nFreeze[i][j] = (gmx_strncasecmp(ptr1[k], "Y", 1) == 0);
+ if (!ir->opts.nFreeze[i][j])
+ {
+ if (gmx_strncasecmp(ptr1[k], "N", 1) != 0)
+ {
+ sprintf(warnbuf, "Please use Y(ES) or N(O) for freezedim only "
+ "(not %s)", ptr1[k]);
+ warning(wi, warn_buf);
+ }
+ }
+ }
+ }
+ for (; (i < nr); i++)
+ {
+ for (j = 0; (j < DIM); j++)
+ {
+ ir->opts.nFreeze[i][j] = 0;
+ }
+ }
+
+ nenergy = str_nelem(is->energy, MAXPTR, ptr1);
+ do_numbering(natoms, groups, nenergy, ptr1, grps, gnames, egcENER,
+ restnm, egrptpALL_GENREST, bVerbose, wi);
+ add_wall_energrps(groups, ir->nwall, symtab);
+ ir->opts.ngener = groups->grps[egcENER].nr;
+ nvcm = str_nelem(is->vcm, MAXPTR, ptr1);
+ bRest =
+ do_numbering(natoms, groups, nvcm, ptr1, grps, gnames, egcVCM,
+ restnm, nvcm == 0 ? egrptpALL_GENREST : egrptpPART, bVerbose, wi);
+ if (bRest)
+ {
+ warning(wi, "Some atoms are not part of any center of mass motion removal group.\n"
+ "This may lead to artifacts.\n"
+ "In most cases one should use one group for the whole system.");
+ }
+
+ /* Now we have filled the freeze struct, so we can calculate NRDF */
+ calc_nrdf(mtop, ir, gnames);
+
+ if (v && NULL)
+ {
+ real fac, ntot = 0;
+
+ /* Must check per group! */
+ for (i = 0; (i < ir->opts.ngtc); i++)
+ {
+ ntot += ir->opts.nrdf[i];
+ }
+ if (ntot != (DIM*natoms))
+ {
+ fac = sqrt(ntot/(DIM*natoms));
+ if (bVerbose)
+ {
+ fprintf(stderr, "Scaling velocities by a factor of %.3f to account for constraints\n"
+ "and removal of center of mass motion\n", fac);
+ }
+ for (i = 0; (i < natoms); i++)
+ {
+ svmul(fac, v[i], v[i]);
+ }
+ }
+ }
+
+ nuser = str_nelem(is->user1, MAXPTR, ptr1);
+ do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser1,
+ restnm, egrptpALL_GENREST, bVerbose, wi);
+ nuser = str_nelem(is->user2, MAXPTR, ptr1);
+ do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser2,
+ restnm, egrptpALL_GENREST, bVerbose, wi);
+ nuser = str_nelem(is->x_compressed_groups, MAXPTR, ptr1);
+ do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcCompressedX,
+ restnm, egrptpONE, bVerbose, wi);
+ nofg = str_nelem(is->orirefitgrp, MAXPTR, ptr1);
+ do_numbering(natoms, groups, nofg, ptr1, grps, gnames, egcORFIT,
+ restnm, egrptpALL_GENREST, bVerbose, wi);
+
+ /* QMMM input processing */
+ nQMg = str_nelem(is->QMMM, MAXPTR, ptr1);
+ nQMmethod = str_nelem(is->QMmethod, MAXPTR, ptr2);
+ nQMbasis = str_nelem(is->QMbasis, MAXPTR, ptr3);
+ if ((nQMmethod != nQMg) || (nQMbasis != nQMg))
+ {
+ gmx_fatal(FARGS, "Invalid QMMM input: %d groups %d basissets"
+ " and %d methods\n", nQMg, nQMbasis, nQMmethod);
+ }
+ /* group rest, if any, is always MM! */
+ do_numbering(natoms, groups, nQMg, ptr1, grps, gnames, egcQMMM,
+ restnm, egrptpALL_GENREST, bVerbose, wi);
+ nr = nQMg; /*atoms->grps[egcQMMM].nr;*/
+ ir->opts.ngQM = nQMg;
+ snew(ir->opts.QMmethod, nr);
+ snew(ir->opts.QMbasis, nr);
+ for (i = 0; i < nr; i++)
+ {
+ /* input consists of strings: RHF CASSCF PM3 .. These need to be
+ * converted to the corresponding enum in names.c
+ */
+ ir->opts.QMmethod[i] = search_QMstring(ptr2[i], eQMmethodNR,
+ eQMmethod_names);
+ ir->opts.QMbasis[i] = search_QMstring(ptr3[i], eQMbasisNR,
+ eQMbasis_names);
+
+ }
+ nQMmult = str_nelem(is->QMmult, MAXPTR, ptr1);
+ nQMcharge = str_nelem(is->QMcharge, MAXPTR, ptr2);
+ nbSH = str_nelem(is->bSH, MAXPTR, ptr3);
+ snew(ir->opts.QMmult, nr);
+ snew(ir->opts.QMcharge, nr);
+ snew(ir->opts.bSH, nr);
+
+ for (i = 0; i < nr; i++)
+ {
+ ir->opts.QMmult[i] = strtol(ptr1[i], NULL, 10);
+ ir->opts.QMcharge[i] = strtol(ptr2[i], NULL, 10);
+ ir->opts.bSH[i] = (gmx_strncasecmp(ptr3[i], "Y", 1) == 0);
+ }
+
+ nCASelec = str_nelem(is->CASelectrons, MAXPTR, ptr1);
+ nCASorb = str_nelem(is->CASorbitals, MAXPTR, ptr2);
+ snew(ir->opts.CASelectrons, nr);
+ snew(ir->opts.CASorbitals, nr);
+ for (i = 0; i < nr; i++)
+ {
+ ir->opts.CASelectrons[i] = strtol(ptr1[i], NULL, 10);
+ ir->opts.CASorbitals[i] = strtol(ptr2[i], NULL, 10);
+ }
+ /* special optimization options */
+
+ nbOPT = str_nelem(is->bOPT, MAXPTR, ptr1);
+ nbTS = str_nelem(is->bTS, MAXPTR, ptr2);
+ snew(ir->opts.bOPT, nr);
+ snew(ir->opts.bTS, nr);
+ for (i = 0; i < nr; i++)
+ {
+ ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i], "Y", 1) == 0);
+ ir->opts.bTS[i] = (gmx_strncasecmp(ptr2[i], "Y", 1) == 0);
+ }
+ nSAon = str_nelem(is->SAon, MAXPTR, ptr1);
+ nSAoff = str_nelem(is->SAoff, MAXPTR, ptr2);
+ nSAsteps = str_nelem(is->SAsteps, MAXPTR, ptr3);
+ snew(ir->opts.SAon, nr);
+ snew(ir->opts.SAoff, nr);
+ snew(ir->opts.SAsteps, nr);
+
+ for (i = 0; i < nr; i++)
+ {
+ ir->opts.SAon[i] = strtod(ptr1[i], NULL);
+ ir->opts.SAoff[i] = strtod(ptr2[i], NULL);
+ ir->opts.SAsteps[i] = strtol(ptr3[i], NULL, 10);
+ }
+ /* end of QMMM input */
+
+ if (bVerbose)
+ {
+ for (i = 0; (i < egcNR); i++)
+ {
+ fprintf(stderr, "%-16s has %d element(s):", gtypes[i], groups->grps[i].nr);
+ for (j = 0; (j < groups->grps[i].nr); j++)
+ {
+ fprintf(stderr, " %s", *(groups->grpname[groups->grps[i].nm_ind[j]]));
+ }
+ fprintf(stderr, "\n");
+ }
+ }
+
+ nr = groups->grps[egcENER].nr;
+ snew(ir->opts.egp_flags, nr*nr);
+
+ bExcl = do_egp_flag(ir, groups, "energygrp-excl", is->egpexcl, EGP_EXCL);
+ if (bExcl && ir->cutoff_scheme == ecutsVERLET)
+ {
+ warning_error(wi, "Energy group exclusions are not (yet) implemented for the Verlet scheme");
+ }
+ if (bExcl && EEL_FULL(ir->coulombtype))
+ {
+ warning(wi, "Can not exclude the lattice Coulomb energy between energy groups");
+ }
+
+ bTable = do_egp_flag(ir, groups, "energygrp-table", is->egptable, EGP_TABLE);
+ if (bTable && !(ir->vdwtype == evdwUSER) &&
+ !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) &&
+ !(ir->coulombtype == eelPMEUSERSWITCH))
+ {
+ gmx_fatal(FARGS, "Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb");
+ }
+
+ decode_cos(is->efield_x, &(ir->ex[XX]));
+ decode_cos(is->efield_xt, &(ir->et[XX]));
+ decode_cos(is->efield_y, &(ir->ex[YY]));
+ decode_cos(is->efield_yt, &(ir->et[YY]));
+ decode_cos(is->efield_z, &(ir->ex[ZZ]));
+ decode_cos(is->efield_zt, &(ir->et[ZZ]));
+
+ if (ir->bAdress)
+ {
+ do_adress_index(ir->adress, groups, gnames, &(ir->opts), wi);
+ }
+
+ for (i = 0; (i < grps->nr); i++)
+ {
+ sfree(gnames[i]);
+ }
+ sfree(gnames);
+ done_blocka(grps);
+ sfree(grps);
+
+}
+
+
+
+static void check_disre(gmx_mtop_t *mtop)
+{
+ gmx_ffparams_t *ffparams;
+ t_functype *functype;
+ t_iparams *ip;
+ int i, ndouble, ftype;
+ int label, old_label;
+
+ if (gmx_mtop_ftype_count(mtop, F_DISRES) > 0)
+ {
+ ffparams = &mtop->ffparams;
+ functype = ffparams->functype;
+ ip = ffparams->iparams;
+ ndouble = 0;
+ old_label = -1;
+ for (i = 0; i < ffparams->ntypes; i++)
+ {
+ ftype = functype[i];
+ if (ftype == F_DISRES)
+ {
+ label = ip[i].disres.label;
+ if (label == old_label)
+ {
+ fprintf(stderr, "Distance restraint index %d occurs twice\n", label);
+ ndouble++;
+ }
+ old_label = label;
+ }
+ }
+ if (ndouble > 0)
+ {
+ gmx_fatal(FARGS, "Found %d double distance restraint indices,\n"
+ "probably the parameters for multiple pairs in one restraint "
+ "are not identical\n", ndouble);
+ }
+ }
+}
+
+static gmx_bool absolute_reference(t_inputrec *ir, gmx_mtop_t *sys,
+ gmx_bool posres_only,
+ ivec AbsRef)
+{
+ int d, g, i;
+ gmx_mtop_ilistloop_t iloop;
+ t_ilist *ilist;
+ int nmol;
+ t_iparams *pr;
+
+ clear_ivec(AbsRef);
+
+ if (!posres_only)
+ {
+ /* Check the COM */
+ for (d = 0; d < DIM; d++)
+ {
+ AbsRef[d] = (d < ndof_com(ir) ? 0 : 1);
+ }
+ /* Check for freeze groups */
+ for (g = 0; g < ir->opts.ngfrz; g++)
+ {
+ for (d = 0; d < DIM; d++)
+ {
+ if (ir->opts.nFreeze[g][d] != 0)
+ {
+ AbsRef[d] = 1;
+ }
+ }
+ }
+ }
+
+ /* Check for position restraints */
+ iloop = gmx_mtop_ilistloop_init(sys);
+ while (gmx_mtop_ilistloop_next(iloop, &ilist, &nmol))
+ {
+ if (nmol > 0 &&
+ (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0))
+ {
+ for (i = 0; i < ilist[F_POSRES].nr; i += 2)
+ {
+ pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]];
+ for (d = 0; d < DIM; d++)
+ {
+ if (pr->posres.fcA[d] != 0)
+ {
+ AbsRef[d] = 1;
+ }
+ }
+ }
+ for (i = 0; i < ilist[F_FBPOSRES].nr; i += 2)
+ {
+ /* Check for flat-bottom posres */
+ pr = &sys->ffparams.iparams[ilist[F_FBPOSRES].iatoms[i]];
+ if (pr->fbposres.k != 0)
+ {
+ switch (pr->fbposres.geom)
+ {
+ case efbposresSPHERE:
+ AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
+ break;
+ case efbposresCYLINDER:
+ AbsRef[XX] = AbsRef[YY] = 1;
+ break;
+ case efbposresX: /* d=XX */
+ case efbposresY: /* d=YY */
+ case efbposresZ: /* d=ZZ */
+ d = pr->fbposres.geom - efbposresX;
+ AbsRef[d] = 1;
+ break;
+ default:
+ gmx_fatal(FARGS, " Invalid geometry for flat-bottom position restraint.\n"
+ "Expected nr between 1 and %d. Found %d\n", efbposresNR-1,
+ pr->fbposres.geom);
+ }
+ }
+ }
+ }
+ }
+
+ return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0);
+}
+
+static void
+check_combination_rule_differences(const gmx_mtop_t *mtop, int state,
+ gmx_bool *bC6ParametersWorkWithGeometricRules,
+ gmx_bool *bC6ParametersWorkWithLBRules,
+ gmx_bool *bLBRulesPossible)
+{
+ int ntypes, tpi, tpj, thisLBdiff, thisgeomdiff;
+ int *typecount;
+ real tol;
+ double geometricdiff, LBdiff;
+ double c6i, c6j, c12i, c12j;
+ double c6, c6_geometric, c6_LB;
+ double sigmai, sigmaj, epsi, epsj;
+ gmx_bool bCanDoLBRules, bCanDoGeometricRules;
+ const char *ptr;
+
+ /* A tolerance of 1e-5 seems reasonable for (possibly hand-typed)
+ * force-field floating point parameters.
+ */
+ tol = 1e-5;
+ ptr = getenv("GMX_LJCOMB_TOL");
+ if (ptr != NULL)
+ {
+ double dbl;
+
+ sscanf(ptr, "%lf", &dbl);
+ tol = dbl;
+ }
+
+ *bC6ParametersWorkWithLBRules = TRUE;
+ *bC6ParametersWorkWithGeometricRules = TRUE;
+ bCanDoLBRules = TRUE;
+ bCanDoGeometricRules = TRUE;
+ ntypes = mtop->ffparams.atnr;
+ snew(typecount, ntypes);
+ gmx_mtop_count_atomtypes(mtop, state, typecount);
+ geometricdiff = LBdiff = 0.0;
+ *bLBRulesPossible = TRUE;
+ for (tpi = 0; tpi < ntypes; ++tpi)
+ {
+ c6i = mtop->ffparams.iparams[(ntypes + 1) * tpi].lj.c6;
+ c12i = mtop->ffparams.iparams[(ntypes + 1) * tpi].lj.c12;
+ for (tpj = tpi; tpj < ntypes; ++tpj)
+ {
+ c6j = mtop->ffparams.iparams[(ntypes + 1) * tpj].lj.c6;
+ c12j = mtop->ffparams.iparams[(ntypes + 1) * tpj].lj.c12;
+ c6 = mtop->ffparams.iparams[ntypes * tpi + tpj].lj.c6;
+ c6_geometric = sqrt(c6i * c6j);
+ if (!gmx_numzero(c6_geometric))
+ {
+ if (!gmx_numzero(c12i) && !gmx_numzero(c12j))
+ {
+ sigmai = pow(c12i / c6i, 1.0/6.0);
+ sigmaj = pow(c12j / c6j, 1.0/6.0);
+ epsi = c6i * c6i /(4.0 * c12i);
+ epsj = c6j * c6j /(4.0 * c12j);
+ c6_LB = 4.0 * pow(epsi * epsj, 1.0/2.0) * pow(0.5 * (sigmai + sigmaj), 6);
+ }
+ else
+ {
+ *bLBRulesPossible = FALSE;
+ c6_LB = c6_geometric;
+ }
+ bCanDoLBRules = gmx_within_tol(c6_LB, c6, tol);
+ }
+
+ if (FALSE == bCanDoLBRules)
+ {
+ *bC6ParametersWorkWithLBRules = FALSE;
+ }
+
+ bCanDoGeometricRules = gmx_within_tol(c6_geometric, c6, tol);
+
+ if (FALSE == bCanDoGeometricRules)
+ {
+ *bC6ParametersWorkWithGeometricRules = FALSE;
+ }
+ }
+ }
+ sfree(typecount);
+}
+
+static void
+check_combination_rules(const t_inputrec *ir, const gmx_mtop_t *mtop,
+ warninp_t wi)
+{
+ char err_buf[256];
+ gmx_bool bLBRulesPossible, bC6ParametersWorkWithGeometricRules, bC6ParametersWorkWithLBRules;
+
+ check_combination_rule_differences(mtop, 0,
+ &bC6ParametersWorkWithGeometricRules,
+ &bC6ParametersWorkWithLBRules,
+ &bLBRulesPossible);
+ if (ir->ljpme_combination_rule == eljpmeLB)
+ {
+ if (FALSE == bC6ParametersWorkWithLBRules || FALSE == bLBRulesPossible)
+ {
+ warning(wi, "You are using arithmetic-geometric combination rules "
+ "in LJ-PME, but your non-bonded C6 parameters do not "
+ "follow these rules.");
+ }
+ }
+ else
+ {
+ if (FALSE == bC6ParametersWorkWithGeometricRules)
+ {
+ if (ir->eDispCorr != edispcNO)
+ {
+ warning_note(wi, "You are using geometric combination rules in "
+ "LJ-PME, but your non-bonded C6 parameters do "
+ "not follow these rules. "
+ "This will introduce very small errors in the forces and energies in "
+ "your simulations. Dispersion correction will correct total energy "
+ "and/or pressure for isotropic systems, but not forces or surface tensions.");
+ }
+ else
+ {
+ warning_note(wi, "You are using geometric combination rules in "
+ "LJ-PME, but your non-bonded C6 parameters do "
+ "not follow these rules. "
+ "This will introduce very small errors in the forces and energies in "
+ "your simulations. If your system is homogeneous, consider using dispersion correction "
+ "for the total energy and pressure.");
+ }
+ }
+ }
+}
+
+void triple_check(const char *mdparin, t_inputrec *ir, gmx_mtop_t *sys,
+ warninp_t wi)
+{
+ char err_buf[STRLEN];
+ int i, m, c, nmol, npct;
+ gmx_bool bCharge, bAcc;
+ real gdt_max, *mgrp, mt;
+ rvec acc;
+ gmx_mtop_atomloop_block_t aloopb;
+ gmx_mtop_atomloop_all_t aloop;
+ t_atom *atom;
+ ivec AbsRef;
+ char warn_buf[STRLEN];
+
+ set_warning_line(wi, mdparin, -1);
+
+ if (ir->cutoff_scheme == ecutsVERLET &&
+ ir->verletbuf_tol > 0 &&
+ ir->nstlist > 1 &&
+ ((EI_MD(ir->eI) || EI_SD(ir->eI)) &&
+ (ir->etc == etcVRESCALE || ir->etc == etcBERENDSEN)))
+ {
+ /* Check if a too small Verlet buffer might potentially
+ * cause more drift than the thermostat can couple off.
+ */
+ /* Temperature error fraction for warning and suggestion */
+ const real T_error_warn = 0.002;
+ const real T_error_suggest = 0.001;
+ /* For safety: 2 DOF per atom (typical with constraints) */
+ const real nrdf_at = 2;
+ real T, tau, max_T_error;
+ int i;
+
+ T = 0;
+ tau = 0;
+ for (i = 0; i < ir->opts.ngtc; i++)
+ {
+ T = max(T, ir->opts.ref_t[i]);
+ tau = max(tau, ir->opts.tau_t[i]);
+ }
+ if (T > 0)
+ {
+ /* This is a worst case estimate of the temperature error,
+ * assuming perfect buffer estimation and no cancelation
+ * of errors. The factor 0.5 is because energy distributes
+ * equally over Ekin and Epot.
+ */
+ max_T_error = 0.5*tau*ir->verletbuf_tol/(nrdf_at*BOLTZ*T);
+ if (max_T_error > T_error_warn)
+ {
+ sprintf(warn_buf, "With a verlet-buffer-tolerance of %g kJ/mol/ps, a reference temperature of %g and a tau_t of %g, your temperature might be off by up to %.1f%%. To ensure the error is below %.1f%%, decrease verlet-buffer-tolerance to %.0e or decrease tau_t.",
+ ir->verletbuf_tol, T, tau,
+ 100*max_T_error,
+ 100*T_error_suggest,
+ ir->verletbuf_tol*T_error_suggest/max_T_error);
+ warning(wi, warn_buf);
+ }
+ }
+ }
+
+ if (ETC_ANDERSEN(ir->etc))
+ {
+ int i;
+
+ for (i = 0; i < ir->opts.ngtc; i++)
+ {
+ sprintf(err_buf, "all tau_t must currently be equal using Andersen temperature control, violated for group %d", i);
+ CHECK(ir->opts.tau_t[0] != ir->opts.tau_t[i]);
+ sprintf(err_buf, "all tau_t must be postive using Andersen temperature control, tau_t[%d]=%10.6f",
+ i, ir->opts.tau_t[i]);
+ CHECK(ir->opts.tau_t[i] < 0);
+ }
+
+ for (i = 0; i < ir->opts.ngtc; i++)
+ {
+ int nsteps = (int)(ir->opts.tau_t[i]/ir->delta_t);
+ sprintf(err_buf, "tau_t/delta_t for group %d for temperature control method %s must be a multiple of nstcomm (%d), as velocities of atoms in coupled groups are randomized every time step. The input tau_t (%8.3f) leads to %d steps per randomization", i, etcoupl_names[ir->etc], ir->nstcomm, ir->opts.tau_t[i], nsteps);
+ CHECK((nsteps % ir->nstcomm) && (ir->etc == etcANDERSENMASSIVE));
+ }
+ }
+
+ if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD &&
+ ir->comm_mode == ecmNO &&
+ !(absolute_reference(ir, sys, FALSE, AbsRef) || ir->nsteps <= 10) &&
+ !ETC_ANDERSEN(ir->etc))
+ {
+ warning(wi, "You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass");
+ }
+
+ /* Check for pressure coupling with absolute position restraints */
+ if (ir->epc != epcNO && ir->refcoord_scaling == erscNO)
+ {
+ absolute_reference(ir, sys, TRUE, AbsRef);
+ {
+ for (m = 0; m < DIM; m++)
+ {
+ if (AbsRef[m] && norm2(ir->compress[m]) > 0)
+ {
+ warning(wi, "You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option.");
+ break;
+ }
+ }
+ }
+ }
+
+ bCharge = FALSE;
+ aloopb = gmx_mtop_atomloop_block_init(sys);
+ while (gmx_mtop_atomloop_block_next(aloopb, &atom, &nmol))
+ {
+ if (atom->q != 0 || atom->qB != 0)
+ {
+ bCharge = TRUE;
+ }
+ }
+
+ if (!bCharge)
+ {
+ if (EEL_FULL(ir->coulombtype))
+ {
+ sprintf(err_buf,
+ "You are using full electrostatics treatment %s for a system without charges.\n"
+ "This costs a lot of performance for just processing zeros, consider using %s instead.\n",
+ EELTYPE(ir->coulombtype), EELTYPE(eelCUT));
+ warning(wi, err_buf);
+ }
+ }
+ else
+ {
+ if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent)
+ {
+ sprintf(err_buf,
+ "You are using a plain Coulomb cut-off, which might produce artifacts.\n"
+ "You might want to consider using %s electrostatics.\n",
+ EELTYPE(eelPME));
+ warning_note(wi, err_buf);
+ }
+ }
+
+ /* Check if combination rules used in LJ-PME are the same as in the force field */
+ if (EVDW_PME(ir->vdwtype))
+ {
+ check_combination_rules(ir, sys, wi);
+ }
+
+ /* Generalized reaction field */
+ if (ir->opts.ngtc == 0)
+ {
+ sprintf(err_buf, "No temperature coupling while using coulombtype %s",
+ eel_names[eelGRF]);
+ CHECK(ir->coulombtype == eelGRF);
+ }
+ else
+ {
+ sprintf(err_buf, "When using coulombtype = %s"
+ " ref-t for temperature coupling should be > 0",
+ eel_names[eelGRF]);
+ CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0));
+ }
+
+ if (ir->eI == eiSD1 &&
+ (gmx_mtop_ftype_count(sys, F_CONSTR) > 0 ||
+ gmx_mtop_ftype_count(sys, F_SETTLE) > 0))
+ {
+ sprintf(warn_buf, "With constraints integrator %s is less accurate, consider using %s instead", ei_names[ir->eI], ei_names[eiSD2]);
+ warning_note(wi, warn_buf);
+ }
+
+ bAcc = FALSE;
+ for (i = 0; (i < sys->groups.grps[egcACC].nr); i++)
+ {
+ for (m = 0; (m < DIM); m++)
+ {
+ if (fabs(ir->opts.acc[i][m]) > 1e-6)
+ {
+ bAcc = TRUE;
+ }
+ }
+ }
+ if (bAcc)
+ {
+ clear_rvec(acc);
+ snew(mgrp, sys->groups.grps[egcACC].nr);
+ aloop = gmx_mtop_atomloop_all_init(sys);
+ while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
+ {
+ mgrp[ggrpnr(&sys->groups, egcACC, i)] += atom->m;
+ }
+ mt = 0.0;
+ for (i = 0; (i < sys->groups.grps[egcACC].nr); i++)
+ {
+ for (m = 0; (m < DIM); m++)
+ {
+ acc[m] += ir->opts.acc[i][m]*mgrp[i];
+ }
+ mt += mgrp[i];
+ }
+ for (m = 0; (m < DIM); m++)
+ {
+ if (fabs(acc[m]) > 1e-6)
+ {
+ const char *dim[DIM] = { "X", "Y", "Z" };
+ fprintf(stderr,
+ "Net Acceleration in %s direction, will %s be corrected\n",
+ dim[m], ir->nstcomm != 0 ? "" : "not");
+ if (ir->nstcomm != 0 && m < ndof_com(ir))
+ {
+ acc[m] /= mt;
+ for (i = 0; (i < sys->groups.grps[egcACC].nr); i++)
+ {
+ ir->opts.acc[i][m] -= acc[m];
+ }
+ }
+ }
+ }
+ sfree(mgrp);
+ }
+
+ if (ir->efep != efepNO && ir->fepvals->sc_alpha != 0 &&
+ !gmx_within_tol(sys->ffparams.reppow, 12.0, 10*GMX_DOUBLE_EPS))
+ {
+ gmx_fatal(FARGS, "Soft-core interactions are only supported with VdW repulsion power 12");
+ }
+
+ if (ir->ePull != epullNO)
+ {
+ gmx_bool bPullAbsoluteRef;
+
+ bPullAbsoluteRef = FALSE;
+ for (i = 0; i < ir->pull->ncoord; i++)
+ {
+ bPullAbsoluteRef = bPullAbsoluteRef ||
+ ir->pull->coord[i].group[0] == 0 ||
+ ir->pull->coord[i].group[1] == 0;
+ }
+ if (bPullAbsoluteRef)
+ {
+ absolute_reference(ir, sys, FALSE, AbsRef);
+ for (m = 0; m < DIM; m++)
+ {
+ if (ir->pull->dim[m] && !AbsRef[m])
+ {
+ warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
+ break;
+ }
+ }
+ }
+
+ if (ir->pull->eGeom == epullgDIRPBC)
+ {
+ for (i = 0; i < 3; i++)
+ {
+ for (m = 0; m <= i; m++)
+ {
+ if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
+ ir->deform[i][m] != 0)
+ {
+ for (c = 0; c < ir->pull->ncoord; c++)
+ {
+ if (ir->pull->coord[c].vec[m] != 0)
+ {
+ gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->eGeom), 'x'+m);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ check_disre(sys);
+}
+
+/* Consistency checks on the run parameters that need the box and the
+ * knowledge whether constraints are present (bConstr).
+ * Findings are reported through wi as notes, warnings or errors.
+ * The only field of ir that is modified is LincsWarnAngle, which is
+ * capped at 90 degrees.
+ */
+void double_check(t_inputrec *ir, matrix box, gmx_bool bConstr, warninp_t wi)
+{
+    char        msg[STRLEN];
+    const char *box_problem;
+    const char *twin_msg;
+    const char *cutoff_name;
+    real        shortest_edge;
+
+    /* check_box returns a message when the box is not acceptable */
+    box_problem = check_box(ir->ePBC, box);
+    if (box_problem)
+    {
+        warning_error(wi, box_problem);
+    }
+
+    if (bConstr)
+    {
+        if (ir->eConstrAlg == econtSHAKE)
+        {
+            if (ir->shake_tol <= 0.0)
+            {
+                sprintf(msg, "ERROR: shake-tol must be > 0 instead of %g\n",
+                        ir->shake_tol);
+                warning_error(wi, msg);
+            }
+
+            if (IR_TWINRANGE(*ir) && ir->nstlist > 1)
+            {
+                twin_msg = "With twin-range cut-off's and SHAKE the virial and the pressure are incorrect.";
+                /* Only fatal when the pressure is actually used for coupling */
+                if (ir->epc != epcNO)
+                {
+                    warning_error(wi, twin_msg);
+                }
+                else
+                {
+                    warning(wi, twin_msg);
+                }
+            }
+        }
+
+        if (ir->eConstrAlg == econtLINCS)
+        {
+            if (ir->eI == eiMD && ir->etc == etcNO && ir->nLincsIter == 1)
+            {
+                warning_note(wi, "For energy conservation with LINCS, lincs_iter should be 2 or larger.\n");
+            }
+
+            if ((ir->eI == eiCG || ir->eI == eiLBFGS) && ir->nProjOrder < 8)
+            {
+                sprintf(msg, "For accurate %s with LINCS constraints, lincs-order should be 8 or more.", ei_names[ir->eI]);
+                warning_note(wi, msg);
+            }
+
+            if (ir->epc == epcMTTK)
+            {
+                warning_error(wi, "MTTK not compatible with lincs -- use shake instead.");
+            }
+        }
+
+        if (ir->epc == epcMTTK)
+        {
+            warning_note(wi, "MTTK with constraints is deprecated, and will be removed in GROMACS 5.1");
+        }
+    }
+
+    if (ir->LincsWarnAngle > 90.0)
+    {
+        warning(wi, "lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n");
+        ir->LincsWarnAngle = 90.0;
+    }
+
+    /* The remaining checks compare the cut-offs with the periodic box */
+    if (ir->ePBC == epbcNONE)
+    {
+        return;
+    }
+
+    if (ir->nstlist == 0)
+    {
+        warning(wi, "With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash.");
+    }
+
+    if (ir->ns_type == ensGRID)
+    {
+        if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC, box))
+        {
+            /* Name the parameter that determines the longest cut-off */
+            if (ir->rlistlong > ir->rlist)
+            {
+                cutoff_name = (ir->rcoulomb == ir->rlistlong) ? "rcoulomb" : "rvdw";
+            }
+            else
+            {
+                cutoff_name = "rlist";
+            }
+            sprintf(msg, "ERROR: The cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n",
+                    cutoff_name);
+            warning_error(wi, msg);
+        }
+    }
+    else
+    {
+        shortest_edge = min(box[XX][XX], min(box[YY][YY], box[ZZ][ZZ]));
+        if (2*ir->rlistlong >= shortest_edge)
+        {
+            warning_error(wi, "ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist.");
+            if (TRICLINIC(box))
+            {
+                fprintf(stderr, "Grid search might allow larger cut-off's than simple search with triclinic boxes.");
+            }
+        }
+    }
+}
+
+/* Print the two largest charge group radii for Van der Waals and Coulomb
+ * as computed by calc_chargegroup_radii and, when rlist > 0, warn through
+ * wi when their sum does not fit in rlist or in the buffer between the
+ * pair-list range and the interaction cut-off.
+ */
+void check_chargegroup_radii(const gmx_mtop_t *mtop, const t_inputrec *ir,
+                             rvec *x,
+                             warninp_t wi)
+{
+    real        rvdw1, rvdw2, rcoul1, rcoul2;
+    char        msg[STRLEN];
+    const char *list_name;
+
+    calc_chargegroup_radii(mtop, x, &rvdw1, &rvdw2, &rcoul1, &rcoul2);
+
+    if (rvdw1 > 0)
+    {
+        printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n",
+               rvdw1, rvdw2);
+    }
+    if (rcoul1 > 0)
+    {
+        printf("Largest charge group radii for Coulomb: %5.3f, %5.3f nm\n",
+               rcoul1, rcoul2);
+    }
+
+    /* Without a positive rlist there is nothing to compare against */
+    if (!(ir->rlist > 0))
+    {
+        return;
+    }
+
+    if (rvdw1 + rvdw2 > ir->rlist ||
+        rcoul1 + rcoul2 > ir->rlist)
+    {
+        sprintf(msg,
+                "The sum of the two largest charge group radii (%f) is larger than rlist (%f)\n",
+                max(rvdw1+rvdw2, rcoul1+rcoul2), ir->rlist);
+        warning(wi, msg);
+        return;
+    }
+
+    /* The buffer is measured from the longest pair-list range */
+    list_name = (ir->rlistlong > ir->rlist) ? "rlistlong" : "rlist";
+
+    /* Here we do not use the zero at cut-off macro,
+     * since user defined interactions might purposely
+     * not be zero at the cut-off.
+     */
+    if (ir_vdw_is_zero_at_cutoff(ir) &&
+        rvdw1 + rvdw2 > ir->rlistlong - ir->rvdw)
+    {
+        sprintf(msg, "The sum of the two largest charge group radii (%f) is larger than %s (%f) - rvdw (%f).\n"
+                "With exact cut-offs, better performance can be obtained with cutoff-scheme = %s, because it does not use charge groups at all.",
+                rvdw1+rvdw2,
+                list_name,
+                ir->rlistlong, ir->rvdw,
+                ecutscheme_names[ecutsVERLET]);
+        /* A full warning for NVE runs, otherwise only a note */
+        if (ir_NVE(ir))
+        {
+            warning(wi, msg);
+        }
+        else
+        {
+            warning_note(wi, msg);
+        }
+    }
+
+    if (ir_coulomb_is_zero_at_cutoff(ir) &&
+        rcoul1 + rcoul2 > ir->rlistlong - ir->rcoulomb)
+    {
+        sprintf(msg, "The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f).\n"
+                "With exact cut-offs, better performance can be obtained with cutoff-scheme = %s, because it does not use charge groups at all.",
+                rcoul1+rcoul2,
+                list_name,
+                ir->rlistlong, ir->rcoulomb,
+                ecutscheme_names[ecutsVERLET]);
+        if (ir_NVE(ir))
+        {
+            warning(wi, msg);
+        }
+        else
+        {
+            warning_note(wi, msg);
+        }
+    }
+}
--- /dev/null
- t_nblist * nl);
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+#ifndef _nonbonded_h
+#define _nonbonded_h
+
+#include "typedefs.h"
+#include "pbc.h"
+#include "network.h"
+#include "tgroup.h"
+#include "genborn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
+
+
+
+void
+gmx_nonbonded_setup(t_forcerec * fr,
+ gmx_bool bGenericKernelOnly);
+
+
+
+
+
+void
+gmx_nonbonded_set_kernel_pointers(FILE * fplog,
++ t_nblist * nl,
++ gmx_bool bElecAndVdwSwitchDiffers);
+
+
+
+#define GMX_NONBONDED_DO_LR (1<<0)
+#define GMX_NONBONDED_DO_FORCE (1<<1)
+#define GMX_NONBONDED_DO_SHIFTFORCE (1<<2)
+#define GMX_NONBONDED_DO_FOREIGNLAMBDA (1<<3)
+#define GMX_NONBONDED_DO_POTENTIAL (1<<4)
+#define GMX_NONBONDED_DO_SR (1<<5)
+
+void
+do_nonbonded(t_forcerec *fr,
+ rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *md, t_blocka *excl,
+ gmx_grppairener_t *grppener,
+ t_nrnb *nrnb, real *lambda, real dvdlambda[],
+ int nls, int eNL, int flags);
+
+/* Calculate VdW/charge listed pair interactions (usually 1-4 interactions).
+ * global_atom_index is only passed for printing error messages.
+ */
+real
+do_nonbonded_listed(int ftype, int nbonds, const t_iatom iatoms[], const t_iparams iparams[],
+ const rvec x[], rvec f[], rvec fshift[], const t_pbc *pbc, const t_graph *g,
+ real *lambda, real *dvdl, const t_mdatoms *md, const t_forcerec *fr,
+ gmx_grppairener_t *grppener, int *global_atom_index);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null
- if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include "sysstuff.h"
+#include "typedefs.h"
+#include "types/commrec.h"
+#include "vec.h"
+#include "gromacs/math/utilities.h"
+#include "macros.h"
+#include "gromacs/utility/smalloc.h"
+#include "macros.h"
+#include "gmx_fatal.h"
+#include "physics.h"
+#include "force.h"
+#include "tables.h"
+#include "nonbonded.h"
+#include "invblock.h"
+#include "names.h"
+#include "network.h"
+#include "pbc.h"
+#include "ns.h"
+#include "mshift.h"
+#include "txtdump.h"
+#include "coulomb.h"
+#include "md_support.h"
+#include "md_logging.h"
+#include "domdec.h"
+#include "qmmm.h"
+#include "copyrite.h"
+#include "mtop_util.h"
+#include "nbnxn_simd.h"
+#include "nbnxn_search.h"
+#include "nbnxn_atomdata.h"
+#include "nbnxn_consts.h"
+#include "gmx_omp_nthreads.h"
+#include "gmx_detect_hardware.h"
+#include "inputrec.h"
+
+#include "types/nbnxn_cuda_types_ext.h"
+#include "gpu_utils.h"
+#include "nbnxn_cuda_data_mgmt.h"
+#include "pmalloc_cuda.h"
+
+/* Allocate a single t_forcerec with snew and hand it to the caller */
+t_forcerec *mk_forcerec(void)
+{
+    t_forcerec *fr = NULL;
+
+    snew(fr, 1);
+    return fr;
+}
+
+#ifdef DEBUG
+/* Debug helper: print one line per atom type pair with its non-bonded
+ * parameters, Buckingham a/b/c when bBHAM is set and LJ c6/c12 otherwise.
+ * The 6.0 and 12.0 prefactors that mk_nbfp multiplies in are divided out.
+ */
+static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
+{
+    int ti, tj;
+
+    for (ti = 0; ti < atnr; ti++)
+    {
+        for (tj = 0; tj < atnr; tj++)
+        {
+            fprintf(fp, "%2d - %2d", ti, tj);
+            if (!bBHAM)
+            {
+                fprintf(fp, " c6=%10g, c12=%10g\n",
+                        C6(nbfp, atnr, ti, tj)/6.0,
+                        C12(nbfp, atnr, ti, tj)/12.0);
+                continue;
+            }
+            fprintf(fp, " a=%10g, b=%10g, c=%10g\n",
+                    BHAMA(nbfp, atnr, ti, tj),
+                    BHAMB(nbfp, atnr, ti, tj),
+                    BHAMC(nbfp, atnr, ti, tj)/6.0);
+        }
+    }
+}
+#endif
+
+/* Build the non-bonded parameter table from the atnr x atnr block of
+ * pair parameters at the start of idef->iparams.
+ * With bBHAM the table holds three values (a, b, 6*c) per type pair,
+ * otherwise two (6*c6, 12*c12). The caller owns the returned array.
+ */
+static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
+{
+    const int atnr = idef->atnr;
+    real     *nbfp;
+    int       ti, tj, pair;
+
+    snew(nbfp, (bBHAM ? 3 : 2)*atnr*atnr);
+
+    for (ti = 0; ti < atnr; ti++)
+    {
+        for (tj = 0; tj < atnr; tj++)
+        {
+            /* iparams stores the pair matrix row by row */
+            pair = ti*atnr + tj;
+            if (bBHAM)
+            {
+                BHAMA(nbfp, atnr, ti, tj) = idef->iparams[pair].bham.a;
+                BHAMB(nbfp, atnr, ti, tj) = idef->iparams[pair].bham.b;
+                /* nbfp now includes the 6.0 derivative prefactor */
+                BHAMC(nbfp, atnr, ti, tj) = idef->iparams[pair].bham.c*6.0;
+            }
+            else
+            {
+                /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                C6(nbfp, atnr, ti, tj)  = idef->iparams[pair].lj.c6*6.0;
+                C12(nbfp, atnr, ti, tj) = idef->iparams[pair].lj.c12*12.0;
+            }
+        }
+    }
+
+    return nbfp;
+}
+
+/* For LJ-PME simulations, we correct the energies with the reciprocal space
+ * inside of the cut-off. To do this the non-bonded kernels need access to
+ * the C6 values used on the reciprocal grid in pme.c.
+ *
+ * The pair value is built from the diagonal (i,i) and (j,j) parameters:
+ * a geometric mean by default, or the Lorentz-Berthelot form when
+ * fr->ljpme_combination_rule is eljpmeLB and none of the inputs is zero.
+ * The caller owns the returned array.
+ */
+static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
+{
+    const int atnr = idef->atnr;
+    real     *grid;
+    real      c6, c6i, c6j, c12i, c12j;
+    real      epsi, epsj, sigmai, sigmaj;
+    int       ti, tj;
+
+    snew(grid, 2*atnr*atnr);
+    for (ti = 0; ti < atnr; ti++)
+    {
+        /* Diagonal entry of the row type, the same for every column */
+        c6i  = idef->iparams[ti*(atnr+1)].lj.c6;
+        c12i = idef->iparams[ti*(atnr+1)].lj.c12;
+        for (tj = 0; tj < atnr; tj++)
+        {
+            c6j  = idef->iparams[tj*(atnr+1)].lj.c6;
+            c12j = idef->iparams[tj*(atnr+1)].lj.c12;
+            c6   = sqrt(c6i * c6j);
+            if (fr->ljpme_combination_rule == eljpmeLB
+                && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
+            {
+                sigmai = pow(c12i / c6i, 1.0/6.0);
+                sigmaj = pow(c12j / c6j, 1.0/6.0);
+                epsi   = c6i * c6i / c12i;
+                epsj   = c6j * c6j / c12j;
+                c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
+            }
+            /* Store the elements at the same relative positions as C6 in
+             * nbfp in order to simplify access in the kernels
+             */
+            grid[2*(atnr*ti+tj)] = c6*6.0;
+        }
+    }
+    return grid;
+}
+
+/* Build an LJ parameter table (6*c6, 12*c12 per type pair, laid out as
+ * in mk_nbfp) in which every pair is derived from the diagonal (i,i) and
+ * (j,j) parameters with a combination rule instead of being read from
+ * the pair matrix.
+ *
+ * The default is the geometric mean of c6 and of c12. With comb_rule ==
+ * eCOMB_ARITHMETIC and non-zero c6 and c12, sigma is averaged
+ * arithmetically and epsilon geometrically. The caller owns the
+ * returned array.
+ */
+static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
+{
+    real *nbfp;
+    int   i, j, atnr;
+    real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
+    real  c6, c12;
+
+    atnr = idef->atnr;
+    snew(nbfp, 2*atnr*atnr);
+    for (i = 0; i < atnr; ++i)
+    {
+        for (j = 0; j < atnr; ++j)
+        {
+            c6i  = idef->iparams[i*(atnr+1)].lj.c6;
+            c12i = idef->iparams[i*(atnr+1)].lj.c12;
+            c6j  = idef->iparams[j*(atnr+1)].lj.c6;
+            c12j = idef->iparams[j*(atnr+1)].lj.c12;
+            c6   = sqrt(c6i * c6j);
+            c12  = sqrt(c12i * c12j);
+            if (comb_rule == eCOMB_ARITHMETIC
+                && !gmx_numzero(c6) && !gmx_numzero(c12))
+            {
+                sigmai = pow(c12i / c6i, 1.0/6.0);
+                sigmaj = pow(c12j / c6j, 1.0/6.0);
+                /* epsi and epsj are 4*epsilon; the factor 4 is exactly
+                 * what c6 = 4*eps*sigma^6 and c12 = 4*eps*sigma^12 need.
+                 */
+                epsi   = c6i * c6i / c12i;
+                epsj   = c6j * c6j / c12j;
+                /* Epsilon combines as a geometric mean. Without the sqrt
+                 * the i == j case would give c6^3/c12 instead of c6.
+                 */
+                c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
+                c12    = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
+            }
+            C6(nbfp, atnr, i, j)   = c6*6.0;
+            C12(nbfp, atnr, i, j)  = c12*12.0;
+        }
+    }
+    return nbfp;
+}
+
+/* This routine sets fr->solvent_opt to the most common solvent in the
+ * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
+ * the fr->solvent_type array with the correct type (or esolNO).
+ *
+ * Charge groups that fulfill the conditions but are not identical to the
+ * most common one will be marked as esolNO in the solvent_type array.
+ *
+ * TIP3p is identical to SPC for these purposes, so we call it
+ * SPC in the arrays (Apologies to Bill Jorgensen ;-)
+ *
+ * NOTE: QM particles should not become an optimized solvent, not even
+ * if there is only one charge group in the QM part.
+ */
+
+typedef struct /* one candidate solvent type collected by check_solvent_cg */
+{
+ int model; /* solvent model of this entry: esolSPC or esolTIP4P */
+ int count; /* sum of the nmol values of all charge groups assigned to this entry */
+ int vdwtype[4]; /* atom type of each atom in the charge group; 3 entries set for SPC, 4 for TIP4P */
+ real charge[4]; /* charge q of each atom, indexed like vdwtype */
+} solvent_parameters_t;
+
+/* Returns TRUE when at least one atom j0 <= j < j1 has a QMMM group
+ * number below qm_grps->nr - 1. A NULL qm_grpnr (no QMMM group
+ * numbers available) gives FALSE.
+ */
+static gmx_bool
+solvent_cg_has_qm_atom(const unsigned char *qm_grpnr,
+                       const t_grps        *qm_grps,
+                       int                  j0,
+                       int                  j1)
+{
+    int j;
+
+    if (qm_grpnr == NULL)
+    {
+        return FALSE;
+    }
+    for (j = j0; j < j1; j++)
+    {
+        if (qm_grpnr[j] < qm_grps->nr - 1)
+        {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+/* Returns TRUE when any atom j0 <= j < j1 is perturbed (free energy) */
+static gmx_bool
+solvent_cg_is_perturbed(const t_atom *atom, int j0, int j1)
+{
+    int j;
+
+    for (j = j0; j < j1; j++)
+    {
+        if (PERTURBED(atom[j]))
+        {
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+/* Returns TRUE when a charge group of nj atoms with the given types and
+ * charges is identical to the stored solvent type sp.
+ */
+static gmx_bool
+solvent_cg_matches(const solvent_parameters_t *sp,
+                   int                         nj,
+                   const int                  *vdwtype,
+                   const real                 *charge)
+{
+    int j;
+
+    /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
+    if ((sp->model == esolSPC && nj != 3) ||
+        (sp->model == esolTIP4P && nj != 4))
+    {
+        return FALSE;
+    }
+
+    /* Types and charges must match for all atoms in the molecule */
+    for (j = 0; j < nj; j++)
+    {
+        if (vdwtype[j] != sp->vdwtype[j] || charge[j] != sp->charge[j])
+        {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+/* Returns TRUE when atom type `type` has a non-zero VdW parameter in
+ * fr->nbfp in combination with at least one atom type.
+ */
+static gmx_bool
+solvent_cg_type_has_vdw(const t_forcerec *fr, int type)
+{
+    int k;
+
+    for (k = 0; k < fr->ntype; k++)
+    {
+        if (fr->bBHAM)
+        {
+            if (BHAMA(fr->nbfp, fr->ntype, type, k) != 0.0 ||
+                BHAMB(fr->nbfp, fr->ntype, type, k) != 0.0 ||
+                BHAMC(fr->nbfp, fr->ntype, type, k) != 0.0)
+            {
+                return TRUE;
+            }
+        }
+        else
+        {
+            /* Standard LJ */
+            if (C6(fr->nbfp, fr->ntype, type, k) != 0.0 ||
+                C12(fr->nbfp, fr->ntype, type, k) != 0.0)
+            {
+                return TRUE;
+            }
+        }
+    }
+
+    return FALSE;
+}
+
+/* Decide whether charge group cg0 of molecule type molt can use the
+ * water-optimized loops and, if so, which solvent type it belongs to.
+ *
+ * We use a list with parameters for each solvent type.
+ * Every time we discover a new molecule that fulfills the basic
+ * conditions for a solvent we compare with the previous entries
+ * in this list. If the parameters are the same we just increase
+ * the counter for that type by nmol, and otherwise we append a new type
+ * based on the current molecule.
+ *
+ * molt, cg0               molecule type and charge group index in it
+ * nmol                    number of molecules this charge group stands for
+ * qm_grpnr, qm_grps       QMMM group numbers for the atoms of this
+ *                         molecule (qm_grpnr may be NULL) and the groups
+ * fr                      supplies ntype, bBHAM and nbfp
+ * n_solvent_parameters,
+ * solvent_parameters_p    in/out: the list of solvent types, may be
+ *                         reallocated and extended by one entry
+ * cginfo                  the cginfo flags of this charge group
+ * cg_sp                   out: index into the list, or -1 when the
+ *                         charge group is not an optimizable solvent
+ */
+static void
+check_solvent_cg(const gmx_moltype_t    *molt,
+                 int                     cg0,
+                 int                     nmol,
+                 const unsigned char    *qm_grpnr,
+                 const t_grps           *qm_grps,
+                 t_forcerec *            fr,
+                 int                    *n_solvent_parameters,
+                 solvent_parameters_t  **solvent_parameters_p,
+                 int                     cginfo,
+                 int                    *cg_sp)
+{
+    const t_atom         *atom;
+    solvent_parameters_t *solvent_parameters;
+    solvent_parameters_t *new_sp;
+    int                   j, k;
+    int                   j0, j1, nj;
+    int                   model;
+    gmx_bool              bSolvent;
+    real                  tmp_charge[4]  = { 0.0 }; /* init to zero to make gcc4.8 happy */
+    int                   tmp_vdwtype[4] = { 0 };   /* init to zero to make gcc4.8 happy */
+
+    solvent_parameters = *solvent_parameters_p;
+
+    /* Mark the cg first as non optimized */
+    *cg_sp = -1;
+
+    /* Check if this cg has no exclusions with atoms in other charge groups
+     * and all atoms inside the charge group excluded.
+     * We only have 3 or 4 atom solvent loops.
+     */
+    if (GET_CGINFO_EXCL_INTER(cginfo) ||
+        !GET_CGINFO_EXCL_INTRA(cginfo))
+    {
+        return;
+    }
+
+    /* Get the atom index range of this charge group */
+    j0 = molt->cgs.index[cg0];
+    j1 = molt->cgs.index[cg0+1];
+
+    /* Number of atoms in our charge group */
+    nj = j1 - j0;
+
+    if (debug)
+    {
+        fprintf(debug,
+                "Moltype '%s': there are %d atoms in this charge group\n",
+                *molt->name, nj);
+    }
+
+    /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
+     * otherwise skip it.
+     */
+    if (nj < 3 || nj > 4)
+    {
+        return;
+    }
+
+    /* Cannot use solvent optimization with QM */
+    if (solvent_cg_has_qm_atom(qm_grpnr, qm_grps, j0, j1))
+    {
+        return;
+    }
+
+    atom = molt->atoms.atom;
+
+    /* If it is perturbed (free energy) we can't use the solvent loops,
+     * so then we just skip to the next molecule.
+     */
+    if (solvent_cg_is_perturbed(atom, j0, j1))
+    {
+        return;
+    }
+
+    /* Now it's only a question if the VdW and charge parameters
+     * are OK. Before doing the check we compare and see if they are
+     * identical to a possible previous solvent type.
+     * First we assign the current types and charges.
+     */
+    for (j = 0; j < nj; j++)
+    {
+        tmp_vdwtype[j] = atom[j0+j].type;
+        tmp_charge[j]  = atom[j0+j].q;
+    }
+
+    /* Does it match any previous solvent type? */
+    for (k = 0; k < *n_solvent_parameters; k++)
+    {
+        if (solvent_cg_matches(&solvent_parameters[k], nj,
+                               tmp_vdwtype, tmp_charge))
+        {
+            /* Congratulations! We have a matched solvent.
+             * Flag it with this type for later processing.
+             */
+            *cg_sp = k;
+            solvent_parameters[k].count += nmol;
+
+            /* We are done with this charge group */
+            return;
+        }
+    }
+
+    /* If we get here, we have a tentative new solvent type.
+     * Before we add it we must check that it fulfills the requirements
+     * of the solvent optimized loops. Only atom 1 might have VdW.
+     * We already checked that the atoms weren't perturbed,
+     * so we only need to check state A.
+     */
+    for (j = 1; j < nj; j++)
+    {
+        if (solvent_cg_type_has_vdw(fr, tmp_vdwtype[j]))
+        {
+            return;
+        }
+    }
+
+    if (nj == 3)
+    {
+        /* So, is it an SPC?
+         * For this we require that atoms 1 and 2 have charge and
+         * that the charges on atoms 2 & 3 are the same.
+         */
+        model    = esolSPC;
+        bSolvent = (tmp_charge[0] != 0 &&
+                    tmp_charge[1] != 0 &&
+                    tmp_charge[2] == tmp_charge[1]);
+    }
+    else
+    {
+        /* Or could it be a TIP4P?
+         * For this we require that atoms 2,3,4 have charge, but not atom 1,
+         * and that the charges on atoms 2 & 3 are the same.
+         */
+        model    = esolTIP4P;
+        bSolvent = (tmp_charge[0] == 0 &&
+                    tmp_charge[1] != 0 &&
+                    tmp_charge[2] == tmp_charge[1] &&
+                    tmp_charge[3] != 0);
+    }
+    if (!bSolvent)
+    {
+        return;
+    }
+
+    /* Append the new solvent type and assign this charge group to it */
+    srenew(solvent_parameters, *n_solvent_parameters+1);
+    new_sp        = &solvent_parameters[*n_solvent_parameters];
+    new_sp->model = model;
+    new_sp->count = nmol;
+    for (k = 0; k < nj; k++)
+    {
+        new_sp->vdwtype[k] = tmp_vdwtype[k];
+        new_sp->charge[k]  = tmp_charge[k];
+    }
+
+    *cg_sp = *n_solvent_parameters;
+    (*n_solvent_parameters)++;
+
+    /* srenew may have moved the list */
+    *solvent_parameters_p = solvent_parameters;
+}
+
+/* Find the most common optimizable solvent type in the system, store
+ * its model in fr->solvent_opt, count the matching molecules in
+ * fr->nWatMol and set the solvent flag in cginfo_mb for every charge
+ * group (esolNO for all groups that are not of the most common type).
+ * When fp is not NULL and a solvent was found, a line is printed to fp.
+ */
+static void
+check_solvent(FILE *               fp,
+              const gmx_mtop_t *   mtop,
+              t_forcerec *         fr,
+              cginfo_mb_t         *cginfo_mb)
+{
+    const t_block        *cgs;
+    const gmx_moltype_t  *molt;
+    int                   mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
+    int                   n_solvent_parameters;
+    solvent_parameters_t *solvent_parameters;
+    int                 **cg_sp;
+    int                   bestsp, bestsol;
+
+    if (debug)
+    {
+        fprintf(debug, "Going to determine what solvent types we have.\n");
+    }
+
+    n_solvent_parameters = 0;
+    solvent_parameters   = NULL;
+    /* Allocate temporary array for solvent type */
+    snew(cg_sp, mtop->nmolblock);
+
+    at_offset = 0;
+    for (mb = 0; mb < mtop->nmolblock; mb++)
+    {
+        molt = &mtop->moltype[mtop->molblock[mb].type];
+        cgs  = &molt->cgs;
+        /* Here we have to loop over all individual molecules
+         * because we need to check for QMMM particles.
+         */
+        snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
+        /* nmol_ch molecules have their own cginfo entries;
+         * each of them stands for nmol molecules of the block.
+         */
+        nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
+        nmol    = mtop->molblock[mb].nmol/nmol_ch;
+        for (mol = 0; mol < nmol_ch; mol++)
+        {
+            cgm = mol*cgs->nr;
+            am  = mol*cgs->index[cgs->nr];
+            for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
+            {
+                check_solvent_cg(molt, cg_mol, nmol,
+                                 mtop->groups.grpnr[egcQMMM] ?
+                                 mtop->groups.grpnr[egcQMMM]+at_offset+am : NULL,
+                                 &mtop->groups.grps[egcQMMM],
+                                 fr,
+                                 &n_solvent_parameters, &solvent_parameters,
+                                 cginfo_mb[mb].cginfo[cgm+cg_mol],
+                                 &cg_sp[mb][cgm+cg_mol]);
+            }
+        }
+        /* The QMMM group numbers are indexed by global atom, so the next
+         * block starts after all molecules of this block, not after one.
+         */
+        at_offset += mtop->molblock[mb].nmol*cgs->index[cgs->nr];
+    }
+
+    /* Puh! We finished going through all charge groups.
+     * Now find the most common solvent model.
+     */
+
+    /* Most common solvent this far; -2 can never equal a cg_sp entry,
+     * which is either -1 or a valid index.
+     */
+    bestsp = -2;
+    for (i = 0; i < n_solvent_parameters; i++)
+    {
+        if (bestsp == -2 ||
+            solvent_parameters[i].count > solvent_parameters[bestsp].count)
+        {
+            bestsp = i;
+        }
+    }
+
+    if (bestsp >= 0)
+    {
+        bestsol = solvent_parameters[bestsp].model;
+    }
+    else
+    {
+        bestsol = esolNO;
+    }
+
+#ifdef DISABLE_WATER_NLIST
+    bestsol = esolNO;
+#endif
+
+    fr->nWatMol = 0;
+    for (mb = 0; mb < mtop->nmolblock; mb++)
+    {
+        cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
+        nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
+        for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
+        {
+            if (cg_sp[mb][i] == bestsp)
+            {
+                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
+                fr->nWatMol += nmol;
+            }
+            else
+            {
+                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
+            }
+        }
+        sfree(cg_sp[mb]);
+    }
+    sfree(cg_sp);
+
+    if (bestsol != esolNO && fp != NULL)
+    {
+        fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
+                esol_names[bestsol],
+                solvent_parameters[bestsp].count);
+    }
+
+    sfree(solvent_parameters);
+    fr->solvent_opt = bestsol;
+}
+
+enum { /* per-atom constraint kind stored in a_con[] by init_cginfo_mb: none, a constraint interaction other than SETTLE, or SETTLE */
+ acNONE = 0, acCONSTRAINT, acSETTLE
+};
+
+static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
+ t_forcerec *fr, gmx_bool bNoSolvOpt,
+ gmx_bool *bFEP_NonBonded,
+ gmx_bool *bExcl_IntraCGAll_InterCGNone)
+{
+ const t_block *cgs;
+ const t_blocka *excl;
+ const gmx_moltype_t *molt;
+ const gmx_molblock_t *molb;
+ cginfo_mb_t *cginfo_mb;
+ gmx_bool *type_VDW;
+ int *cginfo;
+ int cg_offset, a_offset, cgm, am;
+ int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
+ int *a_con;
+ int ftype;
+ int ia;
+ gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
+
+ ncg_tot = ncg_mtop(mtop);
+ snew(cginfo_mb, mtop->nmolblock);
+
+ snew(type_VDW, fr->ntype);
+ for (ai = 0; ai < fr->ntype; ai++)
+ {
+ type_VDW[ai] = FALSE;
+ for (j = 0; j < fr->ntype; j++)
+ {
+ type_VDW[ai] = type_VDW[ai] ||
+ fr->bBHAM ||
+ C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
+ C12(fr->nbfp, fr->ntype, ai, j) != 0;
+ }
+ }
+
+ *bFEP_NonBonded = FALSE;
+ *bExcl_IntraCGAll_InterCGNone = TRUE;
+
+ excl_nalloc = 10;
+ snew(bExcl, excl_nalloc);
+ cg_offset = 0;
+ a_offset = 0;
+ for (mb = 0; mb < mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+ cgs = &molt->cgs;
+ excl = &molt->excls;
+
+ /* Check if the cginfo is identical for all molecules in this block.
+ * If so, we only need an array of the size of one molecule.
+ * Otherwise we make an array of #mol times #cgs per molecule.
+ */
+ bId = TRUE;
+ am = 0;
+ for (m = 0; m < molb->nmol; m++)
+ {
+ am = m*cgs->index[cgs->nr];
+ for (cg = 0; cg < cgs->nr; cg++)
+ {
+ a0 = cgs->index[cg];
+ a1 = cgs->index[cg+1];
+ if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
+ ggrpnr(&mtop->groups, egcENER, a_offset +a0))
+ {
+ bId = FALSE;
+ }
+ if (mtop->groups.grpnr[egcQMMM] != NULL)
+ {
+ for (ai = a0; ai < a1; ai++)
+ {
+ if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
+ mtop->groups.grpnr[egcQMMM][a_offset +ai])
+ {
+ bId = FALSE;
+ }
+ }
+ }
+ }
+ }
+
+ cginfo_mb[mb].cg_start = cg_offset;
+ cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
+ cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
+ snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
+ cginfo = cginfo_mb[mb].cginfo;
+
+ /* Set constraints flags for constrained atoms */
+ snew(a_con, molt->atoms.nr);
+ for (ftype = 0; ftype < F_NRE; ftype++)
+ {
+ if (interaction_function[ftype].flags & IF_CONSTRAINT)
+ {
+ int nral;
+
+ nral = NRAL(ftype);
+ for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
+ {
+ int a;
+
+ for (a = 0; a < nral; a++)
+ {
+ a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
+ (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
+ }
+ }
+ }
+ }
+
+ for (m = 0; m < (bId ? 1 : molb->nmol); m++)
+ {
+ cgm = m*cgs->nr;
+ am = m*cgs->index[cgs->nr];
+ for (cg = 0; cg < cgs->nr; cg++)
+ {
+ a0 = cgs->index[cg];
+ a1 = cgs->index[cg+1];
+
+ /* Store the energy group in cginfo */
+ gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
+ SET_CGINFO_GID(cginfo[cgm+cg], gid);
+
+ /* Check the intra/inter charge group exclusions */
+ if (a1-a0 > excl_nalloc)
+ {
+ excl_nalloc = a1 - a0;
+ srenew(bExcl, excl_nalloc);
+ }
+ /* bExclIntraAll: all intra cg interactions excluded
+ * bExclInter: any inter cg interactions excluded
+ */
+ bExclIntraAll = TRUE;
+ bExclInter = FALSE;
+ bHaveVDW = FALSE;
+ bHaveQ = FALSE;
+ bHavePerturbedAtoms = FALSE;
+ for (ai = a0; ai < a1; ai++)
+ {
+ /* Check VDW and electrostatic interactions */
+ bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
+ type_VDW[molt->atoms.atom[ai].typeB]);
+ bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
+ molt->atoms.atom[ai].qB != 0);
+
+ bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
+
+ /* Clear the exclusion list for atom ai */
+ for (aj = a0; aj < a1; aj++)
+ {
+ bExcl[aj-a0] = FALSE;
+ }
+ /* Loop over all the exclusions of atom ai */
+ for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
+ {
+ aj = excl->a[j];
+ if (aj < a0 || aj >= a1)
+ {
+ bExclInter = TRUE;
+ }
+ else
+ {
+ bExcl[aj-a0] = TRUE;
+ }
+ }
+ /* Check if ai excludes a0 to a1 */
+ for (aj = a0; aj < a1; aj++)
+ {
+ if (!bExcl[aj-a0])
+ {
+ bExclIntraAll = FALSE;
+ }
+ }
+
+ switch (a_con[ai])
+ {
+ case acCONSTRAINT:
+ SET_CGINFO_CONSTR(cginfo[cgm+cg]);
+ break;
+ case acSETTLE:
+ SET_CGINFO_SETTLE(cginfo[cgm+cg]);
+ break;
+ default:
+ break;
+ }
+ }
+ if (bExclIntraAll)
+ {
+ SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
+ }
+ if (bExclInter)
+ {
+ SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
+ }
+ if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
+ {
+ /* The size in cginfo is currently only read with DD */
+ gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
+ }
+ if (bHaveVDW)
+ {
+ SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
+ }
+ if (bHaveQ)
+ {
+ SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
+ }
+ if (bHavePerturbedAtoms && fr->efep != efepNO)
+ {
+ SET_CGINFO_FEP(cginfo[cgm+cg]);
+ *bFEP_NonBonded = TRUE;
+ }
+ /* Store the charge group size */
+ SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
+
+ if (!bExclIntraAll || bExclInter)
+ {
+ *bExcl_IntraCGAll_InterCGNone = FALSE;
+ }
+ }
+ }
+
+ sfree(a_con);
+
+ cg_offset += molb->nmol*cgs->nr;
+ a_offset += molb->nmol*cgs->index[cgs->nr];
+ }
+ sfree(bExcl);
+
+ /* The solvent optimizer is called after the QM is initialized,
+ * because we don't want the QM subsystem to become an
+ * optimized solvent.
+ */
+
+ check_solvent(fplog, mtop, fr, cginfo_mb);
+
+ if (getenv("GMX_NO_SOLV_OPT"))
+ {
+ if (fplog)
+ {
+ fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
+ "Disabling all solvent optimization\n");
+ }
+ fr->solvent_opt = esolNO;
+ }
+ if (bNoSolvOpt)
+ {
+ fr->solvent_opt = esolNO;
+ }
+ if (!fr->solvent_opt)
+ {
+ for (mb = 0; mb < mtop->nmolblock; mb++)
+ {
+ for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
+ {
+ SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
+ }
+ }
+ }
+
+ return cginfo_mb;
+}
+
+ /* Expand the per-molecule-block charge-group info into one flat array
+  * holding an entry for each of the cgi_mb[nmb-1].cg_end charge groups.
+  * The array is allocated here and returned to the caller.
+  */
+ static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
+ {
+     const cginfo_mb_t *blk;
+     int                ncg, cg;
+     int               *cginfo;
+
+     ncg = cgi_mb[nmb-1].cg_end;
+     snew(cginfo, ncg);
+
+     blk = cgi_mb;
+     for (cg = 0; cg < ncg; cg++)
+     {
+         /* Move on to the block that contains cg; blocks are visited in order */
+         while (cg >= blk->cg_end)
+         {
+             blk++;
+         }
+         /* The stored entries of a block repeat with period cg_mod */
+         cginfo[cg] = blk->cginfo[(cg - blk->cg_start) % blk->cg_mod];
+     }
+
+     return cginfo;
+ }
+
+ /* Compute the total charge, the sum of squared charges and the sum of the
+  * diagonal (same-type) C6 parameters over all atoms in the topology and
+  * store them in fr->qsum[], fr->q2sum[] and fr->c6sum[].
+  *
+  * Index 0 holds the topology-A values. Index 1 holds the topology-B values
+  * when free-energy perturbation is active (fr->efep != efepNO) and is a
+  * copy of index 0 otherwise. The total charge is written to log when
+  * log is not NULL.
+  */
+ static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
+ {
+     double         qsum, q2sum, q, c6sum, c6;
+     int            mb, nmol, i, state, nstate, type;
+     const t_atoms *atoms;
+
+     nstate = (fr->efep != efepNO) ? 2 : 1;
+
+     for (state = 0; state < nstate; state++)
+     {
+         qsum  = 0;
+         q2sum = 0;
+         c6sum = 0;
+         for (mb = 0; mb < mtop->nmolblock; mb++)
+         {
+             nmol  = mtop->molblock[mb].nmol;
+             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+             for (i = 0; i < atoms->nr; i++)
+             {
+                 if (state == 0)
+                 {
+                     q    = atoms->atom[i].q;
+                     type = atoms->atom[i].type;
+                 }
+                 else
+                 {
+                     q    = atoms->atom[i].qB;
+                     type = atoms->atom[i].typeB;
+                 }
+                 qsum  += nmol*q;
+                 q2sum += nmol*q*q;
+                 /* Diagonal element (type, type) of the atnr x atnr LJ matrix */
+                 c6     = mtop->ffparams.iparams[type*(mtop->ffparams.atnr+1)].lj.c6;
+                 c6sum += nmol*c6;
+             }
+         }
+         /* Store once per state, after all molecule blocks have been summed.
+          * This also gives well-defined (zero) sums for an empty topology.
+          */
+         fr->qsum[state]  = qsum;
+         fr->q2sum[state] = q2sum;
+         fr->c6sum[state] = c6sum;
+     }
+
+     if (nstate == 1)
+     {
+         /* No perturbation: state B is identical to state A */
+         fr->qsum[1]  = fr->qsum[0];
+         fr->q2sum[1] = fr->q2sum[0];
+         fr->c6sum[1] = fr->c6sum[0];
+     }
+
+     if (log)
+     {
+         if (fr->efep == efepNO)
+         {
+             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
+         }
+         else
+         {
+             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
+                     fr->qsum[0], fr->qsum[1]);
+         }
+     }
+ }
+
+ /* Recompute the reaction-field constants (kappa, k_rf, c_rf) for the given
+  * box. Only generalized reaction-field (eelGRF) needs this; for every other
+  * electrostatics type the call does nothing.
+  */
+ void update_forcerec(t_forcerec *fr, matrix box)
+ {
+     if (fr->eeltype != eelGRF)
+     {
+         return;
+     }
+
+     calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
+                fr->rcoulomb, fr->temp, fr->zsquare, box,
+                &fr->kappa, &fr->k_rf, &fr->c_rf);
+ }
+
+ /* Compute the average dispersion (C6, or Buckingham C) and repulsion (C12)
+  * parameters over all non-excluded atom pairs and store them in
+  * fr->avcsix[] and fr->avctwelve[]. Index 0 is topology A; index 1 is
+  * only filled when free-energy perturbation is active.
+  *
+  * Without test-particle insertion (fr->n_tpi == 0) all pairs in the system
+  * are counted via per-type atom counts and the excluded pairs are then
+  * subtracted. With TPI only the pairs between the test molecule (the last
+  * molecule block) and the rest of the system are counted.
+  *
+  * When there are no pairs left the averages are set to zero; a warning is
+  * written to fplog if it is not NULL.
+  */
+ void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
+ {
+     const t_atoms  *atoms, *atoms_tpi;
+     const t_blocka *excl;
+     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
+     gmx_int64_t     npair, npair_ij, tmpi, tmpj;
+     double          csix, ctwelve;
+     int             ntp, *typecount;
+     gmx_bool        bBHAM;
+     real           *nbfp;
+     real           *nbfp_comb = NULL;
+
+     ntp   = fr->ntype;
+     bBHAM = fr->bBHAM;
+     nbfp  = fr->nbfp;
+
+     /* For LJ-PME, we want to correct for the difference between the
+      * actual C6 values and the C6 values used by the LJ-PME based on
+      * combination rules. */
+
+     if (EVDW_PME(fr->vdwtype))
+     {
+         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
+                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
+         for (tpi = 0; tpi < ntp; ++tpi)
+         {
+             for (tpj = 0; tpj < ntp; ++tpj)
+             {
+                 C6(nbfp_comb, ntp, tpi, tpj) =
+                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
+                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
+             }
+         }
+         nbfp = nbfp_comb;
+     }
+     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
+     {
+         csix    = 0;
+         ctwelve = 0;
+         npair   = 0;
+         nexcl   = 0;
+         if (!fr->n_tpi)
+         {
+             /* Count the types so we avoid natoms^2 operations */
+             snew(typecount, ntp);
+             gmx_mtop_count_atomtypes(mtop, q, typecount);
+
+             for (tpi = 0; tpi < ntp; tpi++)
+             {
+                 for (tpj = tpi; tpj < ntp; tpj++)
+                 {
+                     tmpi = typecount[tpi];
+                     tmpj = typecount[tpj];
+                     if (tpi != tpj)
+                     {
+                         npair_ij = tmpi*tmpj;
+                     }
+                     else
+                     {
+                         npair_ij = tmpi*(tmpi - 1)/2;
+                     }
+                     if (bBHAM)
+                     {
+                         /* nbfp now includes the 6.0 derivative prefactor */
+                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
+                     }
+                     else
+                     {
+                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
+                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
+                     }
+                     npair += npair_ij;
+                 }
+             }
+             sfree(typecount);
+             /* Subtract the excluded pairs.
+              * The main reason for subtracting exclusions is that in some cases
+              * some combinations might never occur and the parameters could have
+              * any value. These unused values should not influence the dispersion
+              * correction.
+              */
+             for (mb = 0; mb < mtop->nmolblock; mb++)
+             {
+                 nmol  = mtop->molblock[mb].nmol;
+                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
+                 for (i = 0; (i < atoms->nr); i++)
+                 {
+                     if (q == 0)
+                     {
+                         tpi = atoms->atom[i].type;
+                     }
+                     else
+                     {
+                         tpi = atoms->atom[i].typeB;
+                     }
+                     j1  = excl->index[i];
+                     j2  = excl->index[i+1];
+                     for (j = j1; j < j2; j++)
+                     {
+                         k = excl->a[j];
+                         /* Only handle k > i so each excluded pair is subtracted once */
+                         if (k > i)
+                         {
+                             if (q == 0)
+                             {
+                                 tpj = atoms->atom[k].type;
+                             }
+                             else
+                             {
+                                 tpj = atoms->atom[k].typeB;
+                             }
+                             if (bBHAM)
+                             {
+                                 /* nbfp now includes the 6.0 derivative prefactor */
+                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
+                             }
+                             else
+                             {
+                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
+                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
+                             }
+                             nexcl += nmol;
+                         }
+                     }
+                 }
+             }
+         }
+         else
+         {
+             /* Only correct for the interaction of the test particle
+              * with the rest of the system.
+              */
+             atoms_tpi =
+                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
+
+             for (mb = 0; mb < mtop->nmolblock; mb++)
+             {
+                 nmol  = mtop->molblock[mb].nmol;
+                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+                 for (j = 0; j < atoms->nr; j++)
+                 {
+                     nmolc = nmol;
+                     /* Remove the interaction of the test charge group
+                      * with itself.
+                      */
+                     if (mb == mtop->nmolblock-1)
+                     {
+                         nmolc--;
+
+                         if (mb == 0 && nmol == 1)
+                         {
+                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
+                         }
+                     }
+                     if (q == 0)
+                     {
+                         tpj = atoms->atom[j].type;
+                     }
+                     else
+                     {
+                         tpj = atoms->atom[j].typeB;
+                     }
+                     for (i = 0; i < fr->n_tpi; i++)
+                     {
+                         if (q == 0)
+                         {
+                             tpi = atoms_tpi->atom[i].type;
+                         }
+                         else
+                         {
+                             tpi = atoms_tpi->atom[i].typeB;
+                         }
+                         if (bBHAM)
+                         {
+                             /* nbfp now includes the 6.0 derivative prefactor */
+                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
+                         }
+                         else
+                         {
+                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
+                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
+                         }
+                         npair += nmolc;
+                     }
+                 }
+             }
+         }
+         if (npair - nexcl <= 0)
+         {
+             /* No pairs to average over. This must be handled whether or not
+              * a log file is open, otherwise the division below would divide
+              * by zero.
+              */
+             if (fplog)
+             {
+                 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
+             }
+             csix    = 0;
+             ctwelve = 0;
+         }
+         else
+         {
+             csix    /= npair - nexcl;
+             ctwelve /= npair - nexcl;
+         }
+         if (debug)
+         {
+             fprintf(debug, "Counted %d exclusions\n", nexcl);
+             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
+             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
+         }
+         fr->avcsix[q]    = csix;
+         fr->avctwelve[q] = ctwelve;
+     }
+
+     if (EVDW_PME(fr->vdwtype))
+     {
+         sfree(nbfp_comb);
+     }
+
+     if (fplog != NULL)
+     {
+         if (fr->eDispCorr == edispcAllEner ||
+             fr->eDispCorr == edispcAllEnerPres)
+         {
+             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
+                     fr->avcsix[0], fr->avctwelve[0]);
+         }
+         else
+         {
+             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
+         }
+     }
+ }
+
+
+ /* Scan the Buckingham b parameter for every pair of atoms (state-A types)
+  * taken from every pair of molecule types and store the largest value in
+  * fr->bham_b_max. The smallest and largest values are reported in fplog
+  * when it is not NULL. An atom type outside [0, fr->ntype) is fatal.
+  */
+ static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
+                            const gmx_mtop_t *mtop)
+ {
+     const t_atoms *at1, *at2;
+     int            mt1, mt2, i, j, tpi, tpj, ntypes;
+     real           b, bmin;
+     real          *nbfp;
+
+     if (fplog)
+     {
+         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
+     }
+
+     ntypes = fr->ntype;
+     nbfp   = fr->nbfp;
+
+     /* bmin == -1 marks "no value seen yet" */
+     bmin           = -1;
+     fr->bham_b_max = 0;
+
+     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
+     {
+         at1 = &mtop->moltype[mt1].atoms;
+         for (i = 0; i < at1->nr; i++)
+         {
+             tpi = at1->atom[i].type;
+             if (tpi >= ntypes)
+             {
+                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
+             }
+
+             /* Start at mt1: pairs of molecule types are visited once */
+             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
+             {
+                 at2 = &mtop->moltype[mt2].atoms;
+                 for (j = 0; j < at2->nr; j++)
+                 {
+                     tpj = at2->atom[j].type;
+                     if (tpj >= ntypes)
+                     {
+                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
+                     }
+
+                     b = BHAMB(nbfp, ntypes, tpi, tpj);
+                     if (b > fr->bham_b_max)
+                     {
+                         fr->bham_b_max = b;
+                     }
+                     if (bmin == -1 || b < bmin)
+                     {
+                         bmin = b;
+                     }
+                 }
+             }
+         }
+     }
+
+     if (fplog)
+     {
+         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
+                 bmin, fr->bham_b_max);
+     }
+ }
+
+ /* Read the non-bonded table from file tabfn into nbl->table_elec_vdw and
+  * split it into a separate electrostatics table (nbl->table_elec) and a
+  * separate Van der Waals table (nbl->table_vdw).
+  *
+  * When both eg1 and eg2 are given, the file name becomes the base name of
+  * tabfn followed by "_<eg1>_<eg2>.<xvg extension>". When tabfn is NULL
+  * nothing is read and the function returns at once.
+  */
+ static void make_nbf_tables(FILE *fp, const output_env_t oenv,
+                             t_forcerec *fr, real rtab,
+                             const t_commrec *cr,
+                             const char *tabfn, char *eg1, char *eg2,
+                             t_nblists *nbl)
+ {
+     char buf[STRLEN];
+     int  i, j;
+
+     if (tabfn == NULL)
+     {
+         if (debug)
+         {
+             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
+         }
+         return;
+     }
+
+     sprintf(buf, "%s", tabfn);
+     if (eg1 != NULL && eg2 != NULL)
+     {
+         /* Overwrite ".<ext>" at the end of the name with the two
+          * energy group names followed by the extension.
+          */
+         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
+                 eg1, eg2, ftp2ext(efXVG));
+     }
+     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
+
+     /* The combined table is also copied into separate Coulomb and LJ
+      * tables to improve cache performance. For performance reasons the
+      * table data is allocated aligned (an alignment of 32 is requested
+      * below). The pointers could be freed but currently aren't.
+      */
+
+     /* Electrostatics: one interaction per table point */
+     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
+     nbl->table_elec.ninteractions = 1;
+     nbl->table_elec.format        = nbl->table_elec_vdw.format;
+     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
+     nbl->table_elec.r             = nbl->table_elec_vdw.r;
+     nbl->table_elec.n             = nbl->table_elec_vdw.n;
+     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
+     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
+     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
+     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
+
+     /* Van der Waals: two interactions (repulsion and dispersion) per point */
+     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
+     nbl->table_vdw.ninteractions = 2;
+     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
+     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
+     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
+     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
+     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
+     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
+     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
+     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
+
+     /* Each combined table point holds 12 values: the first 4 go to the
+      * electrostatics table, the remaining 8 to the Van der Waals table.
+      */
+     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
+     {
+         for (j = 0; j < 12; j++)
+         {
+             if (j < 4)
+             {
+                 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
+             }
+             else
+             {
+                 nbl->table_vdw.data[8*i+j-4] = nbl->table_elec_vdw.data[12*i+j];
+             }
+         }
+     }
+ }
+
+ /* Count, per table number, how many interactions of function type ftype1
+  * or ftype2 in the molecule types of mtop refer to that table.
+  *
+  * *count is grown (new entries zeroed) whenever a table number at or
+  * beyond *ncount is found; *ncount is updated to the new length.
+  * A negative table number is fatal.
+  */
+ static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
+                          int *ncount, int **count)
+ {
+     const gmx_moltype_t *molt;
+     const t_ilist       *il;
+     int                  mt, ftype, stride, i, j, tabnr;
+
+     for (mt = 0; mt < mtop->nmoltype; mt++)
+     {
+         molt = &mtop->moltype[mt];
+         for (ftype = 0; ftype < F_NRE; ftype++)
+         {
+             if (ftype != ftype1 && ftype != ftype2)
+             {
+                 continue;
+             }
+
+             il     = &molt->ilist[ftype];
+             /* Each entry is one parameter index followed by the atom indices */
+             stride = 1 + NRAL(ftype);
+             for (i = 0; i < il->nr; i += stride)
+             {
+                 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
+                 if (tabnr < 0)
+                 {
+                     gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
+                 }
+                 if (tabnr >= *ncount)
+                 {
+                     srenew(*count, tabnr+1);
+                     for (j = *ncount; j <= tabnr; j++)
+                     {
+                         (*count)[j] = 0;
+                     }
+                     *ncount = tabnr+1;
+                 }
+                 (*count)[tabnr]++;
+             }
+         }
+     }
+ }
+
+ /* Build the tables for the tabulated bonded interactions of type ftype1
+  * or ftype2 used in mtop.
+  *
+  * Returns NULL when no such interaction is present. Otherwise returns an
+  * array indexed by table number; only table numbers that are actually
+  * used are read, from the file named after basefn with its extension
+  * replaced by "_<tabext><number>.<xvg extension>".
+  */
+ static bondedtable_t *make_bonded_tables(FILE *fplog,
+                                          int ftype1, int ftype2,
+                                          const gmx_mtop_t *mtop,
+                                          const char *basefn, const char *tabext)
+ {
+     int            i, ncount, *count;
+     char           tabfn[STRLEN];
+     bondedtable_t *tab;
+
+     ncount = 0;
+     count  = NULL;
+     count_tables(ftype1, ftype2, mtop, &ncount, &count);
+
+     if (ncount <= 0)
+     {
+         return NULL;
+     }
+
+     snew(tab, ncount);
+     for (i = 0; i < ncount; i++)
+     {
+         if (count[i] <= 0)
+         {
+             /* Table number i is not referenced by any interaction */
+             continue;
+         }
+         sprintf(tabfn, "%s", basefn);
+         sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
+                 tabext, i, ftp2ext(efXVG));
+         tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
+     }
+     sfree(count);
+
+     return tab;
+ }
+
+ /* Store the charge-group and atom counts in fr and grow the force buffers
+  * that depend on them.
+  *
+  * fr->f_twin is only resized when twin-range is in use (fr->bTwinRange);
+  * fr->f_novirsum_alloc only when fr->bF_NoVirSum is set. Buffers are grown
+  * to over_alloc_dd() of the requested size and never shrunk.
+  */
+ void forcerec_set_ranges(t_forcerec *fr,
+                          int ncg_home, int ncg_force,
+                          int natoms_force,
+                          int natoms_force_constr, int natoms_f_novirsum)
+ {
+     fr->cg0 = 0;
+     fr->hcg = ncg_home;
+
+     /* fr->ncg_force is unused in the standard code,
+      * but it can be useful for modified code dealing with charge groups.
+      */
+     fr->ncg_force           = ncg_force;
+     fr->natoms_force        = natoms_force;
+     fr->natoms_force_constr = natoms_force_constr;
+
+     if (fr->natoms_force_constr > fr->nalloc_force)
+     {
+         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
+
+         if (fr->bTwinRange)
+         {
+             srenew(fr->f_twin, fr->nalloc_force);
+         }
+     }
+
+     if (!fr->bF_NoVirSum)
+     {
+         fr->f_novirsum_n = 0;
+         return;
+     }
+
+     fr->f_novirsum_n = natoms_f_novirsum;
+     if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
+     {
+         fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
+         srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
+     }
+ }
+
+ /* Map a cut-off of exactly 0 to GMX_CUTOFF_INF;
+  * any other value is returned unchanged.
+  */
+ static real cutoff_inf(real cutoff)
+ {
+     return (cutoff == 0) ? GMX_CUTOFF_INF : cutoff;
+ }
+
+ /* Load one AdResS thermodynamic-force table per thermoforce group listed
+  * in ir->adress into fr->atf_tabs.
+  *
+  * The file for each group is named after tabfn, with its extension
+  * replaced by "tf_<energy group name>.<xvg extension>". A NULL tabfn
+  * is fatal.
+  */
+ static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
+                                   t_forcerec *fr, const t_inputrec *ir,
+                                   const char *tabfn, const gmx_mtop_t *mtop,
+                                   matrix box)
+ {
+     char  buf[STRLEN];
+     char *suffix;
+     int   i, j;
+
+     if (tabfn == NULL)
+     {
+         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
+         return;
+     }
+
+     snew(fr->atf_tabs, ir->adress->n_tf_grps);
+
+     sprintf(buf, "%s", tabfn);
+     /* Position in buf where ".<ext>" of tabfn starts; it is overwritten
+      * with the group-specific suffix on every iteration.
+      */
+     suffix = buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1;
+
+     for (i = 0; i < ir->adress->n_tf_grps; i++)
+     {
+         /* Energy group index of this thermoforce group */
+         j = ir->adress->tf_table_index[i];
+         sprintf(suffix, "tf_%s.%s",
+                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
+         if (fp)
+         {
+             fprintf(fp, "loading tf table for energygrp index %d from %s\n", j, buf);
+         }
+         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
+     }
+ }
+
+ /* Decide whether the all-vs-all non-bonded kernels can be used.
+  *
+  * This requires: all cut-offs (rlist, rcoulomb, rvdw) equal to 0, no
+  * periodic boundaries, plain cut-off Van der Waals and Coulomb, no
+  * free-energy perturbation, either no implicit solvent or GBSA with the
+  * Still, HCT or OBC algorithm, and the environment variable
+  * GMX_NO_ALLVSALL not being set. It is additionally disabled when there
+  * is more than one energy monitor group; in that case a note is written
+  * to stderr (master rank only) and to fp when bPrintNote is set.
+  */
+ gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
+ {
+     gmx_bool bSolventOk;
+     gmx_bool bAllvsAll;
+
+     bSolventOk = (ir->implicit_solvent == eisNO ||
+                   (ir->implicit_solvent == eisGBSA &&
+                    (ir->gb_algorithm == egbSTILL ||
+                     ir->gb_algorithm == egbHCT   ||
+                     ir->gb_algorithm == egbOBC)));
+
+     bAllvsAll = (ir->rlist       == 0        &&
+                  ir->rcoulomb    == 0        &&
+                  ir->rvdw        == 0        &&
+                  ir->ePBC        == epbcNONE &&
+                  ir->vdwtype     == evdwCUT  &&
+                  ir->coulombtype == eelCUT   &&
+                  ir->efep        == efepNO   &&
+                  bSolventOk                  &&
+                  getenv("GMX_NO_ALLVSALL") == NULL);
+
+     if (bAllvsAll && ir->opts.ngener > 1)
+     {
+         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
+
+         if (bPrintNote)
+         {
+             if (MASTER(cr))
+             {
+                 fprintf(stderr, "\n%s\n", note);
+             }
+             if (fp != NULL)
+             {
+                 fprintf(fp, "\n%s\n", note);
+             }
+         }
+         bAllvsAll = FALSE;
+     }
+
+     if (bAllvsAll && fp && MASTER(cr))
+     {
+         fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
+     }
+
+     return bAllvsAll;
+ }
+
+
+ /* Set fr->nthreads to the number of OpenMP threads for bonded interactions
+  * and, when there is more than one, allocate the per-thread output
+  * structures: shift forces and nenergrp*nenergrp group-pair energies
+  * for each energy term.
+  */
+ static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
+ {
+     int t, i;
+
+     /* These thread local data structures are used for bondeds only */
+     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
+
+     if (fr->nthreads <= 1)
+     {
+         return;
+     }
+
+     snew(fr->f_t, fr->nthreads);
+     /* Thread 0 uses the global force and energy arrays,
+      * so its entry is left as allocated and the loop starts at 1.
+      */
+     for (t = 1; t < fr->nthreads; t++)
+     {
+         fr->f_t[t].f        = NULL;
+         fr->f_t[t].f_nalloc = 0;
+         snew(fr->f_t[t].fshift, SHIFTS);
+         fr->f_t[t].grpp.nener = nenergrp*nenergrp;
+         for (i = 0; i < egNR; i++)
+         {
+             snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
+         }
+     }
+ }
+
+
+ /* Return whether the requested non-bonded setup is supported by the
+  * accelerated kernels.
+  *
+  * The only unsupported case detected here is a non-GPU run with LJ-PME
+  * using the Lorentz-Berthelot combination rule; a warning is then printed
+  * through md_print_warn() and FALSE is returned.
+  */
+ gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
+                                       const t_commrec  *cr,
+                                       const t_inputrec *ir,
+                                       gmx_bool          bGPU)
+ {
+     if (bGPU)
+     {
+         return TRUE;
+     }
+     if (!(ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
+     {
+         return TRUE;
+     }
+
+     md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
+                   bGPU ? "GPUs" : "SIMD kernels",
+                   bGPU ? "CPU only" : "plain-C kernels");
+
+     return FALSE;
+ }
+
+
+ /* Choose the CPU non-bonded kernel type and the Ewald exclusion-correction
+  * method.
+  *
+  * Without SIMD kernel support (GMX_NBNXN_SIMD undefined) the result is
+  * always the plain-C 4x4 kernel with tabulated Ewald correction.
+  * With SIMD support the choice follows from the compiled-in kernel
+  * flavours and can be overridden through the environment variables
+  * GMX_NBNXN_SIMD_4XN, GMX_NBNXN_SIMD_2XNN, GMX_NBNXN_EWALD_TABLE and
+  * GMX_NBNXN_EWALD_ANALYTICAL (later checks override earlier ones).
+  */
+ static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
+                                   int                         *kernel_type,
+                                   int                         *ewald_excl)
+ {
+     *kernel_type = nbnxnk4x4_PlainC;
+     *ewald_excl  = ewaldexclTable;
+
+ #ifdef GMX_NBNXN_SIMD
+     /* Start from whichever single SIMD flavour is compiled in */
+ #if defined GMX_NBNXN_SIMD_4XN
+     *kernel_type = nbnxnk4xN_SIMD_4xN;
+ #endif
+ #if defined GMX_NBNXN_SIMD_2XNN
+     *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ #endif
+
+ #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
+     /* Both flavours are available, so we have to pick one.
+      * Currently this is based on the SIMD acceleration choice,
+      * but it might be better to decide this at runtime based on CPU.
+      *
+      * 4xN calculates more (zero) interactions, but has less pair-search
+      * work and much better kernel instruction scheduling.
+      *
+      * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
+      * which doesn't have FMA, both the analytical and tabulated Ewald
+      * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
+      * 2x(4+4) because it results in significantly fewer pairs.
+      * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
+      * 10% with HT, 50% without HT. As we currently don't detect the actual
+      * use of HT, use 4x8 to avoid a potential performance hit.
+      * On Intel Haswell 4x8 is always faster.
+      */
+     *kernel_type = nbnxnk4xN_SIMD_4xN;
+
+ #ifndef GMX_SIMD_HAVE_FMA
+     if (EEL_PME_EWALD(ir->coulombtype) ||
+         EVDW_PME(ir->vdwtype))
+     {
+         /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
+          * There are enough instructions to make 2x(4+4) efficient.
+          */
+         *kernel_type = nbnxnk4xN_SIMD_2xNN;
+     }
+ #endif
+ #endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
+
+     /* Explicit user requests; asking for a flavour that is not
+      * compiled in is fatal.
+      */
+     if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
+     {
+ #ifdef GMX_NBNXN_SIMD_4XN
+         *kernel_type = nbnxnk4xN_SIMD_4xN;
+ #else
+         gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
+ #endif
+     }
+     if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
+     {
+ #ifdef GMX_NBNXN_SIMD_2XNN
+         *kernel_type = nbnxnk4xN_SIMD_2xNN;
+ #else
+         gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
+ #endif
+     }
+
+     /* Analytical Ewald exclusion correction is only an option in
+      * the SIMD kernel.
+      * Since table lookups don't parallelize with SIMD, analytical
+      * will probably always be faster for a SIMD width of 8 or more.
+      * With FMA analytical is sometimes faster for a width of 4 as well.
+      * On BlueGene/Q, this is faster regardless of precision.
+      * In single precision, this is faster on Bulldozer.
+      */
+ #if GMX_SIMD_REAL_WIDTH >= 8 || \
+     (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
+     defined GMX_SIMD_IBM_QPX
+     *ewald_excl = ewaldexclAnalytical;
+ #endif
+     if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
+     {
+         *ewald_excl = ewaldexclTable;
+     }
+     if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
+     {
+         *ewald_excl = ewaldexclAnalytical;
+     }
+ #endif  /* GMX_NBNXN_SIMD */
+ }
+
+
+ /* Return a human-readable name for a non-bonded kernel type.
+  *
+  * For the SIMD kernel types the name is that of the SIMD instruction set
+  * selected at compile time, or "not available" when SIMD kernels are not
+  * compiled in. An unknown kernel type (including nbnxnkNR) is reported
+  * through gmx_fatal().
+  */
+ const char *lookup_nbnxn_kernel_name(int kernel_type)
+ {
+     const char *returnvalue = NULL;
+
+     switch (kernel_type)
+     {
+         case nbnxnkNotSet:
+             returnvalue = "not set";
+             break;
+         case nbnxnk4x4_PlainC:
+             returnvalue = "plain C";
+             break;
+         case nbnxnk4xN_SIMD_4xN:
+         case nbnxnk4xN_SIMD_2xNN:
+ #ifdef GMX_NBNXN_SIMD
+ #if defined GMX_SIMD_X86_SSE2
+             returnvalue = "SSE2";
+ #elif defined GMX_SIMD_X86_SSE4_1
+             returnvalue = "SSE4.1";
+ #elif defined GMX_SIMD_X86_AVX_128_FMA
+             returnvalue = "AVX_128_FMA";
+ #elif defined GMX_SIMD_X86_AVX_256
+             returnvalue = "AVX_256";
+ #elif defined GMX_SIMD_X86_AVX2_256
+             returnvalue = "AVX2_256";
+ #else
+             returnvalue = "SIMD";
+ #endif
+ #else   /* GMX_NBNXN_SIMD */
+             returnvalue = "not available";
+ #endif  /* GMX_NBNXN_SIMD */
+             break;
+         case nbnxnk8x8x8_CUDA:
+             returnvalue = "CUDA";
+             break;
+         case nbnxnk8x8x8_PlainC:
+             returnvalue = "plain C";
+             break;
+
+         case nbnxnkNR:
+         default:
+             gmx_fatal(FARGS, "Illegal kernel type selected");
+             returnvalue = NULL;
+             break;
+     }
+     return returnvalue;
+ }
+
+ /* Select the non-bonded kernel type and Ewald exclusion treatment.
+  *
+  * Priority: GPU emulation (plain-C 8x8x8), then a real GPU (CUDA), then
+  * the CPU SIMD kernels when use_simd_kernels is set and the setup is
+  * supported, and finally the plain-C 4x4 kernel. When bDoNonbonded is
+  * set the choice is reported to fp (if not NULL), with an extra warning
+  * when one of the slow plain-C kernels was chosen.
+  */
+ static void pick_nbnxn_kernel(FILE                *fp,
+                               const t_commrec     *cr,
+                               gmx_bool             use_simd_kernels,
+                               gmx_bool             bUseGPU,
+                               gmx_bool             bEmulateGPU,
+                               const t_inputrec    *ir,
+                               int                 *kernel_type,
+                               int                 *ewald_excl,
+                               gmx_bool             bDoNonbonded)
+ {
+     assert(kernel_type);
+
+     *kernel_type = nbnxnkNotSet;
+     *ewald_excl  = ewaldexclTable;
+
+     if (bEmulateGPU)
+     {
+         *kernel_type = nbnxnk8x8x8_PlainC;
+
+         if (bDoNonbonded)
+         {
+             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
+         }
+     }
+     else if (bUseGPU)
+     {
+         *kernel_type = nbnxnk8x8x8_CUDA;
+     }
+     /* LJ PME with LB combination rule does 7 mesh operations.
+      * This is so slow that we don't compile SIMD non-bonded kernels for that.
+      */
+     else if (use_simd_kernels &&
+              nbnxn_acceleration_supported(fp, cr, ir, FALSE))
+     {
+         pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
+     }
+     else
+     {
+         *kernel_type = nbnxnk4x4_PlainC;
+     }
+
+     if (!bDoNonbonded || fp == NULL)
+     {
+         return;
+     }
+
+     fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
+             lookup_nbnxn_kernel_name(*kernel_type),
+             nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
+             nbnxn_kernel_to_cj_size(*kernel_type));
+
+     if (*kernel_type == nbnxnk4x4_PlainC ||
+         *kernel_type == nbnxnk8x8x8_PlainC)
+     {
+         md_print_warn(cr, fp,
+                       "WARNING: Using the slow %s kernels. This should\n"
+                       "not happen during routine usage on supported platforms.\n\n",
+                       lookup_nbnxn_kernel_name(*kernel_type));
+     }
+ }
+
+ /* Decide whether this rank runs the non-bondeds on a GPU, emulates a GPU
+  * on the CPU, or does neither, and initialize the GPU when one is used.
+  *
+  * On return *bEmulateGPU and *bUseGPU are set; they are never both TRUE.
+  * Failure to initialize the assigned GPU is fatal.
+  */
+ static void pick_nbnxn_resources(const t_commrec     *cr,
+                                  const gmx_hw_info_t *hwinfo,
+                                  gmx_bool             bDoNonbonded,
+                                  gmx_bool            *bUseGPU,
+                                  gmx_bool            *bEmulateGPU,
+                                  const gmx_gpu_opt_t *gpu_opt)
+ {
+     gmx_bool bEmulateGPUEnvVarSet;
+     char     gpu_err_str[STRLEN];
+
+     *bUseGPU = FALSE;
+
+     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
+
+     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
+      * GPUs (currently) only handle non-bonded calculations, we will
+      * automatically switch to emulation if non-bonded calculations are
+      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
+      * way to turn off GPU initialization, data movement, and cleanup.
+      *
+      * GPU emulation can be useful to assess the performance one can expect by
+      * adding GPU(s) to the machine. The conditional below allows this even
+      * if mdrun is compiled without GPU acceleration support.
+      * Note that you should freeze the system, as otherwise it will explode.
+      */
+     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
+                     (!bDoNonbonded &&
+                      gpu_opt->ncuda_dev_use > 0));
+
+     /* GPU mode is only enabled when GPUs are in use
+      * and GPU emulation was not requested.
+      */
+     if (*bEmulateGPU || gpu_opt->ncuda_dev_use <= 0)
+     {
+         return;
+     }
+
+     /* Each PP node will use the intra-node id-th device from the
+      * list of detected/selected GPUs. */
+     if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
+                   &hwinfo->gpu_info, gpu_opt))
+     {
+         /* At this point the init should never fail as we made sure that
+          * we have all the GPUs we need. If it still does, we'll bail. */
+         gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
+                   cr->nodeid,
+                   get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
+                                     cr->rank_pp_intranode),
+                   gpu_err_str);
+     }
+
+     /* Here we actually turn on hardware GPU acceleration */
+     *bUseGPU = TRUE;
+ }
+
+ /* Return whether the non-bonded kernels use the simple table layout.
+  *
+  * This is always TRUE for the group cut-off scheme. For the Verlet scheme
+  * it is whatever nbnxn_kernel_pairlist_simple() reports for the kernel
+  * type of the inspected group: the first group when group < 0, the last
+  * one otherwise. Any other cut-off scheme is reported via gmx_incons().
+  */
+ gmx_bool uses_simple_tables(int                 cutoff_scheme,
+                             nonbonded_verlet_t *nbv,
+                             int                 group)
+ {
+     gmx_bool bUsesSimpleTables = TRUE;
+     int      grp_index;
+
+     if (cutoff_scheme == ecutsGROUP)
+     {
+         bUsesSimpleTables = TRUE;
+     }
+     else if (cutoff_scheme == ecutsVERLET)
+     {
+         assert(NULL != nbv && NULL != nbv->grp);
+         if (group < 0)
+         {
+             grp_index = 0;
+         }
+         else
+         {
+             grp_index = nbv->ngrp - 1;
+         }
+         bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
+     }
+     else
+     {
+         gmx_incons("unimplemented");
+     }
+
+     return bUsesSimpleTables;
+ }
+
+ /* (Re)build the Ewald long-range correction tables stored in ic.
+  *
+  * The table size and scale are set first: for simple tables the scale
+  * comes from ewald_spline3_table_scale() and the table covers the larger
+  * of rtab and ic->rcoulomb; otherwise the fixed GPU table size is used.
+  * Any existing tables are then released, and the Coulomb tables are
+  * rebuilt for Ewald/PME electrostatics and the Van der Waals tables for
+  * LJ-PME.
+  *
+  * The table pointers are reset to NULL after freeing so that a table
+  * which is not rebuilt is not left dangling and is not freed a second
+  * time on the next call.
+  */
+ static void init_ewald_f_table(interaction_const_t *ic,
+                                gmx_bool             bUsesSimpleTables,
+                                real                 rtab)
+ {
+     real maxr;
+
+     if (bUsesSimpleTables)
+     {
+         /* The table scale (inverse spacing) is determined from the Ewald
+          * coefficient and the Coulomb cut-off.
+          */
+         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
+                                                    ic->rcoulomb);
+
+         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
+         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
+     }
+     else
+     {
+         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
+         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
+         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
+     }
+
+     sfree_aligned(ic->tabq_coul_FDV0);
+     sfree_aligned(ic->tabq_coul_F);
+     sfree_aligned(ic->tabq_coul_V);
+     ic->tabq_coul_FDV0 = NULL;
+     ic->tabq_coul_F    = NULL;
+     ic->tabq_coul_V    = NULL;
+
+     sfree_aligned(ic->tabq_vdw_FDV0);
+     sfree_aligned(ic->tabq_vdw_F);
+     sfree_aligned(ic->tabq_vdw_V);
+     ic->tabq_vdw_FDV0 = NULL;
+     ic->tabq_vdw_F    = NULL;
+     ic->tabq_vdw_V    = NULL;
+
+     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
+     {
+         /* Create the original table data in FDV0 */
+         snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
+         snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
+         snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
+         table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
+                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
+     }
+
+     if (EVDW_PME(ic->vdwtype))
+     {
+         snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
+         snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
+         snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
+         table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
+                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
+     }
+ }
+
+ /* Build the Ewald correction tables in ic when they are needed, i.e. for
+  * Ewald or PME electrostatics or for LJ-PME, and report the table spacing
+  * and size to fp when it is not NULL. Does nothing for other setups.
+  */
+ void init_interaction_const_tables(FILE                *fp,
+                                    interaction_const_t *ic,
+                                    gmx_bool             bUsesSimpleTables,
+                                    real                 rtab)
+ {
+     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
+     {
+         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
+
+         if (fp != NULL)
+         {
+             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
+                     1/ic->tabq_scale, ic->tabq_size);
+         }
+     }
+ }
+
+ /* Zero all force-switch coefficients, i.e. no switching and no shift */
+ static void clear_force_switch_constants(shift_consts_t *sc)
+ {
+     sc->cpot = 0;
+     sc->c3   = 0;
+     sc->c2   = 0;
+ }
+
+ /* Compute the coefficients that switch the force of an r^-p potential
+  * smoothly to zero between the switch distance rsw and the cut-off rc,
+  * and store them in sc.
+  *
+  * For a potential of r^-p the force is p*r^-(p+1); to save flops the
+  * factor p is absorbed in the coefficients, which gives:
+  *   force/p   = r^-(p+1) + c2*r^2 + c3*r^3
+  *   potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
+  */
+ static void force_switch_constants(real            p,
+                                    real            rsw,
+                                    real            rc,
+                                    shift_consts_t *sc)
+ {
+     sc->c2 =
+         ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
+
+     sc->c3 =
+         -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
+
+     /* The potential constant depends on the c2 and c3 just computed */
+     sc->cpot =
+         -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
+ }
+
+ /* Compute the coefficients of the potential-switch polynomial between the
+  * switch distance rsw and the cut-off rc, and store them in sc.
+  *
+  * The switch function is 1 at rsw and 0 at rc, and its first and second
+  * derivatives are zero at both ends. In terms of the distance beyond the
+  * switch point, d = max(r - r_switch, 0):
+  *   sw  = 1 + c3*d^3 + c4*d^4 + c5*d^5
+  *   dsw = 3*c3*d^2 + 4*c4*d^3 + 5*c5*d^4
+  *   force      = force*dsw - potential*sw
+  *   potential *= sw
+  */
+ static void potential_switch_constants(real             rsw,
+                                        real             rc,
+                                        switch_consts_t *sc)
+ {
+     sc->c5 =  -6*pow(rc - rsw, -5);
+     sc->c4 =  15*pow(rc - rsw, -4);
+     sc->c3 = -10*pow(rc - rsw, -3);
+ }
+
+/* Allocate *interaction_const and fill it with the nonbonded interaction
+ * constants derived from the force record.
+ *
+ * fp                 log file; when not NULL the potential shifts are reported
+ * cr                 communication record, only read for the thread-MPI
+ *                    barrier that follows the GPU constant initialization
+ * interaction_const  receives the newly allocated structure
+ * fr                 force record the cut-offs, modifiers and Ewald/RF
+ *                    parameters are copied from
+ * rtab               table length forwarded to init_interaction_const_tables()
+ */
+static void
+init_interaction_const(FILE                       *fp,
+                       const t_commrec gmx_unused *cr,
+                       interaction_const_t       **interaction_const,
+                       const t_forcerec           *fr,
+                       real                        rtab)
+{
+    interaction_const_t *ic;
+    gmx_bool             bUsesSimpleTables = TRUE;
+
+    snew(ic, 1);
+
+    /* Just allocate something so we can free it */
+    snew_aligned(ic->tabq_coul_FDV0, 16, 32);
+    snew_aligned(ic->tabq_coul_F, 16, 32);
+    snew_aligned(ic->tabq_coul_V, 16, 32);
+
+    ic->rlist     = fr->rlist;
+    ic->rlistlong = fr->rlistlong;
+
+    /* Lennard-Jones */
+    ic->vdwtype         = fr->vdwtype;
+    ic->vdw_modifier    = fr->vdw_modifier;
+    ic->rvdw            = fr->rvdw;
+    ic->rvdw_switch     = fr->rvdw_switch;
+    ic->ewaldcoeff_lj   = fr->ewaldcoeff_lj;
+    ic->ljpme_comb_rule = fr->ljpme_combination_rule;
+    ic->sh_lj_ewald     = 0;
+    clear_force_switch_constants(&ic->dispersion_shift);
+    clear_force_switch_constants(&ic->repulsion_shift);
+
+    switch (ic->vdw_modifier)
+    {
+        case eintmodPOTSHIFT:
+            /* Only shift the potential, don't touch the force */
+            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
+            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
+            if (EVDW_PME(ic->vdwtype))
+            {
+                real crc2;
+
+                crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
+                ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
+            }
+            break;
+        case eintmodFORCESWITCH:
+            /* Switch the force, switch and shift the potential */
+            force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
+                                   &ic->dispersion_shift);
+            force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
+                                   &ic->repulsion_shift);
+            break;
+        case eintmodPOTSWITCH:
+            /* Switch the potential and force */
+            potential_switch_constants(ic->rvdw_switch, ic->rvdw,
+                                       &ic->vdw_switch);
+            break;
+        case eintmodNONE:
+        case eintmodEXACTCUTOFF:
+            /* Nothing to do here */
+            break;
+        default:
+            gmx_incons("unimplemented potential modifier");
+    }
+
+    /* The dispersion shift is also stored with the opposite sign */
+    ic->sh_invrc6 = -ic->dispersion_shift.cpot;
+
+    /* Electrostatics */
+    ic->eeltype          = fr->eeltype;
+    ic->coulomb_modifier = fr->coulomb_modifier;
+    ic->rcoulomb         = fr->rcoulomb;
+    ic->epsilon_r        = fr->epsilon_r;
+    ic->epsfac           = fr->epsfac;
+    ic->ewaldcoeff_q     = fr->ewaldcoeff_q;
+
+    if (fr->coulomb_modifier == eintmodPOTSHIFT)
+    {
+        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
+    }
+    else
+    {
+        ic->sh_ewald = 0;
+    }
+
+    /* Reaction-field */
+    if (EEL_RF(ic->eeltype))
+    {
+        ic->epsilon_rf = fr->epsilon_rf;
+        ic->k_rf       = fr->k_rf;
+        ic->c_rf       = fr->c_rf;
+    }
+    else
+    {
+        /* For plain cut-off we might use the reaction-field kernels */
+        ic->epsilon_rf = ic->epsilon_r;
+        ic->k_rf       = 0;
+        if (fr->coulomb_modifier == eintmodPOTSHIFT)
+        {
+            ic->c_rf = 1/ic->rcoulomb;
+        }
+        else
+        {
+            ic->c_rf = 0;
+        }
+    }
+
+    if (fp != NULL)
+    {
+        real dispersion_shift;
+
+        dispersion_shift = ic->dispersion_shift.cpot;
+        if (EVDW_PME(ic->vdwtype))
+        {
+            dispersion_shift -= ic->sh_lj_ewald;
+        }
+        fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
+                ic->repulsion_shift.cpot, dispersion_shift);
+
+        if (ic->eeltype == eelCUT)
+        {
+            /* Use the same three-digit precision as the other shifts;
+             * "%.e" would print a single significant digit only.
+             */
+            fprintf(fp, ", Coulomb %.3e", -ic->c_rf);
+        }
+        else if (EEL_PME(ic->eeltype))
+        {
+            fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
+        }
+        fprintf(fp, "\n");
+    }
+
+    *interaction_const = ic;
+
+    if (fr->nbv != NULL && fr->nbv->bUseGPU)
+    {
+        nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
+
+        /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
+         * also sharing texture references. To keep the code simple, we don't
+         * treat texture references as shared resources, but this means that
+         * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
+         * Hence, to ensure that the non-bonded kernels don't start before all
+         * texture binding operations are finished, we need to wait for all ranks
+         * to arrive here before continuing.
+         *
+         * Note that we could omit this barrier if GPUs are not shared (or
+         * texture objects are used), but as this is initialization code, there
+         * is no point in complicating things.
+         */
+#ifdef GMX_THREAD_MPI
+        if (PAR(cr))
+        {
+            gmx_barrier(cr);
+        }
+#endif  /* GMX_THREAD_MPI */
+    }
+
+    bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
+    init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
+}
+
+/* Allocate and set up the data for the Verlet-scheme nonbonded module.
+ *
+ * Picks the compute resources and a kernel per interaction group (one
+ * group without domain decomposition, local + non-local with it),
+ * initializes the GPU data and the pair-list balancing parameter when a
+ * GPU is used, and creates the pair search, the pair-list sets and the
+ * nonbonded atom data. The result is returned through *nb_verlet.
+ *
+ * nbpu_opt equal to "gpu_cpu" selects a CPU kernel for the non-local
+ * group while the local group follows the picked resources.
+ */
+static void init_nb_verlet(FILE                *fp,
+                           nonbonded_verlet_t **nb_verlet,
+                           gmx_bool             bFEP_NonBonded,
+                           const t_inputrec    *ir,
+                           const t_forcerec    *fr,
+                           const t_commrec     *cr,
+                           const char          *nbpu_opt)
+{
+    nonbonded_verlet_t *nbv;
+    int                 g;
+    char               *min_ci_str;
+    gmx_bool            bEmulateGPU;
+    gmx_bool            bHybridGPURun = FALSE;
+    nbnxn_alloc_t      *list_alloc;
+    nbnxn_free_t       *list_free;
+
+    snew(nbv, 1);
+
+    pick_nbnxn_resources(cr, fr->hwinfo,
+                         fr->bNonbonded,
+                         &nbv->bUseGPU,
+                         &bEmulateGPU,
+                         fr->gpu_opt);
+
+    nbv->nbs = NULL;
+
+    /* One group for local interactions, a second one for non-local with DD */
+    if (DOMAINDECOMP(cr))
+    {
+        nbv->ngrp = 2;
+    }
+    else
+    {
+        nbv->ngrp = 1;
+    }
+
+    for (g = 0; g < nbv->ngrp; g++)
+    {
+        nbv->grp[g].nbl_lists.nnbl = 0;
+        nbv->grp[g].nbat           = NULL;
+        nbv->grp[g].kernel_type    = nbnxnkNotSet;
+
+        if (g == 0)
+        {
+            /* Local interactions */
+            pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
+                              nbv->bUseGPU, bEmulateGPU, ir,
+                              &nbv->grp[g].kernel_type,
+                              &nbv->grp[g].ewald_excl,
+                              fr->bNonbonded);
+        }
+        else if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
+        {
+            /* Non-local, hybrid run: GPU for local, pick a CPU kernel here */
+            pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
+                              FALSE, FALSE, ir,
+                              &nbv->grp[g].kernel_type,
+                              &nbv->grp[g].ewald_excl,
+                              fr->bNonbonded);
+
+            bHybridGPURun = TRUE;
+        }
+        else
+        {
+            /* Non-local: reuse the kernel chosen for the local interactions */
+            nbv->grp[g].kernel_type = nbv->grp[0].kernel_type;
+            nbv->grp[g].ewald_excl  = nbv->grp[0].ewald_excl;
+        }
+    }
+
+    if (!nbv->bUseGPU)
+    {
+        nbv->min_ci_balanced = 0;
+    }
+    else
+    {
+        /* Init the NxN GPU data; the last argument tells whether both the
+         * local and the non-local NB calculation will run on the GPU.
+         */
+        nbnxn_cuda_init(fp, &nbv->cu_nbv,
+                        &fr->hwinfo->gpu_info, fr->gpu_opt,
+                        cr->rank_pp_intranode,
+                        (nbv->ngrp > 1) && !bHybridGPURun);
+
+        min_ci_str = getenv("GMX_NB_MIN_CI");
+        if (min_ci_str == NULL)
+        {
+            nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
+            if (debug)
+            {
+                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
+                        nbv->min_ci_balanced);
+            }
+        }
+        else
+        {
+            char *tail;
+
+            /* The whole string has to parse as a positive integer */
+            nbv->min_ci_balanced = strtol(min_ci_str, &tail, 10);
+            if (tail == NULL || *tail != 0 || nbv->min_ci_balanced <= 0)
+            {
+                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", min_ci_str);
+            }
+
+            if (debug)
+            {
+                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
+                        nbv->min_ci_balanced);
+            }
+        }
+    }
+
+    *nb_verlet = nbv;
+
+    nbnxn_init_search(&nbv->nbs,
+                      DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
+                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
+                      bFEP_NonBonded,
+                      gmx_omp_nthreads_get(emntNonbonded));
+
+    /* The allocator pair only depends on the kernel of the local group,
+     * so it is the same for every group.
+     */
+    if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
+    {
+        list_alloc = &pmalloc;
+        list_free  = &pfree;
+    }
+    else
+    {
+        list_alloc = NULL;
+        list_free  = NULL;
+    }
+
+    for (g = 0; g < nbv->ngrp; g++)
+    {
+        gmx_bool bSimple;
+        int      comb_rule;
+
+        bSimple = nbnxn_kernel_pairlist_simple(nbv->grp[g].kernel_type);
+
+        /* 8x8x8 "non-simple" lists are ATM always combined */
+        nbnxn_init_pairlist_set(&nbv->grp[g].nbl_lists,
+                                bSimple,
+                                !bSimple,
+                                list_alloc, list_free);
+
+        if (g != 0 &&
+            nbv->grp[0].kernel_type == nbv->grp[g].kernel_type)
+        {
+            /* Same kernel as the local group: share its atom data */
+            nbv->grp[g].nbat = nbv->grp[0].nbat;
+            continue;
+        }
+
+        if (bSimple &&
+            fr->vdwtype == evdwCUT &&
+            (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT))
+        {
+            /* Plain LJ cut-off: we can optimize with combination rules */
+            comb_rule = enbnxninitcombruleDETECT;
+        }
+        else if (fr->vdwtype != evdwPME)
+        {
+            /* We use a full combination matrix: no rule required */
+            comb_rule = enbnxninitcombruleNONE;
+        }
+        else if (fr->ljpme_combination_rule == eljpmeGEOM)
+        {
+            /* LJ-PME: we need to use a combination rule for the grid */
+            comb_rule = enbnxninitcombruleGEOM;
+        }
+        else
+        {
+            comb_rule = enbnxninitcombruleLB;
+        }
+
+        snew(nbv->grp[g].nbat, 1);
+        nbnxn_atomdata_init(fp,
+                            nbv->grp[g].nbat,
+                            nbv->grp[g].kernel_type,
+                            comb_rule,
+                            fr->ntype, fr->nbfp,
+                            ir->opts.ngener,
+                            bSimple ? gmx_omp_nthreads_get(emntNonbonded) : 1,
+                            list_alloc, list_free);
+    }
+}
+
+/* Initialize the force record fr from the input record ir and the topology.
+ *
+ * Copies the run parameters from ir into fr, translates the mdp interaction
+ * settings into nonbonded kernel interaction types and modifiers, decides
+ * whether tabulated interactions are required and builds the tables, and
+ * finally sets up neighbor searching, the Verlet nonbonded data (Verlet
+ * cut-off scheme only) and the interaction constants.
+ *
+ * fp           log file; most messages are only written when it is not NULL
+ * fcd          receives the bonded tables when both fcd and tabbfn are given
+ * tabfn        file name for the nonbonded (and wall / AdResS tf) tables
+ * tabafn       file name for the default AdResS thermo force table
+ * tabpfn       file name for the separate pair (1-4) table
+ * tabbfn       file name for the bonded tables
+ * nbpu_opt     processing unit option, passed on to init_nb_verlet()
+ * bNoSolvOpt   passed to init_cginfo_mb(); forced to TRUE when
+ *              GMX_NB_GENERIC is set
+ * print_force  stored in fr->print_force
+ */
+void init_forcerec(FILE *fp,
+ const output_env_t oenv,
+ t_forcerec *fr,
+ t_fcdata *fcd,
+ const t_inputrec *ir,
+ const gmx_mtop_t *mtop,
+ const t_commrec *cr,
+ matrix box,
+ const char *tabfn,
+ const char *tabafn,
+ const char *tabpfn,
+ const char *tabbfn,
+ const char *nbpu_opt,
+ gmx_bool bNoSolvOpt,
+ real print_force)
+{
+ int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
+ real rtab;
+ char *env;
+ double dbl;
+ const t_block *cgs;
+ gmx_bool bGenericKernelOnly;
+ gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
+ gmx_bool bFEP_NonBonded;
+ t_nblists *nbl;
+ int *nm_ind, egp_flags;
+
+ if (fr->hwinfo == NULL)
+ {
+ /* Detect hardware, gather information.
+ * In mdrun, hwinfo has already been set before calling init_forcerec.
+ * Here we ignore GPUs, as tools will not use them anyhow.
+ */
+ fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
+ }
+
+ /* By default we turn SIMD kernels on, but it might be turned off further down... */
+ fr->use_simd_kernels = TRUE;
+
+ fr->bDomDec = DOMAINDECOMP(cr);
+
+ natoms = mtop->natoms;
+
+ if (check_box(ir->ePBC, box))
+ {
+ /* Pass the message as an argument, never as the format string itself */
+ gmx_fatal(FARGS, "%s", check_box(ir->ePBC, box));
+ }
+
+ /* Test particle insertion ? */
+ if (EI_TPI(ir->eI))
+ {
+ /* Set to the size of the molecule to be inserted (the last one) */
+ /* Because of old style topologies, we have to use the last cg
+ * instead of the last molecule type.
+ */
+ cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
+ fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
+ if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
+ {
+ gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
+ }
+ }
+ else
+ {
+ fr->n_tpi = 0;
+ }
+
+ /* Copy AdResS parameters */
+ if (ir->bAdress)
+ {
+ fr->adress_type = ir->adress->type;
+ fr->adress_const_wf = ir->adress->const_wf;
+ fr->adress_ex_width = ir->adress->ex_width;
+ fr->adress_hy_width = ir->adress->hy_width;
+ fr->adress_icor = ir->adress->icor;
+ fr->adress_site = ir->adress->site;
+ fr->adress_ex_forcecap = ir->adress->ex_forcecap;
+ fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
+
+
+ snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
+ for (i = 0; i < ir->adress->n_energy_grps; i++)
+ {
+ fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
+ }
+
+ fr->n_adress_tf_grps = ir->adress->n_tf_grps;
+ snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
+ for (i = 0; i < fr->n_adress_tf_grps; i++)
+ {
+ fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
+ }
+ copy_rvec(ir->adress->refs, fr->adress_refs);
+ }
+ else
+ {
+ fr->adress_type = eAdressOff;
+ fr->adress_do_hybridpairs = FALSE;
+ }
+
+ /* Copy the user determined parameters */
+ fr->userint1 = ir->userint1;
+ fr->userint2 = ir->userint2;
+ fr->userint3 = ir->userint3;
+ fr->userint4 = ir->userint4;
+ fr->userreal1 = ir->userreal1;
+ fr->userreal2 = ir->userreal2;
+ fr->userreal3 = ir->userreal3;
+ fr->userreal4 = ir->userreal4;
+
+ /* Shell stuff */
+ fr->fc_stepsize = ir->fc_stepsize;
+
+ /* Free energy */
+ fr->efep = ir->efep;
+ fr->sc_alphavdw = ir->fepvals->sc_alpha;
+ if (ir->fepvals->bScCoul)
+ {
+ fr->sc_alphacoul = ir->fepvals->sc_alpha;
+ fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
+ }
+ else
+ {
+ fr->sc_alphacoul = 0;
+ fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
+ }
+ fr->sc_power = ir->fepvals->sc_power;
+ fr->sc_r_power = ir->fepvals->sc_r_power;
+ fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
+
+ env = getenv("GMX_SCSIGMA_MIN");
+ if (env != NULL)
+ {
+ /* dbl stays 0 when the value can not be parsed */
+ dbl = 0;
+ sscanf(env, "%lf", &dbl);
+ fr->sc_sigma6_min = pow(dbl, 6);
+ if (fp)
+ {
+ fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
+ }
+ }
+
+ fr->bNonbonded = TRUE;
+ if (getenv("GMX_NO_NONBONDED") != NULL)
+ {
+ /* turn off non-bonded calculations */
+ fr->bNonbonded = FALSE;
+ md_print_warn(cr, fp,
+ "Found environment variable GMX_NO_NONBONDED.\n"
+ "Disabling nonbonded calculations.\n");
+ }
+
+ bGenericKernelOnly = FALSE;
+
+ /* We now check in the NS code whether a particular combination of interactions
+ * can be used with water optimization, and disable it if that is not the case.
+ */
+
+ if (getenv("GMX_NB_GENERIC") != NULL)
+ {
+ if (fp != NULL)
+ {
+ fprintf(fp,
+ "Found environment variable GMX_NB_GENERIC.\n"
+ "Disabling all interaction-specific nonbonded kernels, will only\n"
+ "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
+ }
+ bGenericKernelOnly = TRUE;
+ }
+
+ if (bGenericKernelOnly == TRUE)
+ {
+ bNoSolvOpt = TRUE;
+ }
+
+ if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
+ {
+ fr->use_simd_kernels = FALSE;
+ if (fp != NULL)
+ {
+ fprintf(fp,
+ "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
+ "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
+ }
+ }
+
+ fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
+
+ /* Check if we can/should do all-vs-all kernels */
+ fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
+ fr->AllvsAll_work = NULL;
+ fr->AllvsAll_workgb = NULL;
+
+ /* All-vs-all kernels have not been implemented in 4.6, and
+ * the SIMD group kernels are also buggy in this case. Non-SIMD
+ * group kernels are OK. See Redmine #1249. */
+ if (fr->bAllvsAll)
+ {
+ fr->bAllvsAll = FALSE;
+ fr->use_simd_kernels = FALSE;
+ if (fp != NULL)
+ {
+ fprintf(fp,
+ "\nYour simulation settings would have triggered the efficient all-vs-all\n"
+ "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
+ "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
+ "of an unfixed bug. The reference C kernels are correct, though, so\n"
+ "we are proceeding by disabling all CPU architecture-specific\n"
+ "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
+ "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
+ }
+ }
+
+ /* Neighbour searching stuff */
+ fr->cutoff_scheme = ir->cutoff_scheme;
+ fr->bGrid = (ir->ns_type == ensGRID);
+ fr->ePBC = ir->ePBC;
+
+ if (fr->cutoff_scheme == ecutsGROUP)
+ {
+ const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
+ "removed in a future release when 'verlet' supports all interaction forms.\n";
+
+ if (MASTER(cr))
+ {
+ fprintf(stderr, "\n%s\n", note);
+ }
+ if (fp != NULL)
+ {
+ fprintf(fp, "\n%s\n", note);
+ }
+ }
+
+ /* Determine if we will do PBC for distances in bonded interactions */
+ if (fr->ePBC == epbcNONE)
+ {
+ fr->bMolPBC = FALSE;
+ }
+ else
+ {
+ if (!DOMAINDECOMP(cr))
+ {
+ /* The group cut-off scheme and SHAKE assume charge groups
+ * are whole, but not using molpbc is faster in most cases.
+ */
+ if (fr->cutoff_scheme == ecutsGROUP ||
+ (ir->eConstrAlg == econtSHAKE &&
+ (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
+ gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
+ {
+ fr->bMolPBC = ir->bPeriodicMols;
+ }
+ else
+ {
+ fr->bMolPBC = TRUE;
+ if (getenv("GMX_USE_GRAPH") != NULL)
+ {
+ fr->bMolPBC = FALSE;
+ if (fp)
+ {
+ /* Report the variable that is actually tested above */
+ fprintf(fp, "\nGMX_USE_GRAPH is set, using the graph for bonded interactions\n\n");
+ }
+ }
+ }
+ }
+ else
+ {
+ fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
+ }
+ }
+ fr->bGB = (ir->implicit_solvent == eisGBSA);
+
+ fr->rc_scaling = ir->refcoord_scaling;
+ copy_rvec(ir->posres_com, fr->posres_com);
+ copy_rvec(ir->posres_comB, fr->posres_comB);
+ fr->rlist = cutoff_inf(ir->rlist);
+ fr->rlistlong = cutoff_inf(ir->rlistlong);
+ fr->eeltype = ir->coulombtype;
+ fr->vdwtype = ir->vdwtype;
+ fr->ljpme_combination_rule = ir->ljpme_combination_rule;
+
+ fr->coulomb_modifier = ir->coulomb_modifier;
+ fr->vdw_modifier = ir->vdw_modifier;
+
+ /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
+ switch (fr->eeltype)
+ {
+ case eelCUT:
+ fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
+ break;
+
+ case eelRF:
+ case eelGRF:
+ case eelRF_NEC:
+ fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
+ break;
+
+ case eelRF_ZERO:
+ fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
+ fr->coulomb_modifier = eintmodEXACTCUTOFF;
+ break;
+
+ case eelSWITCH:
+ case eelSHIFT:
+ case eelUSER:
+ case eelENCADSHIFT:
+ case eelPMESWITCH:
+ case eelPMEUSER:
+ case eelPMEUSERSWITCH:
+ fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
+ break;
+
+ case eelPME:
+ case eelEWALD:
+ fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
+ break;
+
+ default:
+ gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
+ break;
+ }
+
+ /* Vdw: Translate from mdp settings to kernel format */
+ switch (fr->vdwtype)
+ {
+ case evdwCUT:
+ if (fr->bBHAM)
+ {
+ fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
+ }
+ else
+ {
+ fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
+ }
+ break;
+ case evdwPME:
+ fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
+ break;
+
+ case evdwSWITCH:
+ case evdwSHIFT:
+ case evdwUSER:
+ case evdwENCADSHIFT:
+ fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
+ break;
+
+ default:
+ gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
+ break;
+ }
+
+ /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
+ fr->nbkernel_elec_modifier = fr->coulomb_modifier;
+ fr->nbkernel_vdw_modifier = fr->vdw_modifier;
+
++ fr->rvdw = cutoff_inf(ir->rvdw);
++ fr->rvdw_switch = ir->rvdw_switch;
++ fr->rcoulomb = cutoff_inf(ir->rcoulomb);
++ fr->rcoulomb_switch = ir->rcoulomb_switch;
++
+ fr->bTwinRange = fr->rlistlong > fr->rlist;
+ fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
+
+ fr->reppow = mtop->ffparams.reppow;
+
+ if (ir->cutoff_scheme == ecutsGROUP)
+ {
+ fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
+ && !EVDW_PME(fr->vdwtype));
+ /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
+ fr->bcoultab = !(fr->eeltype == eelCUT ||
+ fr->eeltype == eelEWALD ||
+ fr->eeltype == eelPME ||
+ fr->eeltype == eelRF ||
+ fr->eeltype == eelRF_ZERO);
+
+ /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
+ * going to be faster to tabulate the interaction than calling the generic kernel.
++ * However, if generic kernels have been requested we keep things analytically.
+ */
- if (fr->rcoulomb != fr->rvdw)
++ if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
++ fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
++ bGenericKernelOnly == FALSE)
+ {
+ if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
+ {
+ fr->bcoultab = TRUE;
++ /* Once we tabulate electrostatics, we can use the switch function for LJ,
++ * which would otherwise need two tables.
++ */
+ }
+ }
+ else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
+ ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
+ fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
+ (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
+ {
- fr->rcoulomb_switch = ir->rcoulomb_switch;
- fr->rcoulomb = cutoff_inf(ir->rcoulomb);
++ if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
+ {
+ fr->bcoultab = TRUE;
+ }
+ }
+
++ if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
++ {
++ fr->bcoultab = TRUE;
++ }
++ if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
++ {
++ fr->bvdwtab = TRUE;
++ }
++
+ if (getenv("GMX_REQUIRE_TABLES"))
+ {
+ fr->bvdwtab = TRUE;
+ fr->bcoultab = TRUE;
+ }
+
+ if (fp)
+ {
+ fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
+ fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
+ }
+
+ if (fr->bvdwtab == TRUE)
+ {
+ fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
+ fr->nbkernel_vdw_modifier = eintmodNONE;
+ }
+ if (fr->bcoultab == TRUE)
+ {
+ fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
+ fr->nbkernel_elec_modifier = eintmodNONE;
+ }
+ }
+
+ if (ir->cutoff_scheme == ecutsVERLET)
+ {
+ if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
+ {
+ /* %s, not %S: the scheme name is a plain char string */
+ gmx_fatal(FARGS, "Cut-off scheme %s only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
+ }
+ fr->bvdwtab = FALSE;
+ fr->bcoultab = FALSE;
+ }
+
+ /* Tables are used for direct ewald sum */
+ if (fr->bEwald)
+ {
+ if (EEL_PME(ir->coulombtype))
+ {
+ if (fp)
+ {
+ fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
+ }
+ if (ir->coulombtype == eelP3M_AD)
+ {
+ please_cite(fp, "Hockney1988");
+ please_cite(fp, "Ballenegger2012");
+ }
+ else
+ {
+ please_cite(fp, "Essmann95a");
+ }
+
+ if (ir->ewald_geometry == eewg3DC)
+ {
+ if (fp)
+ {
+ fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
+ }
+ please_cite(fp, "In-Chul99a");
+ }
+ }
+ fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
+ init_ewald_tab(&(fr->ewald_table), ir, fp);
+ if (fp)
+ {
+ fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
+ 1/fr->ewaldcoeff_q);
+ }
+ }
+
+ if (EVDW_PME(ir->vdwtype))
+ {
+ if (fp)
+ {
+ fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
+ }
+ please_cite(fp, "Essmann95a");
+ fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
+ if (fp)
+ {
+ fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
+ 1/fr->ewaldcoeff_lj);
+ }
+ }
+
+ /* Electrostatics */
+ fr->epsilon_r = ir->epsilon_r;
+ fr->epsilon_rf = ir->epsilon_rf;
+ fr->fudgeQQ = mtop->ffparams.fudgeQQ;
- fr->rvdw = cutoff_inf(ir->rvdw);
- fr->rvdw_switch = ir->rvdw_switch;
+
+ /* Parameters for generalized RF */
+ fr->zsquare = 0.0;
+ fr->temp = 0.0;
+
+ if (fr->eeltype == eelGRF)
+ {
+ init_generalized_rf(fp, mtop, ir, fr);
+ }
+
+ fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
+ gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
+ gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
+ IR_ELEC_FIELD(*ir) ||
+ (fr->adress_icor != eAdressICOff)
+ );
+
+ if (fr->cutoff_scheme == ecutsGROUP &&
+ ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
+ {
+ /* Count the total number of charge groups */
+ fr->cg_nalloc = ncg_mtop(mtop);
+ srenew(fr->cg_cm, fr->cg_nalloc);
+ }
+ if (fr->shift_vec == NULL)
+ {
+ snew(fr->shift_vec, SHIFTS);
+ }
+
+ if (fr->fshift == NULL)
+ {
+ snew(fr->fshift, SHIFTS);
+ }
+
+ if (fr->nbfp == NULL)
+ {
+ fr->ntype = mtop->ffparams.atnr;
+ fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
+ if (EVDW_PME(fr->vdwtype))
+ {
+ fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
+ }
+ }
+
+ /* Copy the energy group exclusions */
+ fr->egp_flags = ir->opts.egp_flags;
+
+ /* Van der Waals stuff */
+ if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
+ {
+ if (fr->rvdw_switch >= fr->rvdw)
+ {
+ gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
+ fr->rvdw_switch, fr->rvdw);
+ }
+ if (fp)
+ {
+ /* The label describes the Lennard-Jones treatment, so it is
+ * derived from the vdw type and not from the electrostatics type.
+ */
+ fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
+ (fr->vdwtype == evdwSWITCH) ? "switched" : "shifted",
+ fr->rvdw_switch, fr->rvdw);
+ }
+ }
+
+ if (fr->bBHAM && EVDW_PME(fr->vdwtype))
+ {
+ gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
+ }
+
+ if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
+ {
+ gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
+ }
+
+ if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
+ {
+ gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
+ }
+
+ if (fp)
+ {
+ fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
+ fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
+ }
+
+ fr->eDispCorr = ir->eDispCorr;
+ if (ir->eDispCorr != edispcNO)
+ {
+ set_avcsixtwelve(fp, fr, mtop);
+ }
+
+ if (fr->bBHAM)
+ {
+ set_bham_b_max(fp, fr, mtop);
+ }
+
+ fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
+
+ /* Copy the GBSA data (radius, volume and surftens for each
+ * atomtype) from the topology atomtype section to forcerec.
+ */
+ snew(fr->atype_radius, fr->ntype);
+ snew(fr->atype_vol, fr->ntype);
+ snew(fr->atype_surftens, fr->ntype);
+ snew(fr->atype_gb_radius, fr->ntype);
+ snew(fr->atype_S_hct, fr->ntype);
+
+ if (mtop->atomtypes.nr > 0)
+ {
+ for (i = 0; i < fr->ntype; i++)
+ {
+ fr->atype_radius[i] = mtop->atomtypes.radius[i];
+ }
+ for (i = 0; i < fr->ntype; i++)
+ {
+ fr->atype_vol[i] = mtop->atomtypes.vol[i];
+ }
+ for (i = 0; i < fr->ntype; i++)
+ {
+ fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
+ }
+ for (i = 0; i < fr->ntype; i++)
+ {
+ fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
+ }
+ for (i = 0; i < fr->ntype; i++)
+ {
+ fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
+ }
+ }
+
+ /* Generate the GB table if needed */
+ if (fr->bGB)
+ {
+#ifdef GMX_DOUBLE
+ fr->gbtabscale = 2000;
+#else
+ fr->gbtabscale = 500;
+#endif
+
+ fr->gbtabr = 100;
+ fr->gbtab = make_gb_table(oenv, fr);
+
+ init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
+
+ /* Copy local gb data (for dd, this is done in dd_partition_system) */
+ if (!DOMAINDECOMP(cr))
+ {
+ make_local_gb(cr, fr->born, ir->gb_algorithm);
+ }
+ }
+
+ /* Set the charge scaling */
+ if (fr->epsilon_r != 0)
+ {
+ fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
+ }
+ else
+ {
+ /* eps = 0 is infinite dielectric: no coulomb interactions */
+ fr->epsfac = 0;
+ }
+
+ /* Reaction field constants */
+ if (EEL_RF(fr->eeltype))
+ {
+ calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
+ fr->rcoulomb, fr->temp, fr->zsquare, box,
+ &fr->kappa, &fr->k_rf, &fr->c_rf);
+ }
+
+ /*This now calculates sum for q and c6*/
+ set_chargesum(fp, fr, mtop);
+
+ /* if we are using LR electrostatics, and they are tabulated,
+ * the tables will contain modified coulomb interactions.
+ * Since we want to use the non-shifted ones for 1-4
+ * coulombic interactions, we must have an extra set of tables.
+ */
+
+ /* Construct tables.
+ * A little unnecessary to make both vdw and coul tables sometimes,
+ * but what the heck... */
+
+ bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
+ (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
+
+ bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
++ fr->coulomb_modifier != eintmodNONE ||
++ fr->vdw_modifier != eintmodNONE ||
+ fr->bBHAM || fr->bEwald) &&
+ (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
+ gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
+ gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
+
+ negp_pp = ir->opts.ngener - ir->nwall;
+ negptable = 0;
+ if (!bMakeTables)
+ {
+ bSomeNormalNbListsAreInUse = TRUE;
+ fr->nnblists = 1;
+ }
+ else
+ {
+ bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
+ for (egi = 0; egi < negp_pp; egi++)
+ {
+ for (egj = egi; egj < negp_pp; egj++)
+ {
+ egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
+ if (!(egp_flags & EGP_EXCL))
+ {
+ if (egp_flags & EGP_TABLE)
+ {
+ negptable++;
+ }
+ else
+ {
+ bSomeNormalNbListsAreInUse = TRUE;
+ }
+ }
+ }
+ }
+ if (bSomeNormalNbListsAreInUse)
+ {
+ fr->nnblists = negptable + 1;
+ }
+ else
+ {
+ fr->nnblists = negptable;
+ }
+ if (fr->nnblists > 1)
+ {
+ snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
+ }
+ }
+
+ if (ir->adress)
+ {
+ fr->nnblists *= 2;
+ }
+
+ snew(fr->nblists, fr->nnblists);
+
+ /* This code automatically gives table length tabext without cut-off's,
+ * in that case grompp should already have checked that we do not need
+ * normal tables and we only generate tables for 1-4 interactions.
+ */
+ rtab = ir->rlistlong + ir->tabext;
+
+ if (bMakeTables)
+ {
+ /* make tables for ordinary interactions */
+ if (bSomeNormalNbListsAreInUse)
+ {
+ make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
+ if (ir->adress)
+ {
+ make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
+ }
+ if (!bMakeSeparate14Table)
+ {
+ fr->tab14 = fr->nblists[0].table_elec_vdw;
+ }
+ m = 1;
+ }
+ else
+ {
+ m = 0;
+ }
+ if (negptable > 0)
+ {
+ /* Read the special tables for certain energy group pairs */
+ nm_ind = mtop->groups.grps[egcENER].nm_ind;
+ for (egi = 0; egi < negp_pp; egi++)
+ {
+ for (egj = egi; egj < negp_pp; egj++)
+ {
+ egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
+ if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
+ {
+ nbl = &(fr->nblists[m]);
+ if (fr->nnblists > 1)
+ {
+ fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
+ }
+ /* Read the table file with the two energy groups names appended */
+ make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
+ *mtop->groups.grpname[nm_ind[egi]],
+ *mtop->groups.grpname[nm_ind[egj]],
+ &fr->nblists[m]);
+ if (ir->adress)
+ {
+ make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
+ *mtop->groups.grpname[nm_ind[egi]],
+ *mtop->groups.grpname[nm_ind[egj]],
+ &fr->nblists[fr->nnblists/2+m]);
+ }
+ m++;
+ }
+ else if (fr->nnblists > 1)
+ {
+ fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
+ }
+ }
+ }
+ }
+ }
++ else if ((fr->eDispCorr != edispcNO) &&
++ ((fr->vdw_modifier == eintmodPOTSWITCH) ||
++ (fr->vdw_modifier == eintmodFORCESWITCH) ||
++ (fr->vdw_modifier == eintmodPOTSHIFT)))
++ {
++ /* Tables might not be used for the potential modifier interactions per se, but
++ * we still need them to evaluate switch/shift dispersion corrections in this case.
++ */
++ make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
++ }
++
+ if (bMakeSeparate14Table)
+ {
+ /* generate extra tables with plain Coulomb for 1-4 interactions only */
+ fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
+ GMX_MAKETABLES_14ONLY);
+ }
+
+ /* Read AdResS Thermo Force table if needed */
+ if (fr->adress_icor == eAdressICThermoForce)
+ {
+ /* old todo replace */
+
+ if (ir->adress->n_tf_grps > 0)
+ {
+ make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
+
+ }
+ else
+ {
+ /* load the default table */
+ snew(fr->atf_tabs, 1);
+ fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
+ }
+ }
+
+ /* Wall stuff */
+ fr->nwall = ir->nwall;
+ if (ir->nwall && ir->wall_type == ewtTABLE)
+ {
+ make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
+ }
+
+ if (fcd && tabbfn)
+ {
+ fcd->bondtab = make_bonded_tables(fp,
+ F_TABBONDS, F_TABBONDSNC,
+ mtop, tabbfn, "b");
+ fcd->angletab = make_bonded_tables(fp,
+ F_TABANGLES, -1,
+ mtop, tabbfn, "a");
+ fcd->dihtab = make_bonded_tables(fp,
+ F_TABDIHS, -1,
+ mtop, tabbfn, "d");
+ }
+ else
+ {
+ if (debug)
+ {
+ fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
+ }
+ }
+
+ /* QM/MM initialization if requested
+ */
+ if (ir->bQMMM)
+ {
+ fprintf(stderr, "QM/MM calculation requested.\n");
+ }
+
+ fr->bQMMM = ir->bQMMM;
+ fr->qr = mk_QMMMrec();
+
+ /* Set all the static charge group info */
+ fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
+ &bFEP_NonBonded,
+ &fr->bExcl_IntraCGAll_InterCGNone);
+ if (DOMAINDECOMP(cr))
+ {
+ fr->cginfo = NULL;
+ }
+ else
+ {
+ fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
+ }
+
+ if (!DOMAINDECOMP(cr))
+ {
+ forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
+ mtop->natoms, mtop->natoms, mtop->natoms);
+ }
+
+ fr->print_force = print_force;
+
+
+ /* coarse load balancing vars */
+ fr->t_fnbf = 0.;
+ fr->t_wait = 0.;
+ fr->timesteps = 0;
+
+ /* Initialize neighbor search */
+ init_ns(fp, cr, &fr->ns, fr, mtop);
+
+ if (cr->duty & DUTY_PP)
+ {
+ gmx_nonbonded_setup(fr, bGenericKernelOnly);
+ /*
+ if (ir->bAdress)
+ {
+ gmx_setup_adress_kernels(fp,bGenericKernelOnly);
+ }
+ */
+ }
+
+ /* Initialize the thread working data for bonded interactions */
+ init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
+
+ snew(fr->excl_load, fr->nthreads+1);
+
+ if (fr->cutoff_scheme == ecutsVERLET)
+ {
+ if (ir->rcoulomb != ir->rvdw)
+ {
+ gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
+ }
+
+ init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
+ }
+
+ /* fr->ic is used both by verlet and group kernels (to some extent) now */
+ init_interaction_const(fp, cr, &fr->ic, fr, rtab);
+
+ if (ir->eDispCorr != edispcNO)
+ {
+ calc_enervirdiff(fp, ir->eDispCorr, fr);
+ }
+}
+
+/* Debug print helpers: each writes "<expression text>: <value>" followed by
+ * a newline to fp. The expression text is the macro argument as written at
+ * the call site. All macro parameters are parenthesized where they are
+ * evaluated so the three helpers expand consistently.
+ */
+#define pr_real(fp, r) fprintf((fp), "%s: %e\n",#r, (r))
+#define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, (i))
+#define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[(b)])
+
+/* Write a short summary of the force record to fp and flush the stream.
+ *
+ * Every line has the form "<field expression>: <value>"; the labels are
+ * spelled out literally so the output does not depend on local names.
+ *
+ * NOTE(review): fr->rcoulomb is printed twice (once after rlist and once
+ * after rcoulomb_switch); confirm whether the second print was meant to
+ * show a different field.
+ */
+void pr_forcerec(FILE *fp, t_forcerec *fr)
+{
+    int ilist;
+
+    fprintf(fp, "%s: %e\n", "fr->rlist", fr->rlist);
+    fprintf(fp, "%s: %e\n", "fr->rcoulomb", fr->rcoulomb);
+    fprintf(fp, "%s: %e\n", "fr->fudgeQQ", fr->fudgeQQ);
+    fprintf(fp, "%s: %s\n", "fr->bGrid", bool_names[fr->bGrid]);
+    fprintf(fp, "%s: %s\n", "fr->bTwinRange", bool_names[fr->bTwinRange]);
+    /*pr_int(fp,fr->cg0);
+       pr_int(fp,fr->hcg);*/
+
+    /* One line per nonbonded list: the size of its combined table */
+    ilist = 0;
+    while (ilist < fr->nnblists)
+    {
+        fprintf(fp, "%s: %d\n", "fr->nblists[i].table_elec_vdw.n",
+                fr->nblists[ilist].table_elec_vdw.n);
+        ilist++;
+    }
+
+    fprintf(fp, "%s: %e\n", "fr->rcoulomb_switch", fr->rcoulomb_switch);
+    fprintf(fp, "%s: %e\n", "fr->rcoulomb", fr->rcoulomb);
+
+    fflush(fp);
+}
+
+/* Number of exclusions of atom i that point to a higher-numbered atom,
+ * i.e. the entries a[j] > i for j in [ind[i], ind[i+1]).
+ */
+static int count_forward_excls(const int *ind, const int *a, int i)
+{
+    int j;
+    int n = 0;
+
+    for (j = ind[i]; j < ind[i+1]; j++)
+    {
+        if (a[j] > i)
+        {
+            n++;
+        }
+    }
+
+    return n;
+}
+
+/* Fill fr->excl_load[0..fr->nthreads] with atom boundaries that divide the
+ * exclusions of top into fr->nthreads consecutive atom ranges holding about
+ * the same number of "forward" exclusions (pairs counted once, at the
+ * lower-numbered atom).
+ *
+ * excl_load[0] is 0. For t >= 1, excl_load[t] is the atom index at which the
+ * running forward-exclusion count first reaches floor(ntot*t/nthreads),
+ * capped at top->excls.nr. fr->excl_load must have room for nthreads+1
+ * entries.
+ */
+void forcerec_set_excl_load(t_forcerec *fr,
+                            const gmx_localtop_t *top)
+{
+    const int *ind, *a;
+    int        t, i, ntot, n, ntarget, nthreads;
+
+    ind      = top->excls.index;
+    a        = top->excls.a;
+    nthreads = fr->nthreads;
+
+    ntot = 0;
+    for (i = 0; i < top->excls.nr; i++)
+    {
+        ntot += count_forward_excls(ind, a, i);
+    }
+
+    fr->excl_load[0] = 0;
+    n                = 0;
+    i                = 0;
+    for (t = 1; t <= nthreads; t++)
+    {
+        /* floor(ntot*t/nthreads), computed from quotient and remainder so
+         * that the product ntot*t, which can exceed INT_MAX for large
+         * systems on many threads, is never formed.
+         */
+        ntarget = (ntot/nthreads)*t + ((ntot % nthreads)*t)/nthreads;
+        while (i < top->excls.nr && n < ntarget)
+        {
+            n += count_forward_excls(ind, a, i);
+            i++;
+        }
+        fr->excl_load[t] = i;
+    }
+}
--- /dev/null
- int igeometry, int type)
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+#include "sysstuff.h"
+#include "gromacs/utility/smalloc.h"
+#include "macros.h"
+#include "gromacs/math/utilities.h"
+#include "vec.h"
+#include "types/commrec.h"
+#include "network.h"
+#include "nsgrid.h"
+#include "force.h"
+#include "nonbonded.h"
+#include "ns.h"
+#include "pbc.h"
+#include "names.h"
+#include "gmx_fatal.h"
+#include "nrnb.h"
+#include "txtdump.h"
+#include "mtop_util.h"
+
+#include "domdec.h"
+#include "adress.h"
+
+
+/*
+ * E X C L U S I O N H A N D L I N G
+ */
+
+#ifdef DEBUG
+/* Function forms of the exclusion-mask helpers, compiled only when DEBUG is
+ * defined. e[j] is a bit mask and bit i of it flags the pair (i, j).
+ * The shifted constant has type t_excl, so the shift is carried out at the
+ * width of the mask instead of in a signed int (assumes t_excl is an
+ * unsigned integer type - TODO confirm against its typedef).
+ */
+static void SETEXCL_(t_excl e[], atom_id i, atom_id j)
+{
+    e[j] = e[j] | ((t_excl) 1 << i);
+}
+static void RMEXCL_(t_excl e[], atom_id i, atom_id j)
+{
+    e[j] = e[j] & ~((t_excl) 1 << i);
+}
+static gmx_bool ISEXCL_(t_excl e[], atom_id i, atom_id j)
+{
+    return (gmx_bool)(e[j] & ((t_excl) 1 << i));
+}
+static gmx_bool NOTEXCL_(t_excl e[], atom_id i, atom_id j)
+{
+    /* Call the function form: no ISEXCL macro is defined in this branch */
+    return !(ISEXCL_(e, i, j));
+}
+#else
+/* Macro forms: set, clear and test bit i of the exclusion mask e[j].
+ * The constant 1 is cast to t_excl before shifting so that the highest mask
+ * bit can be addressed without overflowing a signed int, and so that the
+ * complement in RMEXCL spans the full width of the mask.
+ */
+#define SETEXCL(e, i, j) (e)[((atom_id) (j))] |= ((t_excl) 1 << ((atom_id) (i)))
+#define RMEXCL(e, i, j)  (e)[((atom_id) (j))] &= (~((t_excl) 1 << ((atom_id) (i))))
+#define ISEXCL(e, i, j)  (gmx_bool) ((e)[((atom_id) (j))] & ((t_excl) 1 << ((atom_id) (i))))
+#define NOTEXCL(e, i, j) !(ISEXCL(e, i, j))
+#endif
+
+/* Return length rounded up to a multiple of simd_width.
+ * For non-negative length this is the smallest multiple of simd_width that
+ * is not below length. A length that already is a multiple, or a
+ * simd_width that is not positive, gives length back unchanged.
+ */
+static int
+round_up_to_simd_width(int length, int simd_width)
+{
+    int offset;
+
+    if (simd_width <= 0)
+    {
+        return length;
+    }
+
+    offset = length % simd_width;
+    if (offset == 0)
+    {
+        return length;
+    }
+
+    return length - offset + simd_width;
+}
+/************************************************
+ *
+ * U T I L I T I E S F O R N S
+ *
+ ************************************************/
+/* Resize the per-i-entry arrays of nl so they match nl->maxnri:
+ * iinr, gid and shift get maxnri elements and jindex gets maxnri+1.
+ * iinr_end is resized only for charge-group/charge-group geometry.
+ * nl->maxnri must be set by the caller; the j arrays are not touched here.
+ * With gmx_debug_at set, the new size is reported to the debug stream first.
+ */
+void reallocate_nblist(t_nblist *nl)
+{
+ if (gmx_debug_at)
+ {
+ fprintf(debug, "reallocating neigborlist (ielec=%d, ivdw=%d, igeometry=%d, type=%d), maxnri=%d\n",
+ nl->ielec, nl->ivdw, nl->igeometry, nl->type, nl->maxnri);
+ }
+ srenew(nl->iinr, nl->maxnri);
+ if (nl->igeometry == GMX_NBLIST_GEOMETRY_CG_CG)
+ {
+ srenew(nl->iinr_end, nl->maxnri);
+ }
+ srenew(nl->gid, nl->maxnri);
+ srenew(nl->shift, nl->maxnri);
+ srenew(nl->jindex, nl->maxnri+1); /* one extra: jindex[nri+1] is written for the last i-entry */
+}
+
+/* Initialise a short-range list (nl_sr) and a long-range list (nl_lr) with
+ * the same interaction settings; a NULL list pointer is skipped.
+ * For each list this stores the VdW/electrostatics kinds and modifiers, the
+ * interaction type and the geometry (free-energy lists are always forced to
+ * particle-particle geometry), lets gmx_nonbonded_set_kernel_pointers() pick
+ * the kernels (log is only passed along for the first list), sets maxnri to
+ * four times maxsr or maxlr respectively, clears counters and array
+ * pointers, allocates the i arrays and sets jindex[0] to 0.
+ * NOTE(review): this span is patch text; lines starting with '-' appear to
+ * be removed code and '++' lines added code, interleaved out of order.
+ */
+static void init_nblist(FILE *log, t_nblist *nl_sr, t_nblist *nl_lr,
+ int maxsr, int maxlr,
+ int ivdw, int ivdwmod,
+ int ielec, int ielecmod,
- gmx_nonbonded_set_kernel_pointers( (i == 0) ? log : NULL, nl);
++ int igeometry, int type,
++ gmx_bool bElecAndVdwSwitchDiffers)
+{
+ t_nblist *nl;
+ int homenr;
+ int i, nn;
+
+ for (i = 0; (i < 2); i++)
+ {
+ nl = (i == 0) ? nl_sr : nl_lr;
+ homenr = (i == 0) ? maxsr : maxlr;
+
+ if (nl == NULL)
+ {
+ continue;
+ }
+
+
+ /* Set coul/vdw in neighborlist, and for the normal loops we determine
+ * an index of which one to call.
+ */
+ nl->ivdw = ivdw;
+ nl->ivdwmod = ivdwmod;
+ nl->ielec = ielec;
+ nl->ielecmod = ielecmod;
+ nl->type = type;
+ nl->igeometry = igeometry;
+
+ if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
+ {
+ nl->igeometry = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
+ }
+
+ /* This will also set the simd_padding_width field */
- int ielec, ielecf, ivdw, ielecmod, ielecmodf, ivdwmod, type;
++ gmx_nonbonded_set_kernel_pointers( (i == 0) ? log : NULL, nl, bElecAndVdwSwitchDiffers);
+
+ /* maxnri is influenced by the number of shifts (maximum is 8)
+ * and the number of energy groups.
+ * If it is not enough, nl memory will be reallocated during the run.
+ * 4 seems to be a reasonable factor, which only causes reallocation
+ * during runs with tiny and many energygroups.
+ */
+ nl->maxnri = homenr*4;
+ nl->maxnrj = 0;
+ nl->nri = -1;
+ nl->nrj = 0;
+ nl->iinr = NULL;
+ nl->gid = NULL;
+ nl->shift = NULL;
+ nl->jindex = NULL;
+ nl->jjnr = NULL;
+ nl->excl_fep = NULL;
+ reallocate_nblist(nl); /* NOTE(review): iinr_end is not reset to NULL above, yet
+ * reallocate_nblist() resizes it for CG-CG geometry;
+ * presumably the struct arrives zeroed - confirm. */
+ nl->jindex[0] = 0;
+
+ if (debug)
+ {
+ fprintf(debug, "Initiating neighbourlist (ielec=%d, ivdw=%d, type=%d) for %s interactions,\nwith %d SR, %d LR atoms.\n",
+ nl->ielec, nl->ivdw, nl->type, gmx_nblist_geometry_names[nl->igeometry], maxsr, maxlr);
+ }
+ }
+}
+/* Create and size every neighbour list held by fr for homenr home atoms.
+ * The routine derives initial list sizes from homenr and fr->nWatMol, picks
+ * the list geometries (charge-group lists when GMX_NBLISTCG is set in the
+ * environment; 3- or 4-site water variants depending on fr->solvent_opt)
+ * and initialises the standard and water lists of each of the fr->nnblists
+ * list sets. With AdResS enabled, the sets in the upper half get the AdResS
+ * interaction type. Solvent optimisation is switched off when one of the
+ * water kernel pointers is NULL. Free-energy lists are added when
+ * fr->efep != efepNO, and the QMMM list when QMMM is active with a scheme
+ * other than ONIOM. Finally fr->ns.nblist_initialized is set.
+ * NOTE(review): this span is patch text; lines starting with '-' appear to
+ * be removed code and '++' lines added code, interleaved out of order.
+ */
+void init_neighbor_list(FILE *log, t_forcerec *fr, int homenr)
+{
+ /* Make maxlr tunable! (does not seem to be a big difference though)
+ * This parameter determines the number of i particles in a long range
+ * neighbourlist. Too few means many function calls, too many means
+ * cache trashing.
+ */
+ int maxsr, maxsr_wat, maxlr, maxlr_wat;
- ielec = fr->nbkernel_elec_interaction;
- ivdw = fr->nbkernel_vdw_interaction;
- ielecmod = fr->nbkernel_elec_modifier;
- ivdwmod = fr->nbkernel_vdw_modifier;
- type = GMX_NBLIST_INTERACTION_STANDARD;
++ int ielec, ivdw, ielecmod, ivdwmod, type;
+ int solvent;
+ int igeometry_def, igeometry_w, igeometry_ww;
+ int i;
++ gmx_bool bElecAndVdwSwitchDiffers;
+ t_nblists *nbl;
+
+ /* maxsr = homenr-fr->nWatMol*3; */
+ maxsr = homenr;
+
+ if (maxsr < 0)
+ {
+ gmx_fatal(FARGS, "%s, %d: Negative number of short range atoms.\n"
+ "Call your Gromacs dealer for assistance.", __FILE__, __LINE__);
+ }
+ /* This is just for initial allocation, so we do not reallocate
+ * all the nlist arrays many times in a row.
+ * The numbers seem very accurate, but they are uncritical.
+ */
+ maxsr_wat = min(fr->nWatMol, (homenr+2)/3);
+ if (fr->bTwinRange)
+ {
+ maxlr = 50;
+ maxlr_wat = min(maxsr_wat, maxlr);
+ }
+ else
+ {
+ maxlr = maxlr_wat = 0;
+ }
+
+ /* Determine the values for ielec/ivdw. */
- maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, igeometry_def, type);
++ ielec = fr->nbkernel_elec_interaction;
++ ivdw = fr->nbkernel_vdw_interaction;
++ ielecmod = fr->nbkernel_elec_modifier;
++ ivdwmod = fr->nbkernel_vdw_modifier;
++ type = GMX_NBLIST_INTERACTION_STANDARD;
++ bElecAndVdwSwitchDiffers = ( (fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw));
+
+ fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0);
+ if (!fr->ns.bCGlist)
+ {
+ igeometry_def = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
+ }
+ else
+ {
+ igeometry_def = GMX_NBLIST_GEOMETRY_CG_CG;
+ if (log != NULL)
+ {
+ fprintf(log, "\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
+ }
+ }
+
+ if (fr->solvent_opt == esolTIP4P)
+ {
+ igeometry_w = GMX_NBLIST_GEOMETRY_WATER4_PARTICLE;
+ igeometry_ww = GMX_NBLIST_GEOMETRY_WATER4_WATER4;
+ }
+ else
+ {
+ igeometry_w = GMX_NBLIST_GEOMETRY_WATER3_PARTICLE;
+ igeometry_ww = GMX_NBLIST_GEOMETRY_WATER3_WATER3;
+ }
+
+ for (i = 0; i < fr->nnblists; i++)
+ {
+ nbl = &(fr->nblists[i]);
+
+ if ((fr->adress_type != eAdressOff) && (i >= fr->nnblists/2))
+ {
+ type = GMX_NBLIST_INTERACTION_ADRESS;
+ }
+ init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ], &nbl->nlist_lr[eNL_VDWQQ],
- maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, igeometry_def, type);
++ maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, igeometry_def, type, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_VDW], &nbl->nlist_lr[eNL_VDW],
- maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_def, type);
++ maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, igeometry_def, type, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_QQ], &nbl->nlist_lr[eNL_QQ],
- maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_w, type);
++ maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_def, type, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_WATER], &nbl->nlist_lr[eNL_VDWQQ_WATER],
- maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_w, type);
++ maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_w, type, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_QQ_WATER], &nbl->nlist_lr[eNL_QQ_WATER],
- maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_ww, type);
++ maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_w, type, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_WATERWATER], &nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
- maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_ww, type);
++ maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_ww, type, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_QQ_WATERWATER], &nbl->nlist_lr[eNL_QQ_WATERWATER],
- if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */
- {
- ielecf = GMX_NBKERNEL_ELEC_EWALD;
- ielecmodf = eintmodNONE;
- }
- else
- {
- ielecf = ielec;
- ielecmodf = ielecmod;
- }
-
++ maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_ww, type, bElecAndVdwSwitchDiffers);
+
+ /* Did we get the solvent loops so we can use optimized water kernels?
+ * A NULL kernelptr_vf on any of the short-range water lists turns
+ * solvent optimization off for the whole force record.
+ */
+ if (nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf == NULL
+ || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf == NULL
+#ifndef DISABLE_WATERWATER_NLIST
+ || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf == NULL
+ || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf == NULL
+#endif
+ )
+ {
+ fr->solvent_opt = esolNO;
+ if (log != NULL)
+ {
+ fprintf(log, "Note: The available nonbonded kernels do not support water optimization - disabling.\n");
+ }
+ }
+
+ if (fr->efep != efepNO)
+ {
- maxsr, maxlr, ivdw, ivdwmod, ielecf, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY);
+ init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_FREE], &nbl->nlist_lr[eNL_VDWQQ_FREE],
- maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY);
++ maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_VDW_FREE], &nbl->nlist_lr[eNL_VDW_FREE],
- maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielecf, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY);
++ maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers);
+ init_nblist(log, &nbl->nlist_sr[eNL_QQ_FREE], &nbl->nlist_lr[eNL_QQ_FREE],
- maxsr, maxlr, 0, 0, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_STANDARD);
++ maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers);
+ }
+ }
+ /* QMMM MM list */
+ if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
+ {
+ init_nblist(log, &fr->QMMMlist, NULL,
++ maxsr, maxlr, 0, 0, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_STANDARD, bElecAndVdwSwitchDiffers);
+ }
+
+ if (log != NULL)
+ {
+ fprintf(log, "\n");
+ }
+
+ fr->ns.nblist_initialized = TRUE;
+}
+
+/* Mark a neighbour list as empty: no j-entries, no i-entry opened yet
+ * (nri is -1) and, when the jindex array has been allocated, a zero start
+ * offset for the first i-entry. No memory is released.
+ */
+static void reset_nblist(t_nblist *nl)
+{
+    nl->nrj = 0;
+    nl->nri = -1;
+
+    if (nl->jindex != NULL)
+    {
+        nl->jindex[0] = 0;
+    }
+}
+
+/* Empty the neighbour lists of the force record.
+ * The QMMM list is reset whenever fr->bQMMM is set, independent of the two
+ * flags. For every list set and every list kind, the short-range list is
+ * reset when bResetSR is set and the long-range list when bResetLR is set.
+ */
+static void reset_neighbor_lists(t_forcerec *fr, gmx_bool bResetSR, gmx_bool bResetLR)
+{
+    int iset, ikind;
+
+    if (fr->bQMMM)
+    {
+        /* The QMMM list only exists as a short-range list */
+        reset_nblist(&fr->QMMMlist);
+    }
+
+    for (iset = 0; iset < fr->nnblists; iset++)
+    {
+        for (ikind = 0; ikind < eNL_NR; ikind++)
+        {
+            if (bResetSR)
+            {
+                reset_nblist(&fr->nblists[iset].nlist_sr[ikind]);
+            }
+            if (bResetLR)
+            {
+                reset_nblist(&fr->nblists[iset].nlist_lr[ikind]);
+            }
+        }
+    }
+}
+
+
+
+
+/* Make sure the current i-entry of nlist describes (i_atom, shift, gid).
+ *
+ * If the current entry already has exactly these three values, the list is
+ * continued: only trailing padding entries (negative j indices) are removed
+ * when the list uses a SIMD padding width above 1.
+ *
+ * Otherwise the entry is (re)initialised. A new entry is opened when there
+ * is none yet, or when the current one holds j-entries and its gid is not
+ * -1; in all other cases the current slot is overwritten. Opening an entry
+ * grows the i arrays through reallocate_nblist() once nri reaches maxnri.
+ */
+static gmx_inline void new_i_nblist(t_nblist *nlist, atom_id i_atom, int shift, int gid)
+{
+    int      nri = nlist->nri;
+    gmx_bool bSameEntry;
+
+    /* The nri != -1 test has to come first: it guards the array reads */
+    bSameEntry = ((nri != -1) &&
+                  (nlist->iinr[nri] == i_atom) &&
+                  (nlist->shift[nri] == shift) &&
+                  (nlist->gid[nri] == gid));
+
+    if (bSameEntry)
+    {
+        /* Adding to previous list. First remove possible previous padding */
+        if (nlist->simd_padding_width > 1)
+        {
+            while (nlist->nrj > 0 && nlist->jjnr[nlist->nrj-1] < 0)
+            {
+                nlist->nrj--;
+            }
+        }
+        return;
+    }
+
+    /* This is something else. Now see if any entries have
+     * been added in the list of the previous atom.
+     */
+    if ((nri == -1) ||
+        ((nlist->jindex[nri+1] > nlist->jindex[nri]) &&
+         (nlist->gid[nri] != -1)))
+    {
+        /* If so increase the counter */
+        nlist->nri++;
+        nri++;
+        if (nlist->nri >= nlist->maxnri)
+        {
+            nlist->maxnri += over_alloc_large(nlist->nri);
+            reallocate_nblist(nlist);
+        }
+    }
+
+    /* Set the number of neighbours and the atom number */
+    nlist->jindex[nri+1] = nlist->jindex[nri];
+    nlist->iinr[nri]     = i_atom;
+    nlist->gid[nri]      = gid;
+    nlist->shift[nri]    = shift;
+}
+
+/* Finish the j-list of the current i-entry of nlist.
+ * The j-list is padded with dummy entries up to a multiple of the list's
+ * SIMD padding width, and the end offset of the entry is then stored in
+ * jindex[nri+1]. Nothing happens when no i-entry has been opened (nri < 0).
+ */
+static gmx_inline void close_i_nblist(t_nblist *nlist)
+{
+    int nri = nlist->nri;
+
+    if (nri < 0)
+    {
+        return;
+    }
+
+    /* Add elements up to padding. Since we allocate memory in units
+     * of the simd_padding width, we do not have to check for possible
+     * list reallocation here.
+     */
+    while ((nlist->nrj % nlist->simd_padding_width) != 0)
+    {
+        /* Use -4 here, so we can write forces for 4 atoms before real data */
+        nlist->jjnr[nlist->nrj++] = -4;
+    }
+    nlist->jindex[nri+1] = nlist->nrj;
+}
+
+/* Turn nri from "index of the last i-entry" into "number of i-entries".
+ *
+ * A list without any j-entries gets nri = 0, so that no empty i-lists are
+ * handed to kernels that cannot deal with them. With the current search
+ * code this covers two situations: no i-particles at all (nri was -1) and
+ * i-particles without j-particles (nri was 0).
+ * Any other list simply has nri incremented by one.
+ */
+static gmx_inline void close_nblist(t_nblist *nlist)
+{
+    if (nlist->nrj != 0)
+    {
+        /* Close list number nri by incrementing the count */
+        nlist->nri++;
+        return;
+    }
+
+    nlist->nri = 0;
+}
+
+/* Close every neighbour list of the force record with close_nblist():
+ * the QMMM list only when bMakeQMMMnblist is set, followed by the
+ * short-range and long-range list of each kind in each list set.
+ */
+static gmx_inline void close_neighbor_lists(t_forcerec *fr, gmx_bool bMakeQMMMnblist)
+{
+    int iset, ikind;
+
+    if (bMakeQMMMnblist)
+    {
+        close_nblist(&fr->QMMMlist);
+    }
+
+    for (iset = 0; iset < fr->nnblists; iset++)
+    {
+        for (ikind = 0; ikind < eNL_NR; ikind++)
+        {
+            close_nblist(&fr->nblists[iset].nlist_sr[ikind]);
+            close_nblist(&fr->nblists[iset].nlist_lr[ikind]);
+        }
+    }
+}
+
+
+/* Append atom j_atom to the j-list of nlist.
+ * When the j array is full it is first grown to over_alloc_small(nrj + 1),
+ * rounded up to a multiple of the list's SIMD padding width.
+ * bLR is only used to label the list as "LR" or "SR" in the debug message.
+ */
+static gmx_inline void add_j_to_nblist(t_nblist *nlist, atom_id j_atom, gmx_bool bLR)
+{
+    if (nlist->nrj >= nlist->maxnrj)
+    {
+        nlist->maxnrj = round_up_to_simd_width(over_alloc_small(nlist->nrj + 1), nlist->simd_padding_width);
+
+        if (gmx_debug_at)
+        {
+            fprintf(debug, "Increasing %s nblist (ielec=%d,ivdw=%d,type=%d,igeometry=%d) j size to %d\n",
+                    bLR ? "LR" : "SR", nlist->ielec, nlist->ivdw, nlist->type, nlist->igeometry, nlist->maxnrj);
+        }
+
+        srenew(nlist->jjnr, nlist->maxnrj);
+    }
+
+    /* Growing the array leaves nrj untouched, so the slot index is still valid */
+    nlist->jjnr[nlist->nrj] = j_atom;
+    nlist->nrj++;
+}
+
+/* Append one j charge group, atoms [j_start, j_end), to a charge-group
+ * based neighbour list.
+ *
+ * The j arrays (jjnr, jjnr_end and the exclusion masks, MAX_CGCGSIZE masks
+ * per j-entry) are grown together when full. The exclusion mask of every
+ * atom in the group is copied from bexcl. When i_is_j is set, the low j bits
+ * are additionally set in the mask of the j-th atom of the group (j >= 1),
+ * so that pairs inside the group are not counted twice.
+ * A group larger than MAX_CGCGSIZE atoms is a fatal error.
+ * bLR is only used to label the list as "LR" or "SR" in the debug message.
+ */
+static gmx_inline void add_j_to_nblist_cg(t_nblist *nlist,
+                                          atom_id j_start, int j_end,
+                                          t_excl *bexcl, gmx_bool i_is_j,
+                                          gmx_bool bLR)
+{
+    int nrj = nlist->nrj;
+    int j;
+
+    if (nlist->nrj >= nlist->maxnrj)
+    {
+        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
+        if (gmx_debug_at)
+        {
+            fprintf(debug, "Increasing %s nblist (ielec=%d,ivdw=%d,type=%d,igeometry=%d) j size to %d\n",
+                    bLR ? "LR" : "SR", nlist->ielec, nlist->ivdw, nlist->type, nlist->igeometry, nlist->maxnrj);
+        }
+
+        srenew(nlist->jjnr, nlist->maxnrj);
+        srenew(nlist->jjnr_end, nlist->maxnrj);
+        srenew(nlist->excl, nlist->maxnrj*MAX_CGCGSIZE);
+    }
+
+    nlist->jjnr[nrj]     = j_start;
+    nlist->jjnr_end[nrj] = j_end;
+
+    if (j_end - j_start > MAX_CGCGSIZE)
+    {
+        gmx_fatal(FARGS, "The charge-group - charge-group neighborlist do not support charge groups larger than %d, found a charge group of size %d", MAX_CGCGSIZE, j_end-j_start);
+    }
+
+    /* Set the exclusions */
+    for (j = j_start; j < j_end; j++)
+    {
+        nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j];
+    }
+    if (i_is_j)
+    {
+        /* Avoid double counting of intra-cg interactions.
+         * The constant is shifted as t_excl rather than as a signed int, so
+         * the expression stays defined when j reaches the top bit of an int
+         * (assumes t_excl is an unsigned integer type - TODO confirm).
+         */
+        for (j = 1; j < j_end-j_start; j++)
+        {
+            nlist->excl[nrj*MAX_CGCGSIZE + j] |= ((t_excl) 1 << j) - 1;
+        }
+    }
+
+    nlist->nrj++;
+}
+/* Function type for routines that enter the j charge groups found for one
+ * i charge group into the neighbour lists. put_in_list_at() and
+ * put_in_list_adress() in this file are defined with this parameter list.
+ */
+typedef void
+ put_in_list_t (gmx_bool bHaveVdW[],
+ int ngid,
+ t_mdatoms * md,
+ int icg,
+ int jgid,
+ int nj,
+ atom_id jjcg[],
+ atom_id index[],
+ t_excl bExcl[],
+ int shift,
+ t_forcerec * fr,
+ gmx_bool bLR,
+ gmx_bool bDoVdW,
+ gmx_bool bDoCoul,
+ int solvent_opt);
+/* Enter the atoms of nj j charge groups into the atom-based neighbour lists
+ * of i charge group icg.
+ *
+ * bHaveVdW    per atom type flag, read as bHaveVdW[type]; atoms whose type
+ *             has no flag set are left out of the VdW lists
+ * ngid        passed to GID() together with the i and j energy group ids
+ * md          supplies charges (A and B), atom types (A and B) and the
+ *             per-atom perturbation flags
+ * icg         index of the i charge group
+ * jgid        energy group id used for the j side in GID()
+ * nj, jjcg    number of j charge groups and their indices
+ * index       index[cg] .. index[cg+1]-1 are the atoms of charge group cg
+ * bExcl       exclusion masks, tested with NOTEXCL(bExcl, i, jj) where i is
+ *             the position of the i atom inside its charge group
+ * shift       shift index stored with every i-entry
+ * fr          force record holding cginfo and the neighbour lists
+ * bLR         selects the long-range (nlist_lr) or short-range (nlist_sr) lists
+ * bDoVdW      when FALSE no VdW or combined entries are made
+ * bDoCoul     when FALSE no Coulomb or combined entries are made
+ * solvent_opt esolNO disables the water-optimized path
+ *
+ * Three paths exist: a water-optimized path when the i group is flagged as
+ * solvent and nothing involved is perturbed, a plain per-atom path, and a
+ * free-energy path that is taken as soon as one i or j atom is perturbed.
+ */
+static void
+put_in_list_at(gmx_bool bHaveVdW[],
+ int ngid,
+ t_mdatoms * md,
+ int icg,
+ int jgid,
+ int nj,
+ atom_id jjcg[],
+ atom_id index[],
+ t_excl bExcl[],
+ int shift,
+ t_forcerec * fr,
+ gmx_bool bLR,
+ gmx_bool bDoVdW,
+ gmx_bool bDoCoul,
+ int solvent_opt)
+{
+ /* The a[] index has been removed,
+ * to put it back in i_atom should be a[i0] and jj should be a[jj].
+ */
+ t_nblist * vdwc;
+ t_nblist * vdw;
+ t_nblist * coul;
+ t_nblist * vdwc_free = NULL;
+ t_nblist * vdw_free = NULL;
+ t_nblist * coul_free = NULL;
+ t_nblist * vdwc_ww = NULL;
+ t_nblist * coul_ww = NULL;
+
+ int i, j, jcg, igid, gid, nbl_ind, ind_ij;
+ atom_id jj, jj0, jj1, i_atom;
+ int i0, nicg, len;
+
+ int *cginfo;
+ int *type, *typeB;
+ real *charge, *chargeB;
+ real qi, qiB, qq, rlj;
+ gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert;
+ gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol;
+ int iwater, jwater;
+ t_nblist *nlist;
+
+ /* Copy some pointers */
+ cginfo = fr->cginfo;
+ charge = md->chargeA;
+ chargeB = md->chargeB;
+ type = md->typeA;
+ typeB = md->typeB;
+ bPert = md->bPerturbed;
+
+ /* Get atom range */
+ i0 = index[icg];
+ nicg = index[icg+1]-i0;
+
+ /* Get the i charge group info */
+ igid = GET_CGINFO_GID(cginfo[icg]);
+
+ iwater = (solvent_opt != esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
+
+ bFreeEnergy = FALSE;
+ if (md->nPerturbed)
+ {
+ /* Check if any of the particles involved are perturbed.
+ * If not we can do the cheaper normal put_in_list
+ * and use more solvent optimization.
+ */
+ for (i = 0; i < nicg; i++)
+ {
+ bFreeEnergy |= bPert[i0+i];
+ }
+ /* Loop over the j charge groups */
+ for (j = 0; (j < nj && !bFreeEnergy); j++)
+ {
+ jcg = jjcg[j];
+ jj0 = index[jcg];
+ jj1 = index[jcg+1];
+ /* Finally loop over the atoms in the j-charge group */
+ for (jj = jj0; jj < jj1; jj++)
+ {
+ bFreeEnergy |= bPert[jj];
+ }
+ }
+ }
+
+ /* Unpack pointers to neighbourlist structs:
+ * a single list set is used directly, otherwise the set is looked up
+ * through the energy group pair of the i and j groups.
+ */
+ if (fr->nnblists == 1)
+ {
+ nbl_ind = 0;
+ }
+ else
+ {
+ nbl_ind = fr->gid2nblists[GID(igid, jgid, ngid)];
+ }
+ if (bLR)
+ {
+ nlist = fr->nblists[nbl_ind].nlist_lr;
+ }
+ else
+ {
+ nlist = fr->nblists[nbl_ind].nlist_sr;
+ }
+
+ if (iwater != esolNO)
+ {
+ vdwc = &nlist[eNL_VDWQQ_WATER];
+ vdw = &nlist[eNL_VDW];
+ coul = &nlist[eNL_QQ_WATER];
+#ifndef DISABLE_WATERWATER_NLIST
+ vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER];
+ coul_ww = &nlist[eNL_QQ_WATERWATER];
+#endif
+ }
+ else
+ {
+ vdwc = &nlist[eNL_VDWQQ];
+ vdw = &nlist[eNL_VDW];
+ coul = &nlist[eNL_QQ];
+ }
+
+ if (!bFreeEnergy)
+ {
+ if (iwater != esolNO)
+ {
+ /* Loop over the atoms in the i charge group
+ * (in this branch only the first atom, i0, is entered as i-entry) */
+ i_atom = i0;
+ gid = GID(igid, jgid, ngid);
+ /* Create new i_atom for each energy group */
+ if (bDoCoul && bDoVdW)
+ {
+ new_i_nblist(vdwc, i_atom, shift, gid);
+#ifndef DISABLE_WATERWATER_NLIST
+ new_i_nblist(vdwc_ww, i_atom, shift, gid);
+#endif
+ }
+ if (bDoVdW)
+ {
+ new_i_nblist(vdw, i_atom, shift, gid);
+ }
+ if (bDoCoul)
+ {
+ new_i_nblist(coul, i_atom, shift, gid);
+#ifndef DISABLE_WATERWATER_NLIST
+ new_i_nblist(coul_ww, i_atom, shift, gid);
+#endif
+ }
+ /* Loop over the j charge groups */
+ for (j = 0; (j < nj); j++)
+ {
+ jcg = jjcg[j];
+
+ if (jcg == icg)
+ {
+ continue;
+ }
+
+ jj0 = index[jcg];
+ jwater = GET_CGINFO_SOLOPT(cginfo[jcg]);
+
+ if (iwater == esolSPC && jwater == esolSPC)
+ {
+ /* Interaction between two SPC molecules */
+ if (!bDoCoul)
+ {
+ /* VdW only - only first atoms in each water interact */
+ add_j_to_nblist(vdw, jj0, bLR);
+ }
+ else
+ {
+#ifdef DISABLE_WATERWATER_NLIST
+ /* Add entries for the three atoms - only do VdW if we need to */
+ if (!bDoVdW)
+ {
+ add_j_to_nblist(coul, jj0, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdwc, jj0, bLR);
+ }
+ add_j_to_nblist(coul, jj0+1, bLR);
+ add_j_to_nblist(coul, jj0+2, bLR);
+#else
+ /* One entry for the entire water-water interaction */
+ if (!bDoVdW)
+ {
+ add_j_to_nblist(coul_ww, jj0, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdwc_ww, jj0, bLR);
+ }
+#endif
+ }
+ }
+ else if (iwater == esolTIP4P && jwater == esolTIP4P)
+ {
+ /* Interaction between two TIP4p molecules */
+ if (!bDoCoul)
+ {
+ /* VdW only - only first atoms in each water interact */
+ add_j_to_nblist(vdw, jj0, bLR);
+ }
+ else
+ {
+#ifdef DISABLE_WATERWATER_NLIST
+ /* Add entries for the four atoms - only do VdW if we need to */
+ if (bDoVdW)
+ {
+ add_j_to_nblist(vdw, jj0, bLR);
+ }
+ add_j_to_nblist(coul, jj0+1, bLR);
+ add_j_to_nblist(coul, jj0+2, bLR);
+ add_j_to_nblist(coul, jj0+3, bLR);
+#else
+ /* One entry for the entire water-water interaction */
+ if (!bDoVdW)
+ {
+ add_j_to_nblist(coul_ww, jj0, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdwc_ww, jj0, bLR);
+ }
+#endif
+ }
+ }
+ else
+ {
+ /* j charge group is not water, but i is.
+ * Add entries to the water-other_atom lists; the geometry of the water
+ * molecule doesn't matter - that is taken care of in the nonbonded kernel,
+ * so we don't care if it is SPC or TIP4P...
+ */
+
+ jj1 = index[jcg+1];
+
+ if (!bDoVdW)
+ {
+ for (jj = jj0; (jj < jj1); jj++)
+ {
+ if (charge[jj] != 0)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ }
+ }
+ else if (!bDoCoul)
+ {
+ for (jj = jj0; (jj < jj1); jj++)
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ }
+ }
+ else
+ {
+ /* _charge_ _groups_ interact with both coulomb and LJ */
+ /* Check which atoms we should add to the lists! */
+ for (jj = jj0; (jj < jj1); jj++)
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ if (charge[jj] != 0)
+ {
+ add_j_to_nblist(vdwc, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ }
+ else if (charge[jj] != 0)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ }
+ }
+ }
+ }
+ close_i_nblist(vdw);
+ close_i_nblist(coul);
+ close_i_nblist(vdwc);
+#ifndef DISABLE_WATERWATER_NLIST
+ close_i_nblist(coul_ww);
+ close_i_nblist(vdwc_ww);
+#endif
+ }
+ else
+ {
+ /* no solvent as i charge group */
+ /* Loop over the atoms in the i charge group */
+ for (i = 0; i < nicg; i++)
+ {
+ i_atom = i0+i;
+ gid = GID(igid, jgid, ngid);
+ qi = charge[i_atom];
+
+ /* Create new i_atom for each energy group */
+ if (bDoVdW && bDoCoul)
+ {
+ new_i_nblist(vdwc, i_atom, shift, gid);
+ }
+ if (bDoVdW)
+ {
+ new_i_nblist(vdw, i_atom, shift, gid);
+ }
+ if (bDoCoul)
+ {
+ new_i_nblist(coul, i_atom, shift, gid);
+ }
+ bDoVdW_i = (bDoVdW && bHaveVdW[type[i_atom]]);
+ bDoCoul_i = (bDoCoul && qi != 0);
+
+ if (bDoVdW_i || bDoCoul_i)
+ {
+ /* Loop over the j charge groups */
+ for (j = 0; (j < nj); j++)
+ {
+ jcg = jjcg[j];
+
+ /* Check for large charge groups:
+ * inside the i group itself only atoms after i_atom are taken */
+ if (jcg == icg)
+ {
+ jj0 = i0 + i + 1;
+ }
+ else
+ {
+ jj0 = index[jcg];
+ }
+
+ jj1 = index[jcg+1];
+ /* Finally loop over the atoms in the j-charge group */
+ for (jj = jj0; jj < jj1; jj++)
+ {
+ bNotEx = NOTEXCL(bExcl, i, jj);
+
+ if (bNotEx)
+ {
+ if (!bDoVdW_i)
+ {
+ if (charge[jj] != 0)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ }
+ else if (!bDoCoul_i)
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ }
+ else
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ if (charge[jj] != 0)
+ {
+ add_j_to_nblist(vdwc, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ }
+ else if (charge[jj] != 0)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ }
+ }
+ }
+ }
+ }
+ close_i_nblist(vdw);
+ close_i_nblist(coul);
+ close_i_nblist(vdwc);
+ }
+ }
+ }
+ else
+ {
+ /* we are doing free energy */
+ vdwc_free = &nlist[eNL_VDWQQ_FREE];
+ vdw_free = &nlist[eNL_VDW_FREE];
+ coul_free = &nlist[eNL_QQ_FREE];
+ /* Loop over the atoms in the i charge group */
+ for (i = 0; i < nicg; i++)
+ {
+ i_atom = i0+i;
+ gid = GID(igid, jgid, ngid);
+ qi = charge[i_atom];
+ qiB = chargeB[i_atom];
+
+ /* Create new i_atom for each energy group */
+ if (bDoVdW && bDoCoul)
+ {
+ new_i_nblist(vdwc, i_atom, shift, gid);
+ }
+ if (bDoVdW)
+ {
+ new_i_nblist(vdw, i_atom, shift, gid);
+ }
+ if (bDoCoul)
+ {
+ new_i_nblist(coul, i_atom, shift, gid);
+ }
+
+ new_i_nblist(vdw_free, i_atom, shift, gid);
+ new_i_nblist(coul_free, i_atom, shift, gid);
+ new_i_nblist(vdwc_free, i_atom, shift, gid);
+
+ bDoVdW_i = (bDoVdW &&
+ (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]]));
+ bDoCoul_i = (bDoCoul && (qi != 0 || qiB != 0));
+ /* For TIP4P the first atom does not have a charge,
+ * but the last three do. So we should still put an atom
+ * without LJ but with charge in the water-atom neighborlist
+ * for a TIP4p i charge group.
+ * For SPC type water the first atom has LJ and charge,
+ * so there is no such problem.
+ */
+ if (iwater == esolNO)
+ {
+ bDoCoul_i_sol = bDoCoul_i;
+ }
+ else
+ {
+ bDoCoul_i_sol = bDoCoul;
+ }
+
+ if (bDoVdW_i || bDoCoul_i_sol)
+ {
+ /* Loop over the j charge groups */
+ for (j = 0; (j < nj); j++)
+ {
+ jcg = jjcg[j];
+
+ /* Check for large charge groups */
+ if (jcg == icg)
+ {
+ jj0 = i0 + i + 1;
+ }
+ else
+ {
+ jj0 = index[jcg];
+ }
+
+ jj1 = index[jcg+1];
+ /* Finally loop over the atoms in the j-charge group */
+ bFree = bPert[i_atom];
+ for (jj = jj0; (jj < jj1); jj++)
+ {
+ bFreeJ = bFree || bPert[jj];
+ /* Complicated if, because the water H's should also
+ * see perturbed j-particles
+ */
+ if (iwater == esolNO || i == 0 || bFreeJ)
+ {
+ bNotEx = NOTEXCL(bExcl, i, jj);
+
+ if (bNotEx)
+ {
+ if (bFreeJ)
+ {
+ if (!bDoVdW_i)
+ {
+ if (charge[jj] != 0 || chargeB[jj] != 0)
+ {
+ add_j_to_nblist(coul_free, jj, bLR);
+ }
+ }
+ else if (!bDoCoul_i)
+ {
+ if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
+ {
+ add_j_to_nblist(vdw_free, jj, bLR);
+ }
+ }
+ else
+ {
+ if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
+ {
+ if (charge[jj] != 0 || chargeB[jj] != 0)
+ {
+ add_j_to_nblist(vdwc_free, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdw_free, jj, bLR);
+ }
+ }
+ else if (charge[jj] != 0 || chargeB[jj] != 0)
+ {
+ add_j_to_nblist(coul_free, jj, bLR);
+ }
+ }
+ }
+ else if (!bDoVdW_i)
+ {
+ /* This is done whether or not bWater is set */
+ if (charge[jj] != 0)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ }
+ else if (!bDoCoul_i_sol)
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ }
+ else
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ if (charge[jj] != 0)
+ {
+ add_j_to_nblist(vdwc, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ }
+ else if (charge[jj] != 0)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ close_i_nblist(vdw);
+ close_i_nblist(coul);
+ close_i_nblist(vdwc);
+ close_i_nblist(vdw_free);
+ close_i_nblist(coul_free);
+ close_i_nblist(vdwc_free);
+ }
+ }
+}
+
+static void
+put_in_list_adress(gmx_bool bHaveVdW[],
+ int ngid,
+ t_mdatoms * md,
+ int icg,
+ int jgid,
+ int nj,
+ atom_id jjcg[],
+ atom_id index[],
+ t_excl bExcl[],
+ int shift,
+ t_forcerec * fr,
+ gmx_bool bLR,
+ gmx_bool bDoVdW,
+ gmx_bool bDoCoul,
+ int solvent_opt)
+{
+ /* The a[] index has been removed,
+ * to put it back in i_atom should be a[i0] and jj should be a[jj].
+ */
+ t_nblist * vdwc;
+ t_nblist * vdw;
+ t_nblist * coul;
+ t_nblist * vdwc_adress = NULL;
+ t_nblist * vdw_adress = NULL;
+ t_nblist * coul_adress = NULL;
+ t_nblist * vdwc_ww = NULL;
+ t_nblist * coul_ww = NULL;
+
+ int i, j, jcg, igid, gid, nbl_ind, nbl_ind_adress;
+ atom_id jj, jj0, jj1, i_atom;
+ int i0, nicg, len;
+
+ int *cginfo;
+ int *type, *typeB;
+ real *charge, *chargeB;
+ real *wf;
+ real qi, qiB, qq, rlj;
+ gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert;
+ gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol;
+ gmx_bool b_hybrid;
+ gmx_bool j_all_atom;
+ int iwater, jwater;
+ t_nblist *nlist, *nlist_adress;
+ gmx_bool bEnergyGroupCG;
+
+ /* Copy some pointers */
+ cginfo = fr->cginfo;
+ charge = md->chargeA;
+ chargeB = md->chargeB;
+ type = md->typeA;
+ typeB = md->typeB;
+ bPert = md->bPerturbed;
+ wf = md->wf;
+
+ /* Get atom range */
+ i0 = index[icg];
+ nicg = index[icg+1]-i0;
+
+ /* Get the i charge group info */
+ igid = GET_CGINFO_GID(cginfo[icg]);
+
+ iwater = (solvent_opt != esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
+
+ if (md->nPerturbed)
+ {
+ gmx_fatal(FARGS, "AdResS does not support free energy pertubation\n");
+ }
+
+ /* Unpack pointers to neighbourlist structs */
+ if (fr->nnblists == 2)
+ {
+ nbl_ind = 0;
+ nbl_ind_adress = 1;
+ }
+ else
+ {
+ nbl_ind = fr->gid2nblists[GID(igid, jgid, ngid)];
+ nbl_ind_adress = nbl_ind+fr->nnblists/2;
+ }
+ if (bLR)
+ {
+ nlist = fr->nblists[nbl_ind].nlist_lr;
+ nlist_adress = fr->nblists[nbl_ind_adress].nlist_lr;
+ }
+ else
+ {
+ nlist = fr->nblists[nbl_ind].nlist_sr;
+ nlist_adress = fr->nblists[nbl_ind_adress].nlist_sr;
+ }
+
+
+ vdwc = &nlist[eNL_VDWQQ];
+ vdw = &nlist[eNL_VDW];
+ coul = &nlist[eNL_QQ];
+
+ vdwc_adress = &nlist_adress[eNL_VDWQQ];
+ vdw_adress = &nlist_adress[eNL_VDW];
+ coul_adress = &nlist_adress[eNL_QQ];
+
+ /* We do not support solvent optimization with AdResS for now.
+ For this we would need hybrid solvent-other kernels */
+
+ /* no solvent as i charge group */
+ /* Loop over the atoms in the i charge group */
+ for (i = 0; i < nicg; i++)
+ {
+ i_atom = i0+i;
+ gid = GID(igid, jgid, ngid);
+ qi = charge[i_atom];
+
+ /* Create new i_atom for each energy group */
+ if (bDoVdW && bDoCoul)
+ {
+ new_i_nblist(vdwc, i_atom, shift, gid);
+ new_i_nblist(vdwc_adress, i_atom, shift, gid);
+
+ }
+ if (bDoVdW)
+ {
+ new_i_nblist(vdw, i_atom, shift, gid);
+ new_i_nblist(vdw_adress, i_atom, shift, gid);
+
+ }
+ if (bDoCoul)
+ {
+ new_i_nblist(coul, i_atom, shift, gid);
+ new_i_nblist(coul_adress, i_atom, shift, gid);
+ }
+ bDoVdW_i = (bDoVdW && bHaveVdW[type[i_atom]]);
+ bDoCoul_i = (bDoCoul && qi != 0);
+
+ /* Here we find out whether the energy-group interaction is a
+ * coarse-grained (vsite) or an atomistic interaction. Note that, because
+ * interactions between coarse-grained and other (atomistic) energy groups
+ * are excluded automatically by grompp, it is sufficient to check
+ * the group id of atom i (igid) */
+ bEnergyGroupCG = !egp_explicit(fr, igid);
+
+ if (bDoVdW_i || bDoCoul_i)
+ {
+ /* Loop over the j charge groups */
+ for (j = 0; (j < nj); j++)
+ {
+ jcg = jjcg[j];
+
+ /* Check for large charge groups */
+ if (jcg == icg)
+ {
+ jj0 = i0 + i + 1;
+ }
+ else
+ {
+ jj0 = index[jcg];
+ }
+
+ jj1 = index[jcg+1];
+ /* Finally loop over the atoms in the j-charge group */
+ for (jj = jj0; jj < jj1; jj++)
+ {
+ bNotEx = NOTEXCL(bExcl, i, jj);
+
+ /* Now we have to exclude interactions which will be zero
+ * anyway due to the AdResS weights (in previous implementations
+ * this was done in the force kernel). This is necessary as
+ * pure interactions (those with b_hybrid=false, i.e. w_i*w_j==1 or 0)
+ * are put into neighbour lists which will be passed to the
+ * standard (optimized) kernels for speed. The interactions with
+ * b_hybrid=true are placed into the _adress neighbour lists and
+ * processed by the generic AdResS kernel.
+ */
+ if ( (bEnergyGroupCG &&
+ wf[i_atom] >= 1-GMX_REAL_EPS && wf[jj] >= 1-GMX_REAL_EPS ) ||
+ ( !bEnergyGroupCG && wf[jj] <= GMX_REAL_EPS ) )
+ {
+ continue;
+ }
+
+ b_hybrid = !((wf[i_atom] >= 1-GMX_REAL_EPS && wf[jj] >= 1-GMX_REAL_EPS) ||
+ (wf[i_atom] <= GMX_REAL_EPS && wf[jj] <= GMX_REAL_EPS));
+
+ if (bNotEx)
+ {
+ if (!bDoVdW_i)
+ {
+ if (charge[jj] != 0)
+ {
+ if (!b_hybrid)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(coul_adress, jj, bLR);
+ }
+ }
+ }
+ else if (!bDoCoul_i)
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ if (!b_hybrid)
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdw_adress, jj, bLR);
+ }
+ }
+ }
+ else
+ {
+ if (bHaveVdW[type[jj]])
+ {
+ if (charge[jj] != 0)
+ {
+ if (!b_hybrid)
+ {
+ add_j_to_nblist(vdwc, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdwc_adress, jj, bLR);
+ }
+ }
+ else
+ {
+ if (!b_hybrid)
+ {
+ add_j_to_nblist(vdw, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(vdw_adress, jj, bLR);
+ }
+
+ }
+ }
+ else if (charge[jj] != 0)
+ {
+ if (!b_hybrid)
+ {
+ add_j_to_nblist(coul, jj, bLR);
+ }
+ else
+ {
+ add_j_to_nblist(coul_adress, jj, bLR);
+ }
+
+ }
+ }
+ }
+ }
+ }
+
+ close_i_nblist(vdw);
+ close_i_nblist(coul);
+ close_i_nblist(vdwc);
+ close_i_nblist(vdw_adress);
+ close_i_nblist(coul_adress);
+ close_i_nblist(vdwc_adress);
+ }
+ }
+}
+
+/* Neighbour-list filler for the QM/MM list (fr->QMMMlist).
+ *
+ * For each atom of the i charge group icg a new i-entry is opened in the
+ * list. Every atom of the nj charge groups listed in jjcg[] is then added
+ * as a j-atom unless NOTEXCL() reports the pair as excluded in bExcl.
+ * The i charge group itself is never used as a j charge group.
+ *
+ * bHaveVdW, md, bDoVdW, bDoCoul and solvent_opt are not used here
+ * (they are marked gmx_unused in the parameter list).
+ */
+static void
+put_in_list_qmmm(gmx_bool gmx_unused     bHaveVdW[],
+                 int                     ngid,
+                 t_mdatoms gmx_unused  * md,
+                 int                     icg,
+                 int                     jgid,
+                 int                     nj,
+                 atom_id                 jjcg[],
+                 atom_id                 index[],
+                 t_excl                  bExcl[],
+                 int                     shift,
+                 t_forcerec            * fr,
+                 gmx_bool                bLR,
+                 gmx_bool  gmx_unused    bDoVdW,
+                 gmx_bool  gmx_unused    bDoCoul,
+                 int       gmx_unused    solvent_opt)
+{
+    t_nblist *qmmm_list = &fr->QMMMlist;
+    int       first_i   = index[icg];
+    int       n_i       = index[icg+1] - first_i;
+    int       igid      = GET_CGINFO_GID(fr->cginfo[icg]);
+    int       pair_gid  = GID(igid, jgid, ngid);
+    int       ii, k, other_cg;
+    atom_id   ja, ja_end;
+
+    for (ii = 0; ii < n_i; ii++)
+    {
+        /* One i-entry per atom of the i charge group */
+        new_i_nblist(qmmm_list, first_i + ii, shift, pair_gid);
+
+        for (k = 0; k < nj; k++)
+        {
+            other_cg = jjcg[k];
+            if (other_cg == icg)
+            {
+                /* Charge groups cannot hold QM and MM atoms at the same time */
+                continue;
+            }
+
+            ja_end = index[other_cg+1];
+            for (ja = index[other_cg]; ja < ja_end; ja++)
+            {
+                if (NOTEXCL(bExcl, ii, ja))
+                {
+                    add_j_to_nblist(qmmm_list, ja, bLR);
+                }
+            }
+        }
+
+        close_i_nblist(qmmm_list);
+    }
+}
+
+/* Neighbour-list filler that works on whole charge groups.
+ *
+ * A single i-entry covering the atoms index[icg] .. index[icg+1]-1 is opened
+ * in the eNL_VDWQQ list (long-range list when bLR is set, short-range list
+ * otherwise) and each of the nj charge groups in jjcg[] is appended through
+ * add_j_to_nblist_cg(). The pair icg-icg is left out when
+ * GET_CGINFO_EXCL_INTRA() is set for the i charge group.
+ *
+ * bHaveVdW, md, bDoVdW, bDoCoul and solvent_opt are not used here
+ * (they are marked gmx_unused in the parameter list).
+ */
+static void
+put_in_list_cg(gmx_bool  gmx_unused   bHaveVdW[],
+               int                    ngid,
+               t_mdatoms gmx_unused * md,
+               int                    icg,
+               int                    jgid,
+               int                    nj,
+               atom_id                jjcg[],
+               atom_id                index[],
+               t_excl                 bExcl[],
+               int                    shift,
+               t_forcerec           * fr,
+               gmx_bool               bLR,
+               gmx_bool  gmx_unused   bDoVdW,
+               gmx_bool  gmx_unused   bDoCoul,
+               int       gmx_unused   solvent_opt)
+{
+    int       cg_flags = fr->cginfo[icg];
+    int       igid     = GET_CGINFO_GID(cg_flags);
+    int       pair_gid = GID(igid, jgid, ngid);
+    int       list_ind = (fr->nnblists == 1) ? 0 : fr->gid2nblists[pair_gid];
+    int       k, other_cg;
+    t_nblist *list;
+
+    /* Pick the long- or short-range LJ+Coulomb list of this energy-group pair */
+    list = bLR ?
+        &fr->nblists[list_ind].nlist_lr[eNL_VDWQQ] :
+        &fr->nblists[list_ind].nlist_sr[eNL_VDWQQ];
+
+    /* One i-entry for the complete charge group; LJ and Coulomb share
+     * a single list, interactions that are zero are not filtered out here.
+     */
+    new_i_nblist(list, index[icg], shift, pair_gid);
+    list->iinr_end[list->nri] = index[icg+1];
+
+    for (k = 0; k < nj; k++)
+    {
+        other_cg = jjcg[k];
+        if (other_cg == icg && GET_CGINFO_EXCL_INTRA(cg_flags))
+        {
+            /* All self interactions are excluded: skip the icg-icg pair */
+            continue;
+        }
+        /* Append the j charge group; exclusions are stored with the list */
+        add_j_to_nblist_cg(list, index[other_cg], index[other_cg+1],
+                           bExcl, icg == other_cg, bLR);
+    }
+
+    close_i_nblist(list);
+}
+
+/* Set (b true) or remove (b false) the exclusion marks of atoms
+ * start .. end-1 in bexcl[].
+ *
+ * For atom i every partner excl->a[k], with k running over
+ * [excl->index[i], excl->index[i+1]), is passed to SETEXCL() or RMEXCL()
+ * together with the local index i - start.
+ */
+static void setexcl(atom_id start, atom_id end, t_blocka *excl, gmx_bool b,
+                    t_excl bexcl[])
+{
+    atom_id ai, k, k_end;
+    int     local;
+
+    for (ai = start; ai < end; ai++)
+    {
+        local = ai - start;
+        k_end = excl->index[ai+1];
+        for (k = excl->index[ai]; k < k_end; k++)
+        {
+            if (b)
+            {
+                SETEXCL(bexcl, local, excl->a[k]);
+            }
+            else
+            {
+                RMEXCL(bexcl, local, excl->a[k]);
+            }
+        }
+    }
+}
+
+/* Return the number of consecutive charge groups, starting at icg itself,
+ * that charge group icg has to be checked against when cgtot charge groups
+ * are distributed so that every pair is visited once.
+ *
+ *  - cgtot odd:            always 1 + cgtot/2
+ *  - cgtot multiple of 4:  1 + cgtot/2 for even icg in the lower half and
+ *                          for odd icg in the upper half, cgtot/2 otherwise
+ *  - cgtot = 2 (mod 4):    1 + cgtot/2 for even icg, cgtot/2 for odd icg
+ *
+ * The former "#ifdef DEBUG" print was removed: it wrote to `log`, which is
+ * not a FILE pointer in this scope, so a DEBUG build could not compile.
+ */
+int calc_naaj(int icg, int cgtot)
+{
+    const int half = cgtot/2;
+    int       bExtra;
+
+    if ((cgtot % 2) == 1)
+    {
+        /* Odd number of charge groups, easy */
+        return half + 1;
+    }
+
+    if ((cgtot % 4) == 0 && icg >= half)
+    {
+        /* Multiple of four: the parity rule is mirrored in the upper half */
+        bExtra = ((icg % 2) == 1);
+    }
+    else
+    {
+        bExtra = ((icg % 2) == 0);
+    }
+
+    return bExtra ? half + 1 : half;
+}
+
+/************************************************
+ *
+ * S I M P L E C O R E S T U F F
+ *
+ ************************************************/
+
+/* Squared distance between xi and the nearest periodic image of xj in a
+ * triclinic box, computed dimension by dimension from Z down to X so that
+ * the off-diagonal box elements of each applied shift are taken into account.
+ * The integer shift (tx, ty, tz) that was applied is returned through
+ * *shift as XYZ2IS(tx, ty, tz).
+ *
+ * This code assumes that the cut-off is smaller than half the smallest
+ * diagonal element of the box.
+ */
+static real calc_image_tric(rvec xi, rvec xj, matrix box,
+                            rvec b_inv, int *shift)
+{
+    /* Rounding to nearest through truncation: add 2.5, truncate, subtract 2 */
+    const real nint_offset = 2.5;
+    real       dx, dy, dz;
+    real       r2;
+    int        tx, ty, tz;
+
+    /* Difference vector */
+    dx = xj[XX] - xi[XX];
+    dy = xj[YY] - xi[YY];
+    dz = xj[ZZ] - xi[ZZ];
+
+    /* Z shift, which also moves the Y and X components */
+    tz  = (int)(dz*b_inv[ZZ] + nint_offset) - 2;
+    dz -= tz*box[ZZ][ZZ];
+    dy -= tz*box[ZZ][YY];
+    dx -= tz*box[ZZ][XX];
+
+    /* Y shift, which also moves the X component */
+    ty  = (int)(dy*b_inv[YY] + nint_offset) - 2;
+    dy -= ty*box[YY][YY];
+    dx -= ty*box[YY][XX];
+
+    /* X shift */
+    tx  = (int)(dx*b_inv[XX] + nint_offset) - 2;
+    dx -= tx*box[XX][XX];
+
+    r2 = (dx*dx) + (dy*dy) + (dz*dz);
+
+    *shift = XYZ2IS(tx, ty, tz);
+
+    return r2;
+}
+
+/* Squared distance between xi and the nearest periodic image of xj in a
+ * rectangular box with edge lengths box_size and inverse lengths b_inv.
+ * The integer shift (tx, ty, tz) that was applied is returned through
+ * *shift as XYZ2IS(tx, ty, tz).
+ */
+static real calc_image_rect(rvec xi, rvec xj, rvec box_size,
+                            rvec b_inv, int *shift)
+{
+    /* Rounding to nearest through truncation: add 1.5, truncate, subtract 1 */
+    const real nint_offset = 1.5;
+    real       dx, dy, dz;
+    real       cx, cy, cz;
+    real       r2;
+    int        tx, ty, tz;
+
+    /* Difference vector */
+    dx = xj[XX] - xi[XX];
+    dy = xj[YY] - xi[YY];
+    dz = xj[ZZ] - xi[ZZ];
+
+    /* Number of box lengths to shift in each dimension */
+    tx = (int)(dx*b_inv[XX] + nint_offset) - 1;
+    ty = (int)(dy*b_inv[YY] + nint_offset) - 1;
+    tz = (int)(dz*b_inv[ZZ] + nint_offset) - 1;
+
+    /* Difference vector corrected for the shift (sign is irrelevant for r2) */
+    cx = tx*box_size[XX] - dx;
+    cy = ty*box_size[YY] - dy;
+    cz = tz*box_size[ZZ] - dz;
+
+    r2 = (cx*cx) + (cy*cy) + (cz*cz);
+
+    *shift = XYZ2IS(tx, ty, tz);
+
+    return r2;
+}
+
+/* Append charge group cg_j, which holds nrj atoms, to the buffer nsbuf.
+ * When the buffered atom count plus nrj would exceed MAX_CG, the buffer is
+ * first handed to put_in_list() (short-range, VdW and Coulomb both enabled)
+ * and emptied.
+ */
+static void add_simple(t_ns_buf *nsbuf, int nrj, atom_id cg_j,
+                       gmx_bool bHaveVdW[], int ngid, t_mdatoms *md,
+                       int icg, int jgid, t_block *cgs, t_excl bexcl[],
+                       int shift, t_forcerec *fr, put_in_list_t *put_in_list)
+{
+    const gmx_bool bWouldOverflow = (nsbuf->nj + nrj > MAX_CG);
+
+    if (bWouldOverflow)
+    {
+        /* Flush what has been collected so far and start over */
+        put_in_list(bHaveVdW, ngid, md, icg, jgid, nsbuf->ncg, nsbuf->jcg,
+                    cgs->index, bexcl, shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt);
+        nsbuf->ncg = 0;
+        nsbuf->nj  = 0;
+    }
+
+    nsbuf->jcg[nsbuf->ncg] = cg_j;
+    nsbuf->ncg++;
+    nsbuf->nj += nrj;
+}
+
+/* Simple-search inner loop for triclinic boxes.
+ *
+ * Each of the njcg candidate charge groups in jcg[] whose nearest image
+ * (calc_image_tric on the entries of x[]) lies within rcut2 of charge group
+ * icg, and whose energy group is not flagged EGP_EXCL in i_egp_flags, is
+ * buffered with add_simple() under its energy group and shift index.
+ */
+static void ns_inner_tric(rvec x[], int icg, int *i_egp_flags,
+                          int njcg, atom_id jcg[],
+                          matrix box, rvec b_inv, real rcut2,
+                          t_block *cgs, t_ns_buf **ns_buf,
+                          gmx_bool bHaveVdW[], int ngid, t_mdatoms *md,
+                          t_excl bexcl[], t_forcerec *fr,
+                          put_in_list_t *put_in_list)
+{
+    atom_id *cg_start = cgs->index;
+    int     *cg_flags = fr->cginfo;
+    int      shift    = CENTRAL;
+    int      k, jgid, natoms_j;
+    atom_id  cg_j;
+
+    for (k = 0; k < njcg; k++)
+    {
+        cg_j = jcg[k];
+
+        if (!(calc_image_tric(x[icg], x[cg_j], box, b_inv, &shift) < rcut2))
+        {
+            continue;
+        }
+
+        jgid = GET_CGINFO_GID(cg_flags[cg_j]);
+        if (i_egp_flags[jgid] & EGP_EXCL)
+        {
+            continue;
+        }
+
+        natoms_j = cg_start[cg_j+1] - cg_start[cg_j];
+        add_simple(&ns_buf[jgid][shift], natoms_j, cg_j,
+                   bHaveVdW, ngid, md, icg, jgid, cgs, bexcl, shift, fr,
+                   put_in_list);
+    }
+}
+
+/* Simple-search inner loop for rectangular boxes and for systems without
+ * a box.
+ *
+ * With bBox set, the nearest-image distance from calc_image_rect() decides
+ * whether a candidate charge group from jcg[] is in range, and the shift it
+ * reports is used. Without a box the plain distance2() is used with the
+ * CENTRAL shift, and rcut2 == 0 accepts every candidate.
+ * Accepted candidates whose energy group is not flagged EGP_EXCL in
+ * i_egp_flags are buffered with add_simple().
+ */
+static void ns_inner_rect(rvec x[], int icg, int *i_egp_flags,
+                          int njcg, atom_id jcg[],
+                          gmx_bool bBox, rvec box_size, rvec b_inv, real rcut2,
+                          t_block *cgs, t_ns_buf **ns_buf,
+                          gmx_bool bHaveVdW[], int ngid, t_mdatoms *md,
+                          t_excl bexcl[], t_forcerec *fr,
+                          put_in_list_t *put_in_list)
+{
+    atom_id  *cg_start = cgs->index;
+    int      *cg_flags = fr->cginfo;
+    int       shift    = CENTRAL;
+    int       k, jgid, natoms_j;
+    atom_id   cg_j;
+    gmx_bool  bInRange;
+
+    for (k = 0; k < njcg; k++)
+    {
+        cg_j = jcg[k];
+
+        if (bBox)
+        {
+            /* Also updates shift to the image that was used */
+            bInRange = (calc_image_rect(x[icg], x[cg_j], box_size, b_inv, &shift) < rcut2);
+        }
+        else
+        {
+            /* No periodicity: shift stays CENTRAL */
+            bInRange = ((rcut2 == 0) || (distance2(x[icg], x[cg_j]) < rcut2));
+        }
+        if (!bInRange)
+        {
+            continue;
+        }
+
+        jgid = GET_CGINFO_GID(cg_flags[cg_j]);
+        if (i_egp_flags[jgid] & EGP_EXCL)
+        {
+            continue;
+        }
+
+        natoms_j = cg_start[cg_j+1] - cg_start[cg_j];
+        add_simple(&ns_buf[jgid][shift], natoms_j, cg_j,
+                   bHaveVdW, ngid, md, icg, jgid, cgs, bexcl, shift, fr,
+                   put_in_list);
+    }
+}
+
+/* ns_simple_core needs to be adapted for QMMM still 2005 */
+
+/* Neighbour search without a grid.
+ *
+ * For every charge group icg in [fr->cg0, fr->hcg) the exclusion marks are
+ * set, the calc_naaj() candidates starting at aaj[icg] are tested with
+ * ns_inner_tric() or ns_inner_rect(), whatever remains in the
+ * per-energy-group / per-shift buffers ns_buf is handed to put_in_list(),
+ * and the exclusion marks are removed again.
+ *
+ * Returns the total number of candidate charge groups that were examined.
+ */
+static int ns_simple_core(t_forcerec *fr,
+                          gmx_localtop_t *top,
+                          t_mdatoms *md,
+                          matrix box, rvec box_size,
+                          t_excl bexcl[], atom_id *aaj,
+                          int ngid, t_ns_buf **ns_buf,
+                          put_in_list_t *put_in_list, gmx_bool bHaveVdW[])
+{
+    t_block   *cgs        = &(top->cgs);
+    t_blocka  *excl       = &(top->excls);
+    int       *cg_flags   = fr->cginfo;
+    real       rlist2     = sqr(fr->rlist);
+    gmx_bool   bBox       = (fr->ePBC != epbcNONE);
+    gmx_bool   bTriclinic = FALSE;
+    rvec       b_inv;
+    int        nsearch    = 0;
+    int        icg, igid, ncand, d, eg, sh;
+    atom_id    at0, at1;
+    int       *i_egp_flags;
+    t_ns_buf  *buf;
+
+    if (bBox)
+    {
+        for (d = 0; d < DIM; d++)
+        {
+            b_inv[d] = divide_err(1.0, box_size[d]);
+        }
+        bTriclinic = TRICLINIC(box);
+    }
+
+    for (icg = fr->cg0; icg < fr->hcg; icg++)
+    {
+        at0         = cgs->index[icg];
+        at1         = cgs->index[icg+1];
+        igid        = GET_CGINFO_GID(cg_flags[icg]);
+        i_egp_flags = fr->egp_flags + ngid*igid;
+
+        /* Mark the exclusions of the atoms in this charge group */
+        setexcl(at0, at1, excl, TRUE, bexcl);
+
+        ncand = calc_naaj(icg, cgs->nr);
+        if (bTriclinic)
+        {
+            ns_inner_tric(fr->cg_cm, icg, i_egp_flags, ncand, &(aaj[icg]),
+                          box, b_inv, rlist2, cgs, ns_buf,
+                          bHaveVdW, ngid, md, bexcl, fr, put_in_list);
+        }
+        else
+        {
+            ns_inner_rect(fr->cg_cm, icg, i_egp_flags, ncand, &(aaj[icg]),
+                          bBox, box_size, b_inv, rlist2, cgs, ns_buf,
+                          bHaveVdW, ngid, md, bexcl, fr, put_in_list);
+        }
+        nsearch += ncand;
+
+        /* Flush everything that is still buffered for this i charge group */
+        for (eg = 0; eg < ngid; eg++)
+        {
+            for (sh = 0; sh < SHIFTS; sh++)
+            {
+                buf = &(ns_buf[eg][sh]);
+                if (buf->ncg <= 0)
+                {
+                    continue;
+                }
+                put_in_list(bHaveVdW, ngid, md, icg, eg, buf->ncg, buf->jcg,
+                            cgs->index, bexcl, sh, fr, FALSE, TRUE, TRUE, fr->solvent_opt);
+                buf->ncg = 0;
+                buf->nj  = 0;
+            }
+        }
+
+        /* Remove the exclusion marks again */
+        setexcl(at0, at1, excl, FALSE, bexcl);
+    }
+    close_neighbor_lists(fr, FALSE);
+
+    return nsearch;
+}
+
+/************************************************
+ *
+ * N S 5 G R I D S T U F F
+ *
+ ************************************************/
+
+/* Determine the range [*dx0, *dx1] of grid cells along one dimension (Nx
+ * cells of width gridx) that can hold points within a squared distance rc2
+ * of coordinate x, where xgi is the cell index of x (it may lie outside
+ * 0 .. Nx-1). For each cell c in the range, dcx2[c] receives the squared
+ * distance from x to the nearest edge of that cell (0 for the home cell).
+ * An empty range is reported as *dx0 > *dx1.
+ */
+static gmx_inline void get_dx(int Nx, real gridx, real rc2, int xgi, real x,
+                              int *dx0, int *dx1, real *dcx2)
+{
+    real edge, edge2;
+    int  below, above, c;
+
+    if (xgi < 0)
+    {
+        /* x lies before the first cell: only search upwards from cell 0 */
+        *dx0  = 0;
+        *dx1  = -1;
+        below = -1;
+        above = 0;
+    }
+    else if (xgi >= Nx)
+    {
+        /* x lies past the last cell: only search downwards from cell Nx-1 */
+        *dx0  = Nx;
+        *dx1  = Nx-1;
+        below = Nx-1;
+        above = Nx;
+    }
+    else
+    {
+        /* x lies in cell xgi, which is always part of the range */
+        dcx2[xgi] = 0;
+        *dx0      = xgi;
+        *dx1      = xgi;
+        below     = xgi-1;
+        above     = xgi+1;
+    }
+
+    /* Extend downwards while the upper edge of the cell is within range */
+    c = below;
+    while (c >= 0)
+    {
+        edge  = (c+1)*gridx-x;
+        edge2 = edge*edge;
+        if (edge2 >= rc2)
+        {
+            break;
+        }
+        *dx0    = c;
+        dcx2[c] = edge2;
+        c--;
+    }
+
+    /* Extend upwards while the lower edge of the cell is within range */
+    c = above;
+    while (c < Nx)
+    {
+        edge  = c*gridx-x;
+        edge2 = edge*edge;
+        if (edge2 >= rc2)
+        {
+            break;
+        }
+        *dx1    = c;
+        dcx2[c] = edge2;
+        c++;
+    }
+}
+
+/* Variant of get_dx() that can restrict the cell range using ncpddc and
+ * the allowed shift range.
+ *
+ * Determines the range [*g0, *g1] of grid cells along one dimension (Nx
+ * cells of width gridx) within squared distance rc2 of coordinate x, whose
+ * cell index is xgi. When xgi is inside the grid and ncpddc is non-zero,
+ * the search is limited to the cells below ncpddc or from ncpddc upwards,
+ * depending on shift_min, shift_max and on which side of ncpddc xgi lies.
+ * For each cell c in the range, dcx2[c] receives the squared distance
+ * from x to the nearest edge of that cell (0 for the home cell).
+ * An empty range is reported as *g0 > *g1.
+ */
+static gmx_inline void get_dx_dd(int Nx, real gridx, real rc2, int xgi, real x,
+                                 int ncpddc, int shift_min, int shift_max,
+                                 int *g0, int *g1, real *dcx2)
+{
+    real edge, edge2;
+    int  g_min = 0;
+    int  g_max = Nx - 1;
+    int  lo, hi, c, shift_home;
+
+    if (xgi < 0)
+    {
+        lo = 0;
+        hi = -1;
+    }
+    else if (xgi >= Nx)
+    {
+        lo = Nx;
+        hi = Nx - 1;
+    }
+    else
+    {
+        if (ncpddc != 0)
+        {
+            shift_home = (xgi < ncpddc) ? 0 : -1;
+            g_min      = (shift_min == shift_home ? 0 : ncpddc);
+            g_max      = (shift_max == shift_home ? ncpddc - 1 : Nx - 1);
+        }
+
+        if (shift_min > 0)
+        {
+            /* Start with an empty range at the lower limit */
+            lo = g_min;
+            hi = g_min - 1;
+        }
+        else if (shift_max < 0)
+        {
+            /* Start with an empty range at the upper limit */
+            lo = g_max + 1;
+            hi = g_max;
+        }
+        else
+        {
+            /* The home cell is always part of the range */
+            lo        = xgi;
+            hi        = xgi;
+            dcx2[xgi] = 0;
+        }
+    }
+
+    /* Extend one cell down at a time, testing the upper edge of that cell */
+    for (c = lo - 1; c >= g_min; c--)
+    {
+        edge  = (c + 1)*gridx - x;
+        edge2 = edge*edge;
+        if (edge2 >= rc2)
+        {
+            break;
+        }
+        lo      = c;
+        dcx2[c] = edge2;
+    }
+
+    /* Extend one cell up at a time, testing the lower edge of that cell */
+    for (c = hi + 1; c <= g_max; c++)
+    {
+        edge  = c*gridx - x;
+        edge2 = edge*edge;
+        if (edge2 >= rc2)
+        {
+            break;
+        }
+        hi      = c;
+        dcx2[c] = edge2;
+    }
+
+    *g0 = lo;
+    *g1 = hi;
+}
+
+
+/* sqr(x):                  x squared; x is evaluated twice, so it must not
+ *                          have side effects.
+ * calc_dx2(XI, YI, ZI, y): squared distance between the point (XI, YI, ZI)
+ *                          and the vector y (indexed with XX, YY, ZZ).
+ * calc_cyl_dx2(XI, YI, y): the same, using the XX and YY components only.
+ * Every macro parameter is parenthesised so that expression arguments such
+ * as "p + 1" or "a - b" expand as intended.
+ */
+#define sqr(x) ((x)*(x))
+#define calc_dx2(XI, YI, ZI, y) (sqr((XI)-(y)[XX]) + sqr((YI)-(y)[YY]) + sqr((ZI)-(y)[ZZ]))
+#define calc_cyl_dx2(XI, YI, y) (sqr((XI)-(y)[XX]) + sqr((YI)-(y)[YY]))
+/****************************************************
+ *
+ * F A S T N E I G H B O R S E A R C H I N G
+ *
+ * Optimized neighboursearching routine using grid
+ * at least 1x1x1, see GROMACS manual
+ *
+ ****************************************************/
+
+
+/* Compute the squared cut-off distances used when building the lists.
+ *
+ *  *rs2    = rlist^2
+ *  *rvdw2  = squared list distance for Van der Waals
+ *  *rcoul2 = squared list distance for Coulomb
+ *  *rm2    = min(*rvdw2, *rcoul2)
+ *  *rl2    = max(*rvdw2, *rcoul2)
+ *
+ * Unless both bDoLongRange and fr->bTwinRange are set, *rvdw2 and *rcoul2
+ * are simply equal to *rs2.
+ */
+static void get_cutoff2(t_forcerec *fr, gmx_bool bDoLongRange,
+                        real *rvdw2, real *rcoul2,
+                        real *rs2, real *rm2, real *rl2)
+{
+    gmx_bool bListAtRvdw, bListAtRcoulomb;
+    real     r_vdw, r_coul;
+
+    *rs2 = sqr(fr->rlist);
+
+    if (!(bDoLongRange && fr->bTwinRange))
+    {
+        /* Workaround for a gcc -O3 or -ffast-math problem */
+        *rvdw2  = *rs2;
+        *rcoul2 = *rs2;
+    }
+    else
+    {
+        /* With plain cut-off or RF the list has to extend exactly up to the
+         * cut-off, and the VdW and Coulomb cut-offs can differ, so they can
+         * not simply be set to rlistlong.
+         * To stay compatible with (exotic) old cases, lists up to
+         * rvdw/rcoulomb are also made for PME and Ewald.
+         * The interaction check should correspond to:
+         * !ir_vdw/coulomb_might_be_zero_at_cutoff from inputrec.c.
+         */
+        bListAtRvdw =
+            (((fr->vdwtype == evdwCUT || fr->vdwtype == evdwPME) &&
+              fr->vdw_modifier == eintmodNONE) ||
+             fr->rvdw <= fr->rlist);
+        bListAtRcoulomb =
+            (((fr->eeltype == eelCUT ||
+               (EEL_RF(fr->eeltype) && fr->eeltype != eelRF_ZERO) ||
+               fr->eeltype == eelPME ||
+               fr->eeltype == eelEWALD) &&
+              fr->coulomb_modifier == eintmodNONE) ||
+             fr->rcoulomb <= fr->rlist);
+
+        r_vdw  = bListAtRvdw ? fr->rvdw : fr->rlistlong;
+        r_coul = bListAtRcoulomb ? fr->rcoulomb : fr->rlistlong;
+
+        *rvdw2  = sqr(r_vdw);
+        *rcoul2 = sqr(r_coul);
+    }
+
+    *rm2 = min(*rvdw2, *rcoul2);
+    *rl2 = max(*rvdw2, *rcoul2);
+}
+
+/* Allocate the per-energy-group buffers and counters in ns that the grid
+ * search uses: one counter and one buffer of MAX_CG entries per energy group
+ * for each of the short-range, long-range LJ+Coulomb and long-range
+ * single-interaction lists. The squared cut-offs are written to the debug
+ * file when debugging is active.
+ */
+static void init_nsgrid_lists(t_forcerec *fr, int ngid, gmx_ns_t *ns)
+{
+    real rvdw2, rcoul2, rs2, rm2, rl2;
+    int  g;
+
+    /* Counters, one per energy group */
+    snew(ns->nsr, ngid);
+    snew(ns->nlr_ljc, ngid);
+    snew(ns->nlr_one, ngid);
+
+    /* Buffers: short range, long-range VdW+Coulomb, and long-range VdW or
+     * Coulomb only. Both long-range types are always allocated, since
+     * rcoulomb might now change with PME load balancing.
+     */
+    snew(ns->nl_sr, ngid);
+    snew(ns->nl_lr_ljc, ngid);
+    snew(ns->nl_lr_one, ngid);
+    for (g = 0; g < ngid; g++)
+    {
+        snew(ns->nl_sr[g], MAX_CG);
+        snew(ns->nl_lr_ljc[g], MAX_CG);
+        snew(ns->nl_lr_one[g], MAX_CG);
+    }
+
+    get_cutoff2(fr, TRUE, &rvdw2, &rcoul2, &rs2, &rm2, &rl2);
+    if (debug)
+    {
+        fprintf(debug,
+                "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n",
+                rs2, rm2, rl2);
+    }
+}
+/* Grid-based neighbour search core.
+ *
+ * For every i charge group icg in [cg0, cg1) this loops over the periodic
+ * shifts (tz, ty, tx), determines with get_dx_dd() the range of grid cells
+ * that can lie within the longest cut-off, and tests each j charge group
+ * jjcg in those cells on its squared distance r2 to the (shifted) centre of
+ * icg, using the positions in fr->cg_cm:
+ *   r2 < rs2         -> buffer nl_sr     (put_in_list with bLR = FALSE)
+ *   rs2 <= r2 < rm2  -> buffer nl_lr_ljc (bLR = TRUE, VdW and Coulomb)
+ *   rm2 <= r2 < rl2  -> buffer nl_lr_one (bLR = TRUE, bDoVdW/bDoCoul taken
+ *                       from rvdw_lt_rcoul/rcoul_lt_rvdw)
+ * The per-energy-group buffers are handed to put_in_list() when they hold
+ * MAX_CG entries and, for what is left, at the end of every shift.
+ *
+ * With bMakeQMMMnblist set, rs2 and rm2 are raised to rl2, so all accepted
+ * pairs go to the short-range buffer, and i charge groups whose first atom
+ * has md->bQM == FALSE are skipped.
+ *
+ * Returns nns, the number of j charge groups that passed the index-range
+ * test (counted whether or not they were within the cut-off).
+ */
+static int nsgrid_core(t_commrec *cr, t_forcerec *fr,
+ matrix box, int ngid,
+ gmx_localtop_t *top,
+ t_grid *grid,
+ t_excl bexcl[], gmx_bool *bExcludeAlleg,
+ t_mdatoms *md,
+ put_in_list_t *put_in_list,
+ gmx_bool bHaveVdW[],
+ gmx_bool bDoLongRange, gmx_bool bMakeQMMMnblist)
+{
+ gmx_ns_t *ns;
+ atom_id **nl_lr_ljc, **nl_lr_one, **nl_sr;
+ int *nlr_ljc, *nlr_one, *nsr;
+ gmx_domdec_t *dd = NULL;
+ t_block *cgs = &(top->cgs);
+ int *cginfo = fr->cginfo;
+ /* atom_id *i_atoms,*cgsindex=cgs->index; */
+ ivec sh0, sh1, shp;
+ int cell_x, cell_y, cell_z;
+ int d, tx, ty, tz, dx, dy, dz, cj;
+#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
+ int zsh_ty, zsh_tx, ysh_tx;
+#endif
+ int dx0, dx1, dy0, dy1, dz0, dz1;
+ int Nx, Ny, Nz, shift = -1, j, nrj, nns, nn = -1;
+ real gridx, gridy, gridz, grid_x, grid_y, grid_z;
+ real *dcx2, *dcy2, *dcz2;
+ int zgi, ygi, xgi;
+ int cg0, cg1, icg = -1, cgsnr, i0, igid, nri, naaj, max_jcg;
+ int jcg0, jcg1, jjcg, cgj0, jgid;
+ int *grida, *gridnra, *gridind;
+ gmx_bool rvdw_lt_rcoul, rcoul_lt_rvdw;
+ rvec xi, *cgcm, grid_offset;
+ real r2, rs2, rvdw2, rcoul2, rm2, rl2, XI, YI, ZI, dcx, dcy, dcz, tmp1, tmp2;
+ int *i_egp_flags;
+ gmx_bool bDomDec, bTriclinicX, bTriclinicY;
+ ivec ncpddc;
+
+ ns = &fr->ns;
+
+ bDomDec = DOMAINDECOMP(cr);
+ if (bDomDec)
+ {
+ dd = cr->dd;
+ }
+ /* bTriclinicX/Y: the cell index along X/Y has to be recomputed per shift
+ * because an off-diagonal box element couples it to the Y/Z shift */
+ bTriclinicX = ((YY < grid->npbcdim &&
+ (!bDomDec || dd->nc[YY] == 1) && box[YY][XX] != 0) ||
+ (ZZ < grid->npbcdim &&
+ (!bDomDec || dd->nc[ZZ] == 1) && box[ZZ][XX] != 0));
+ bTriclinicY = (ZZ < grid->npbcdim &&
+ (!bDomDec || dd->nc[ZZ] == 1) && box[ZZ][YY] != 0);
+
+ cgsnr = cgs->nr;
+
+ get_cutoff2(fr, bDoLongRange, &rvdw2, &rcoul2, &rs2, &rm2, &rl2);
+ /* NOTE: despite their names, rvdw_lt_rcoul is TRUE when rvdw2 >= rcoul2 and
+ * rcoul_lt_rvdw is TRUE when rcoul2 >= rvdw2. They are passed to
+ * put_in_list() as bDoVdW and bDoCoul for the nl_lr_one buffers. */
+ rvdw_lt_rcoul = (rvdw2 >= rcoul2);
+ rcoul_lt_rvdw = (rcoul2 >= rvdw2);
+
+ if (bMakeQMMMnblist)
+ { /* Use the longest cut-off for all ranges, so every accepted pair goes to the short-range buffer */
+ rm2 = rl2;
+ rs2 = rl2;
+ }
+
+ nl_sr = ns->nl_sr;
+ nsr = ns->nsr;
+ nl_lr_ljc = ns->nl_lr_ljc;
+ nl_lr_one = ns->nl_lr_one;
+ nlr_ljc = ns->nlr_ljc;
+ nlr_one = ns->nlr_one;
+
+ /* Unpack arrays */
+ cgcm = fr->cg_cm;
+ Nx = grid->n[XX];
+ Ny = grid->n[YY];
+ Nz = grid->n[ZZ];
+ grida = grid->a;
+ gridind = grid->index;
+ gridnra = grid->nra;
+ nns = 0;
+
+ gridx = grid->cell_size[XX];
+ gridy = grid->cell_size[YY];
+ gridz = grid->cell_size[ZZ];
+ grid_x = 1/gridx; /* grid_x/y/z are the inverse cell sizes */
+ grid_y = 1/gridy;
+ grid_z = 1/gridz;
+ copy_rvec(grid->cell_offset, grid_offset);
+ copy_ivec(grid->ncpddc, ncpddc);
+ dcx2 = grid->dcx2;
+ dcy2 = grid->dcy2;
+ dcz2 = grid->dcz2;
+
+#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
+ zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5);
+ zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5);
+ ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5);
+ if (zsh_tx != 0 && ysh_tx != 0)
+ {
+ /* This could happen due to rounding, when both ratios are 0.5 */
+ ysh_tx = 0;
+ }
+#endif
+
+ debug_gmx();
+
+ if (fr->n_tpi)
+ {
+ /* We only want a list for the test particle */
+ cg0 = cgsnr - 1;
+ }
+ else
+ {
+ cg0 = grid->icg0;
+ }
+ cg1 = grid->icg1;
+
+ /* Set the shift range: shp[d] is the largest periodic shift (in box
+ * vectors) that is tried along dimension d */
+ for (d = 0; d < DIM; d++)
+ {
+ sh0[d] = -1;
+ sh1[d] = 1;
+ /* Check if we need periodicity shifts.
+ * Without PBC or with domain decomposition we don't need them.
+ */
+ if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1))
+ {
+ shp[d] = 0;
+ }
+ else
+ {
+ if (d == XX &&
+ box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
+ {
+ shp[d] = 2;
+ }
+ else
+ {
+ shp[d] = 1;
+ }
+ }
+ }
+
+ /* Loop over charge groups */
+ for (icg = cg0; (icg < cg1); icg++)
+ {
+ igid = GET_CGINFO_GID(cginfo[icg]);
+ /* Skip this charge group if all energy groups are excluded! */
+ if (bExcludeAlleg[igid])
+ {
+ continue;
+ }
+
+ i0 = cgs->index[icg];
+
+ if (bMakeQMMMnblist)
+ {
+ /* Skip this charge group if it is not a QM atom while making a
+ * QM/MM neighbourlist
+ */
+ if (md->bQM[i0] == FALSE)
+ {
+ continue; /* MM particle, go to next particle */
+ }
+
+ /* Compute the number of charge groups that fall within the control
+ * of this one (icg)
+ */
+ naaj = calc_naaj(icg, cgsnr);
+ jcg0 = icg;
+ jcg1 = icg + naaj;
+ max_jcg = cgsnr;
+ }
+ else
+ {
+ /* make a normal neighbourlist */
+
+ if (bDomDec)
+ {
+ /* Get the j charge-group and dd cell shift ranges */
+ dd_get_ns_ranges(cr->dd, icg, &jcg0, &jcg1, sh0, sh1);
+ max_jcg = 0; /* no wrap-around range: for non-negative jjcg only [jcg0, jcg1) is accepted below */
+ }
+ else
+ {
+ /* Compute the number of charge groups that fall within the control
+ * of this one (icg)
+ */
+ naaj = calc_naaj(icg, cgsnr);
+ jcg0 = icg;
+ jcg1 = icg + naaj;
+
+ if (fr->n_tpi)
+ {
+ /* The i-particle is always the test particle,
+ * so we want all j-particles
+ */
+ max_jcg = cgsnr - 1;
+ }
+ else
+ {
+ max_jcg = jcg1 - cgsnr; /* j indices below this value are accepted as well: the part of [jcg0, jcg1) that runs past cgsnr */
+ }
+ }
+ }
+
+ i_egp_flags = fr->egp_flags + igid*ngid;
+
+ /* Set the exclusions for the atoms in charge group icg using a bitmask */
+ setexcl(i0, cgs->index[icg+1], &top->excls, TRUE, bexcl);
+
+ ci2xyz(grid, icg, &cell_x, &cell_y, &cell_z);
+
+ /* Changed iicg to icg, DvdS 990115
+ * (but see consistency check above, DvdS 990330)
+ */
+#ifdef NS5DB
+ fprintf(log, "icg=%5d, naaj=%5d, cell %d %d %d\n",
+ icg, naaj, cell_x, cell_y, cell_z);
+#endif
+ /* Loop over shift vectors in three dimensions */
+ for (tz = -shp[ZZ]; tz <= shp[ZZ]; tz++)
+ {
+ ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ];
+ /* Calculate range of cells in Z direction that have the shift tz */
+ zgi = cell_z + tz*Nz;
+#define FAST_DD_NS /* defined unconditionally here, so the get_dx_dd() branches below are the ones that get compiled */
+#ifndef FAST_DD_NS
+ get_dx(Nz, gridz, rl2, zgi, ZI, &dz0, &dz1, dcz2);
+#else
+ get_dx_dd(Nz, gridz, rl2, zgi, ZI-grid_offset[ZZ],
+ ncpddc[ZZ], sh0[ZZ], sh1[ZZ], &dz0, &dz1, dcz2);
+#endif
+ if (dz0 > dz1)
+ { /* empty cell range: nothing within reach for this shift */
+ continue;
+ }
+ for (ty = -shp[YY]; ty <= shp[YY]; ty++)
+ {
+ YI = cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY];
+ /* Calculate range of cells in Y direction that have the shift ty */
+ if (bTriclinicY)
+ {
+ ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny;
+ }
+ else
+ {
+ ygi = cell_y + ty*Ny;
+ }
+#ifndef FAST_DD_NS
+ get_dx(Ny, gridy, rl2, ygi, YI, &dy0, &dy1, dcy2);
+#else
+ get_dx_dd(Ny, gridy, rl2, ygi, YI-grid_offset[YY],
+ ncpddc[YY], sh0[YY], sh1[YY], &dy0, &dy1, dcy2);
+#endif
+ if (dy0 > dy1)
+ {
+ continue;
+ }
+ for (tx = -shp[XX]; tx <= shp[XX]; tx++)
+ {
+ XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
+ /* Calculate range of cells in X direction that have the shift tx */
+ if (bTriclinicX)
+ {
+ xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx;
+ }
+ else
+ {
+ xgi = cell_x + tx*Nx;
+ }
+#ifndef FAST_DD_NS
+ get_dx(Nx, gridx, rl2, xgi*Nx, XI, &dx0, &dx1, dcx2); /* NOTE(review): the Y and Z calls pass ygi/zgi, not a product with the cell count; this branch is not compiled while FAST_DD_NS is defined - confirm before enabling it */
+#else
+ get_dx_dd(Nx, gridx, rl2, xgi, XI-grid_offset[XX],
+ ncpddc[XX], sh0[XX], sh1[XX], &dx0, &dx1, dcx2);
+#endif
+ if (dx0 > dx1)
+ {
+ continue;
+ }
+ /* AdResS: an explicit cg that has a weighting function of 0 is excluded
+ * from the neighbour list as it will not interact.
+ * This test only depends on icg, not on the shift. */
+ if (fr->adress_type != eAdressOff)
+ {
+ if (md->wf[cgs->index[icg]] <= GMX_REAL_EPS && egp_explicit(fr, igid))
+ {
+ continue;
+ }
+ }
+ /* Get shift vector */
+ shift = XYZ2IS(tx, ty, tz);
+#ifdef NS5DB
+ range_check(shift, 0, SHIFTS);
+#endif
+ for (nn = 0; (nn < ngid); nn++)
+ { /* start this shift with empty buffers for every energy group */
+ nsr[nn] = 0;
+ nlr_ljc[nn] = 0;
+ nlr_one[nn] = 0;
+ }
+#ifdef NS5DB
+ fprintf(log, "shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n",
+ shift, dx0, dx1, dy0, dy1, dz0, dz1);
+ fprintf(log, "cgcm: %8.3f %8.3f %8.3f\n", cgcm[icg][XX],
+ cgcm[icg][YY], cgcm[icg][ZZ]);
+ fprintf(log, "xi: %8.3f %8.3f %8.3f\n", XI, YI, ZI);
+#endif
+ for (dx = dx0; (dx <= dx1); dx++)
+ {
+ tmp1 = rl2 - dcx2[dx]; /* squared distance budget left after the X cell offset */
+ for (dy = dy0; (dy <= dy1); dy++)
+ {
+ tmp2 = tmp1 - dcy2[dy]; /* budget left after the X and Y cell offsets */
+ if (tmp2 > 0)
+ {
+ for (dz = dz0; (dz <= dz1); dz++)
+ {
+ if (tmp2 > dcz2[dz])
+ {
+ /* Find grid-cell cj in which possible neighbours are */
+ cj = xyz2ci(Ny, Nz, dx, dy, dz);
+
+ /* Check out how many cgs (nrj) there in this cell */
+ nrj = gridnra[cj];
+
+ /* Find the offset in the cg list */
+ cgj0 = gridind[cj];
+
+ /* Check if all j's are out of range so we
+ * can skip the whole cell.
+ * Should save some time, especially with DD.
+ */
+ if (nrj == 0 ||
+ (grida[cgj0] >= max_jcg &&
+ (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0)))
+ {
+ continue;
+ }
+
+ /* Loop over cgs */
+ for (j = 0; (j < nrj); j++)
+ {
+ jjcg = grida[cgj0+j];
+
+ /* check whether this guy is in range! */
+ if ((jjcg >= jcg0 && jjcg < jcg1) ||
+ (jjcg < max_jcg))
+ {
+ r2 = calc_dx2(XI, YI, ZI, cgcm[jjcg]);
+ if (r2 < rl2)
+ {
+ /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */
+ jgid = GET_CGINFO_GID(cginfo[jjcg]);
+ /* check energy group exclusions */
+ if (!(i_egp_flags[jgid] & EGP_EXCL))
+ {
+ if (r2 < rs2)
+ {
+ if (nsr[jgid] >= MAX_CG)
+ {
+ /* Add to short-range list */
+ put_in_list(bHaveVdW, ngid, md, icg, jgid,
+ nsr[jgid], nl_sr[jgid],
+ cgs->index, /* cgsatoms, */ bexcl,
+ shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt);
+ nsr[jgid] = 0;
+ }
+ nl_sr[jgid][nsr[jgid]++] = jjcg;
+ }
+ else if (r2 < rm2)
+ {
+ if (nlr_ljc[jgid] >= MAX_CG)
+ {
+ /* Add to LJ+coulomb long-range list */
+ put_in_list(bHaveVdW, ngid, md, icg, jgid,
+ nlr_ljc[jgid], nl_lr_ljc[jgid], top->cgs.index,
+ bexcl, shift, fr, TRUE, TRUE, TRUE, fr->solvent_opt);
+ nlr_ljc[jgid] = 0;
+ }
+ nl_lr_ljc[jgid][nlr_ljc[jgid]++] = jjcg;
+ }
+ else
+ {
+ if (nlr_one[jgid] >= MAX_CG)
+ {
+ /* Add to long-range list with only coul, or only LJ */
+ put_in_list(bHaveVdW, ngid, md, icg, jgid,
+ nlr_one[jgid], nl_lr_one[jgid], top->cgs.index,
+ bexcl, shift, fr, TRUE, rvdw_lt_rcoul, rcoul_lt_rvdw, fr->solvent_opt);
+ nlr_one[jgid] = 0;
+ }
+ nl_lr_one[jgid][nlr_one[jgid]++] = jjcg;
+ }
+ }
+ }
+ nns++; /* counts every j charge group that passed the index-range test */
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ /* Check whether there is anything left in the buffers for this shift */
+ for (nn = 0; (nn < ngid); nn++)
+ {
+ if (nsr[nn] > 0)
+ {
+ put_in_list(bHaveVdW, ngid, md, icg, nn, nsr[nn], nl_sr[nn],
+ cgs->index, /* cgsatoms, */ bexcl,
+ shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt);
+ }
+
+ if (nlr_ljc[nn] > 0)
+ {
+ put_in_list(bHaveVdW, ngid, md, icg, nn, nlr_ljc[nn],
+ nl_lr_ljc[nn], top->cgs.index,
+ bexcl, shift, fr, TRUE, TRUE, TRUE, fr->solvent_opt);
+ }
+
+ if (nlr_one[nn] > 0)
+ {
+ put_in_list(bHaveVdW, ngid, md, icg, nn, nlr_one[nn],
+ nl_lr_one[nn], top->cgs.index,
+ bexcl, shift, fr, TRUE, rvdw_lt_rcoul, rcoul_lt_rvdw, fr->solvent_opt);
+ }
+ }
+ }
+ }
+ }
+ /* Remove the exclusion marks of charge group icg again */
+ setexcl(cgs->index[icg], cgs->index[icg+1], &top->excls, FALSE, bexcl);
+ }
+ /* No need to perform any left-over force calculations anymore (as we used to do here)
+ * since we now save the proper long-range lists for later evaluation.
+ */
+
+ debug_gmx();
+
+ /* Close neighbourlists */
+ close_neighbor_lists(fr, bMakeQMMMnblist);
+
+ return nns;
+}
+
+/* Make sure ns->bexcl can hold natoms entries.
+ * Nothing happens when the current allocation (ns->nra_alloc) is already
+ * large enough. Otherwise the array is grown to over_alloc_dd(natoms)
+ * entries and ALL entries, old and new, are reset to 0.
+ */
+void ns_realloc_natoms(gmx_ns_t *ns, int natoms)
+{
+    int a;
+
+    if (natoms <= ns->nra_alloc)
+    {
+        return;
+    }
+
+    ns->nra_alloc = over_alloc_dd(natoms);
+    srenew(ns->bexcl, ns->nra_alloc);
+
+    a = 0;
+    while (a < ns->nra_alloc)
+    {
+        ns->bexcl[a] = 0;
+        a++;
+    }
+}
+
+/* Initialise the neighbour-search data in ns.
+ *
+ *  - checks that the largest charge group in mtop fits in the number of
+ *    bits of t_excl (fatal error otherwise);
+ *  - fills ns->bExcludeAlleg[i], which is TRUE when energy group i has the
+ *    EGP_EXCL flag set against every energy group;
+ *  - allocates either the grid and its buffers (fr->bGrid) or the
+ *    simple-search buffers and the doubled index array ns->simple_aaj;
+ *  - fills ns->bHaveVdW[i], which is TRUE when atom type i has a non-zero
+ *    VdW parameter with at least one atom type;
+ *  - allocates ns->bexcl for mtop->natoms atoms unless domain decomposition
+ *    is used;
+ *  - sets ns->dump_nl from the environment variable GMX_DUMP_NL (0 when
+ *    it is not set).
+ *
+ * fplog may be NULL, in which case the GMX_DUMP_NL value is not logged.
+ */
+void init_ns(FILE *fplog, const t_commrec *cr,
+             gmx_ns_t *ns, t_forcerec *fr,
+             const gmx_mtop_t *mtop)
+{
+    int      mt, icg, nr_in_cg, maxcg, i, jcg, ngid, ncg;
+    t_block *cgs;
+    char    *env;
+
+    /* Compute largest charge groups size (# atoms) */
+    nr_in_cg = 1;
+    for (mt = 0; mt < mtop->nmoltype; mt++)
+    {
+        cgs = &mtop->moltype[mt].cgs;
+        for (icg = 0; (icg < cgs->nr); icg++)
+        {
+            nr_in_cg = max(nr_in_cg, (int)(cgs->index[icg+1]-cgs->index[icg]));
+        }
+    }
+
+    /* Verify whether largest charge group is <= max cg.
+     * This is determined by the type of the local exclusion type
+     * Exclusions are stored in bits. (If the type is not large
+     * enough, enlarge it, unsigned char -> unsigned short -> unsigned long)
+     */
+    maxcg = sizeof(t_excl)*8;
+    if (nr_in_cg > maxcg)
+    {
+        gmx_fatal(FARGS, "Max #atoms in a charge group: %d > %d\n",
+                  nr_in_cg, maxcg);
+    }
+
+    /* An energy group can be skipped completely when it is excluded
+     * against all energy groups.
+     */
+    ngid = mtop->groups.grps[egcENER].nr;
+    snew(ns->bExcludeAlleg, ngid);
+    for (i = 0; i < ngid; i++)
+    {
+        int j;
+
+        ns->bExcludeAlleg[i] = TRUE;
+        for (j = 0; j < ngid; j++)
+        {
+            if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL))
+            {
+                ns->bExcludeAlleg[i] = FALSE;
+            }
+        }
+    }
+
+    if (fr->bGrid)
+    {
+        /* Grid search */
+        ns->grid = init_grid(fplog, fr);
+        init_nsgrid_lists(fr, ngid, ns);
+    }
+    else
+    {
+        /* Simple search */
+        snew(ns->ns_buf, ngid);
+        for (i = 0; (i < ngid); i++)
+        {
+            snew(ns->ns_buf[i], SHIFTS);
+        }
+        /* The charge-group indices are stored twice in a row, so that a
+         * window starting at any index can run past the last charge group.
+         */
+        ncg = ncg_mtop(mtop);
+        snew(ns->simple_aaj, 2*ncg);
+        for (jcg = 0; (jcg < ncg); jcg++)
+        {
+            ns->simple_aaj[jcg]     = jcg;
+            ns->simple_aaj[jcg+ncg] = jcg;
+        }
+    }
+
+    /* Create array that determines whether or not atoms have VdW */
+    snew(ns->bHaveVdW, fr->ntype);
+    for (i = 0; (i < fr->ntype); i++)
+    {
+        int j;
+
+        for (j = 0; (j < fr->ntype); j++)
+        {
+            ns->bHaveVdW[i] = (ns->bHaveVdW[i] ||
+                               (fr->bBHAM ?
+                                ((BHAMA(fr->nbfp, fr->ntype, i, j) != 0) ||
+                                 (BHAMB(fr->nbfp, fr->ntype, i, j) != 0) ||
+                                 (BHAMC(fr->nbfp, fr->ntype, i, j) != 0)) :
+                                ((C6(fr->nbfp, fr->ntype, i, j) != 0) ||
+                                 (C12(fr->nbfp, fr->ntype, i, j) != 0))));
+        }
+    }
+    if (debug)
+    {
+        pr_bvec(debug, 0, "bHaveVdW", ns->bHaveVdW, fr->ntype, TRUE);
+    }
+
+    ns->nra_alloc = 0;
+    ns->bexcl     = NULL;
+    if (!DOMAINDECOMP(cr))
+    {
+        ns_realloc_natoms(ns, mtop->natoms);
+    }
+
+    ns->nblist_initialized = FALSE;
+
+    /* nbr list debug dump */
+    env = getenv("GMX_DUMP_NL");
+    if (env)
+    {
+        ns->dump_nl = strtol(env, NULL, 10);
+        if (fplog)
+        {
+            /* Terminate the line, so the next log entry starts on its own line */
+            fprintf(fplog, "GMX_DUMP_NL = %d\n", ns->dump_nl);
+        }
+    }
+    else
+    {
+        ns->dump_nl = 0;
+    }
+}
+
+
+/* Rebuilds the neighbour lists stored in the force record and returns the
+ * number of pairs reported by the search core.
+ *
+ * The box is first checked against fr->rlistlong (fatal error when it is too
+ * small), the lists are reset, the search grid is optionally refilled
+ * (only when fr->bGrid and bFillGrid are both set) and then either the grid
+ * core or the simple core is run. bDoLongRangeNS is forwarded unchanged to the
+ * grid core. The returned count is also added to the eNR_NS counter in nrnb.
+ *
+ * NOTE(review): the locals i, j, ptr, i_egp_flags, cg_start and cg_end are
+ * declared but never referenced in this function.
+ */
+int search_neighbours(FILE *log, t_forcerec *fr,
+ matrix box,
+ gmx_localtop_t *top,
+ gmx_groups_t *groups,
+ t_commrec *cr,
+ t_nrnb *nrnb, t_mdatoms *md,
+ gmx_bool bFillGrid,
+ gmx_bool bDoLongRangeNS)
+{
+ t_block *cgs = &(top->cgs);
+ rvec box_size, grid_x0, grid_x1;
+ int i, j, m, ngid;
+ real min_size, grid_dens;
+ int nsearch;
+ gmx_bool bGrid;
+ char *ptr;
+ gmx_bool *i_egp_flags;
+ int cg_start, cg_end, start, end;
+ gmx_ns_t *ns;
+ t_grid *grid;
+ gmx_domdec_zones_t *dd_zones;
+ put_in_list_t *put_in_list;
+
+ ns = &fr->ns;
+
+ /* Set some local variables */
+ bGrid = fr->bGrid;
+ ngid = groups->grps[egcENER].nr;
+
+ /* Only the diagonal of the box is needed for the simple size check
+ * and for the simple (non-grid) search core.
+ */
+ for (m = 0; (m < DIM); m++)
+ {
+ box_size[m] = box[m][m];
+ }
+
+ /* With periodic boundaries the box must stay large enough for the
+ * long-range list cut-off; abort otherwise.
+ */
+ if (fr->ePBC != epbcNONE)
+ {
+ if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC, box))
+ {
+ gmx_fatal(FARGS, "One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off.");
+ }
+ if (!bGrid)
+ {
+ /* Additional check on the box diagonal, done for simple search only */
+ min_size = min(box_size[XX], min(box_size[YY], box_size[ZZ]));
+ if (2*fr->rlistlong >= min_size)
+ {
+ gmx_fatal(FARGS, "One of the box diagonal elements has become smaller than twice the cut-off length.");
+ }
+ }
+ }
+
+ /* With domain decomposition the per-atom search buffers are resized here,
+ * using the atom count taken from the last charge-group index entry.
+ */
+ if (DOMAINDECOMP(cr))
+ {
+ ns_realloc_natoms(ns, cgs->index[cgs->nr]);
+ }
+ debug_gmx();
+
+ /* Reset the neighbourlists */
+ reset_neighbor_lists(fr, TRUE, TRUE);
+
+ if (bGrid && bFillGrid)
+ {
+
+ grid = ns->grid;
+ if (DOMAINDECOMP(cr))
+ {
+ dd_zones = domdec_zones(cr->dd);
+ }
+ else
+ {
+ dd_zones = NULL;
+
+ /* Without domain decomposition the grid boundaries and density
+ * are determined here before the grid is set up.
+ */
+ get_nsgrid_boundaries(grid->nboundeddim, box, NULL, NULL, NULL, NULL,
+ cgs->nr, fr->cg_cm, grid_x0, grid_x1, &grid_dens);
+
+ grid_first(log, grid, NULL, NULL, box, grid_x0, grid_x1,
+ fr->rlistlong, grid_dens);
+ }
+ debug_gmx();
+
+ start = 0;
+ end = cgs->nr;
+
+ if (DOMAINDECOMP(cr))
+ {
+ end = cgs->nr;
+ fill_grid(dd_zones, grid, end, -1, end, fr->cg_cm);
+ grid->icg0 = 0;
+ grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1;
+ }
+ else
+ {
+ fill_grid(NULL, grid, cgs->nr, fr->cg0, fr->hcg, fr->cg_cm);
+ grid->icg0 = fr->cg0;
+ grid->icg1 = fr->hcg;
+ debug_gmx();
+ }
+
+ /* Finish the grid bookkeeping for charge groups start..end */
+ calc_elemnr(grid, start, end, cgs->nr);
+ calc_ptrs(grid);
+ grid_last(grid, start, end, cgs->nr);
+
+ if (gmx_debug_at)
+ {
+ check_grid(grid);
+ print_grid(debug, grid);
+ }
+ }
+ else if (fr->n_tpi)
+ {
+ /* Set the grid cell index for the test particle only.
+ * The cell to cg index is not corrected, but that does not matter.
+ */
+ fill_grid(NULL, ns->grid, fr->hcg, fr->hcg-1, fr->hcg, fr->cg_cm);
+ }
+ debug_gmx();
+
+ /* Select the routine that stores found pairs: atom based, charge-group
+ * based, or the AdResS variant when AdResS is switched on.
+ */
+ if (fr->adress_type == eAdressOff)
+ {
+ if (!fr->ns.bCGlist)
+ {
+ put_in_list = put_in_list_at;
+ }
+ else
+ {
+ put_in_list = put_in_list_cg;
+ }
+ }
+ else
+ {
+ put_in_list = put_in_list_adress;
+ }
+
+ /* Do the core! */
+ if (bGrid)
+ {
+ grid = ns->grid;
+ nsearch = nsgrid_core(cr, fr, box, ngid, top,
+ grid, ns->bexcl, ns->bExcludeAlleg,
+ md, put_in_list, ns->bHaveVdW,
+ bDoLongRangeNS, FALSE);
+
+ /* neighbour searching without QMMM! QM atoms have zero charge in
+ * the classical calculation. The charge-charge interaction
+ * between QM and MM atoms is handled in the QMMM core calculation
+ * (see QMMM.c). The VDW however, we'd like to compute classically
+ * and the QM MM atom pairs have just been put in the
+ * corresponding neighbourlists. in case of QMMM we still need to
+ * fill a special QMMM neighbourlist that contains all neighbours
+ * of the QM atoms. If bQMMM is true, this list will now be made:
+ */
+ if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
+ {
+ nsearch += nsgrid_core(cr, fr, box, ngid, top,
+ grid, ns->bexcl, ns->bExcludeAlleg,
+ md, put_in_list_qmmm, ns->bHaveVdW,
+ bDoLongRangeNS, TRUE);
+ }
+ }
+ else
+ {
+ nsearch = ns_simple_core(fr, top, md, box, box_size,
+ ns->bexcl, ns->simple_aaj,
+ ngid, ns->ns_buf, put_in_list, ns->bHaveVdW);
+ }
+ debug_gmx();
+
+#ifdef DEBUG
+ pr_nsblock(log);
+#endif
+
+ /* Account for the search work in the flop counters */
+ inc_nrnb(nrnb, eNR_NS, nsearch);
+ /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
+
+ return nsearch;
+}
+
+/* Counts the atoms in charge groups fr->cg0 .. fr->hcg-1 whose squared
+ * distance to the (possibly box-scaled) stored centre fr->cg_cm of their
+ * charge group exceeds the square of half the neighbour-search buffer, and
+ * returns that count.
+ *
+ * The buffer is ir->rlist minus the larger of ir->rcoulomb and ir->rvdw;
+ * a negative buffer is a fatal error. When the integrator is dynamical and
+ * the box is dynamic, the stored centres are scaled with scale_tot before
+ * the comparison and rlist is scaled with the smallest diagonal element.
+ */
+int natoms_beyond_ns_buffer(t_inputrec *ir, t_forcerec *fr, t_block *cgs,
+                            matrix scale_tot, rvec *x)
+{
+    /* How the reference centre of a charge group is obtained */
+    enum {
+        nsbufCenterPlain, nsbufCenterIso, nsbufCenterAniso
+    };
+
+    int   center_mode, icg, icg_end, iatom, iatom_end, d, e;
+    int   n_outside;
+    real  r_cut, min_scale, half_buf_sq;
+    real *center;
+    rvec *stored_cm, scaled_cm;
+
+    r_cut = max(ir->rcoulomb, ir->rvdw);
+    if (ir->rlist < r_cut)
+    {
+        gmx_fatal(FARGS, "The neighbor search buffer has negative size: %f nm",
+                  ir->rlist - r_cut);
+    }
+    stored_cm = fr->cg_cm;
+
+    if (!(EI_DYNAMICS(ir->eI) && DYNAMIC_BOX(*ir)))
+    {
+        /* No box scaling to account for: compare against the stored centres */
+        center_mode = nsbufCenterPlain;
+        half_buf_sq = sqr(0.5*(ir->rlist - r_cut));
+    }
+    else
+    {
+        /* With anisotropic scaling, the original spherical ns volumes become
+         * ellipsoids. To avoid costly transformations the minimum eigenvalue
+         * of the scaling matrix determines the buffer size. Since the lower
+         * half is 0, the eigenvalues are the diagonal elements.
+         */
+        center_mode = nsbufCenterIso;
+        min_scale   = scale_tot[0][0];
+        for (d = 1; d < DIM; d++)
+        {
+            min_scale = min(min_scale, scale_tot[d][d]);
+            if (scale_tot[d][d] != scale_tot[d-1][d-1])
+            {
+                center_mode = nsbufCenterAniso;
+            }
+            for (e = 0; e < d; e++)
+            {
+                if (scale_tot[d][e] != 0)
+                {
+                    center_mode = nsbufCenterAniso;
+                }
+            }
+        }
+        half_buf_sq = sqr(0.5*(min_scale*ir->rlist - r_cut));
+    }
+
+    n_outside = 0;
+    icg_end   = fr->hcg;
+    for (icg = fr->cg0; icg < icg_end; icg++)
+    {
+        if (center_mode == nsbufCenterIso)
+        {
+            svmul(min_scale, stored_cm[icg], scaled_cm);
+            center = scaled_cm;
+        }
+        else if (center_mode == nsbufCenterAniso)
+        {
+            /* Since scale_tot contains the transpose of the scaling matrix,
+             * we need to multiply with the transpose.
+             */
+            tmvmul_ur0(scale_tot, stored_cm[icg], scaled_cm);
+            center = scaled_cm;
+        }
+        else
+        {
+            center = stored_cm[icg];
+        }
+
+        iatom_end = cgs->index[icg+1];
+        for (iatom = cgs->index[icg]; iatom < iatom_end; iatom++)
+        {
+            if (distance2(center, x[iatom]) > half_buf_sq)
+            {
+                n_outside++;
+            }
+        }
+    }
+
+    return n_outside;
+}
--- /dev/null
- double eners[2], virs[2], enersum, virsum, y0, f, g, h;
- double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd;
- double invscale, invscale2, invscale3;
- int ri0, ri1, ri, i, offstart, offset;
- real scale, *vdwtab, tabfactor, tmp;
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include "typedefs.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/smalloc.h"
+#include "names.h"
+#include "txtdump.h"
+#include "pbc.h"
+#include "chargegroup.h"
+#include "vec.h"
+#include "nrnb.h"
+#include "mshift.h"
+#include "mdrun.h"
+#include "sim_util.h"
+#include "update.h"
+#include "physics.h"
+#include "main.h"
+#include "mdatoms.h"
+#include "force.h"
+#include "bondf.h"
+#include "pme.h"
+#include "disre.h"
+#include "orires.h"
+#include "network.h"
+#include "calcmu.h"
+#include "constr.h"
+#include "xvgr.h"
+#include "copyrite.h"
+#include "domdec.h"
+#include "genborn.h"
+#include "nbnxn_atomdata.h"
+#include "nbnxn_search.h"
+#include "nbnxn_kernels/nbnxn_kernel_ref.h"
+#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
+#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
+#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
+#include "nonbonded.h"
+#include "../gmxlib/nonbonded/nb_kernel.h"
+#include "../gmxlib/nonbonded/nb_free_energy.h"
+
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/essentialdynamics/edsam.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/pulling/pull_rotation.h"
+#include "gromacs/imd/imd.h"
+#include "adress.h"
+#include "qmmm.h"
+
+#include "gmx_omp_nthreads.h"
+
+#include "nbnxn_cuda_data_mgmt.h"
+#include "nbnxn_cuda/nbnxn_cuda.h"
+
+/* Writes a one-line progress report for the given step to out and flushes it.
+ *
+ * Once step has reached ir->nstlist the line also contains an estimate based
+ * on the wall time elapsed since the start stamp in walltime_accounting:
+ * for a finite run (ir->nsteps >= 0) either the expected finish date (when
+ * 300 s or more remain) or the remaining seconds, otherwise the current
+ * performance in ns/day.
+ *
+ * Unless GMX_THREAD_MPI is defined, a carriage return is only emitted for
+ * non-parallel runs and parallel runs end the line with a newline instead.
+ */
+void print_time(FILE *out,
+                gmx_walltime_accounting_t walltime_accounting,
+                gmx_int64_t step,
+                t_inputrec *ir,
+                t_commrec gmx_unused *cr)
+{
+    time_t finish;
+    char   timebuf[STRLEN];
+    double dt, elapsed_seconds, time_per_step;
+    char   buf[STEPSTRSIZE];
+    size_t len;
+
+#ifndef GMX_THREAD_MPI
+    if (!PAR(cr))
+#endif
+    {
+        fprintf(out, "\r");
+    }
+    fprintf(out, "step %s", gmx_step_str(step, buf));
+    if (step >= ir->nstlist)
+    {
+        double seconds_since_epoch = gmx_gettime();
+        elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
+        time_per_step   = elapsed_seconds/(step - ir->init_step + 1);
+        dt              = (ir->nsteps + ir->init_step - step) * time_per_step;
+
+        if (ir->nsteps >= 0)
+        {
+            if (dt >= 300)
+            {
+                finish = (time_t) (seconds_since_epoch + dt);
+                gmx_ctime_r(&finish, timebuf, STRLEN);
+                /* Drop the final character of the date string (the newline
+                 * of a ctime-style string) in place. Working on timebuf
+                 * avoids copying a STRLEN-sized string into a smaller buffer,
+                 * and the length guard avoids indexing before the start of
+                 * an empty string.
+                 */
+                len = strlen(timebuf);
+                if (len > 0)
+                {
+                    timebuf[len-1] = '\0';
+                }
+                fprintf(out, ", will finish %s", timebuf);
+            }
+            else
+            {
+                fprintf(out, ", remaining wall clock time: %5d s ", (int)dt);
+            }
+        }
+        else
+        {
+            fprintf(out, " performance: %.1f ns/day ",
+                    ir->delta_t/1000*24*60*60/time_per_step);
+        }
+    }
+#ifndef GMX_THREAD_MPI
+    if (PAR(cr))
+    {
+        fprintf(out, "\n");
+    }
+#endif
+
+    fflush(out);
+}
+
+/* Writes "<title> on node <nodeid> <date>" followed by a newline to fplog.
+ * The date is the_time converted to a calendar string, truncated at the first
+ * character below ' ' (which removes the trailing newline of the string).
+ * Does nothing when fplog is NULL.
+ */
+void print_date_and_time(FILE *fplog, int nodeid, const char *title,
+                         double the_time)
+{
+    char   time_string[STRLEN];
+    char   raw_date[STRLEN];
+    time_t stamp;
+    int    n;
+
+    if (fplog == NULL)
+    {
+        return;
+    }
+
+    stamp = (time_t) the_time;
+    gmx_ctime_r(&stamp, raw_date, STRLEN);
+
+    /* Copy up to, but not including, the first control character */
+    n = 0;
+    while (raw_date[n] >= ' ')
+    {
+        time_string[n] = raw_date[n];
+        n++;
+    }
+    time_string[n] = '\0';
+
+    fprintf(fplog, "%s on node %d %s\n", title, nodeid, time_string);
+}
+
+/* Logs "Started <name>" together with the node id of cr and the start time
+ * stamp held in walltime_accounting, via print_date_and_time().
+ */
+void print_start(FILE *fplog, t_commrec *cr,
+                 gmx_walltime_accounting_t walltime_accounting,
+                 const char *name)
+{
+    char   title[STRLEN];
+    double start_stamp;
+
+    sprintf(title, "Started %s", name);
+    start_stamp = walltime_accounting_get_start_time_stamp(walltime_accounting);
+    print_date_and_time(fplog, cr->nodeid, title, start_stamp);
+}
+
+/* Adds flr[i] to f[i] for start <= i < end.
+ * When gmx_debug_at is set, both ranges are dumped to the debug file first.
+ */
+static void sum_forces(int start, int end, rvec f[], rvec flr[])
+{
+    int at;
+
+    if (gmx_debug_at)
+    {
+        pr_rvecs(debug, 0, "fsr", f+start, end-start);
+        pr_rvecs(debug, 0, "flr", flr+start, end-start);
+    }
+
+    at = start;
+    while (at < end)
+    {
+        rvec_inc(f[at], flr[at]);
+        at++;
+    }
+}
+
+/*
+ * calc_f_el calculates forces due to an electric field.
+ *
+ * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
+ *
+ * Et[] contains the parameters for the time dependent
+ * part of the field (not yet used).
+ * Ex[] contains the parameters for
+ * the spatial dependent part of the field. You can have cool periodic
+ * fields in principle, but only a constant field is supported
+ * now.
+ * The function should return the energy due to the electric field
+ * (if any) but for now returns 0.
+ *
+ * WARNING:
+ * There can be problems with the virial.
+ * Since the field is not self-consistent this is unavoidable.
+ * For neutral molecules the virial is correct within this approximation.
+ * For neutral systems with many charged molecules the error is small.
+ * But for systems with a net charge or a few charged molecules
+ * the error can be significant when the field is high.
+ * Solution: implement a self-consistent electric field into PME.
+ */
+/* Adds the force of an external electric field to f[start..start+homenr-1].
+ *
+ * For every dimension a time factor is built from Et (1 when Et has no
+ * terms, a cosine when it has terms, a cosine with a Gaussian envelope when
+ * it has exactly three) and multiplied by the first spatial coefficient of
+ * Ex converted with FIELDFAC. Dimensions without Ex terms get a zero field
+ * and no force. When fp is not NULL, the time and the three field components
+ * (divided by FIELDFAC again) are written as one line.
+ */
+static void calc_f_el(FILE *fp, int start, int homenr,
+                      real charge[], rvec f[],
+                      t_cosines Ex[], t_cosines Et[], double t)
+{
+    rvec field;
+    real t_center;
+    int  d, at, at_end;
+
+    at_end = start + homenr;
+    for (d = 0; d < DIM; d++)
+    {
+        /* Time-dependent factor */
+        if (Et[d].n <= 0)
+        {
+            field[d] = 1.0;
+        }
+        else if (Et[d].n == 3)
+        {
+            t_center = Et[d].a[1];
+            field[d] = cos(Et[d].a[0]*(t-t_center))*exp(-sqr(t-t_center)/(2.0*sqr(Et[d].a[2])));
+        }
+        else
+        {
+            field[d] = cos(Et[d].a[0]*t);
+        }
+
+        if (Ex[d].n <= 0)
+        {
+            /* No field along this dimension */
+            field[d] = 0;
+            continue;
+        }
+
+        /* Convert the field strength from V/nm to MD-units */
+        field[d] *= Ex[d].a[0]*FIELDFAC;
+        for (at = start; at < at_end; at++)
+        {
+            f[at][d] += charge[at]*field[d];
+        }
+    }
+
+    if (fp != NULL)
+    {
+        fprintf(fp, "%10g %10g %10g %10g #FIELD\n", t,
+                field[XX]/FIELDFAC, field[YY]/FIELDFAC, field[ZZ]/FIELDFAC);
+    }
+}
+
+/* Computes the partial virial vir_part for atoms start..start+homenr-1.
+ *
+ * vir_part is cleared, then filled with the shift-force contribution
+ * (calc_vir), the per-atom contribution (f_calc_vir), the position-restraint
+ * diagonal stored in fr->vir_diag_posres and the wall contribution stored in
+ * fr->vir_wall_z (added to the ZZ column). Flop counters in nrnb are updated
+ * and the result is dumped when the debug file is open.
+ */
+static void calc_virial(int start, int homenr, rvec x[], rvec f[],
+                        tensor vir_part, t_graph *graph, matrix box,
+                        t_nrnb *nrnb, const t_forcerec *fr, int ePBC)
+{
+    int d;
+
+    /* The short-range virial from surrounding boxes */
+    clear_mat(vir_part);
+    calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box);
+    inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);
+
+    /* Calculate partial virial, for local atoms only, based on short range.
+     * Total virial is computed in global_stat, called from do_md
+     */
+    f_calc_vir(start, start+homenr, x, f, vir_part, graph, box);
+    inc_nrnb(nrnb, eNR_VIRIAL, homenr);
+
+    for (d = 0; d < DIM; d++)
+    {
+        /* Add position restraint contribution */
+        vir_part[d][d] += fr->vir_diag_posres[d];
+        /* Add wall contribution */
+        vir_part[d][ZZ] += fr->vir_wall_z[d];
+    }
+
+    if (debug)
+    {
+        pr_rvecs(debug, 0, "vir_part", vir_part, DIM);
+    }
+}
+
+/* Evaluates the position restraints of top and accumulates the results.
+ *
+ * The restraint energy is added to enerd->term[F_POSRES], its lambda
+ * derivative to enerd->dvdl_nonlin[efptRESTRAINT]; forces go to
+ * fr->f_novirsum and the virial diagonal to fr->vir_diag_posres. When free
+ * energy lambdas are defined and GMX_FORCE_DHDL is set in flags, the energy
+ * is re-evaluated (without forces or virial) for each of the
+ * enerd->n_lambda lambda values and added to enerd->enerpart_lambda.
+ * With bSepDVDL the energy and derivative are also printed to fplog.
+ */
+static void posres_wrapper(FILE *fplog,
+                           int flags,
+                           gmx_bool bSepDVDL,
+                           t_inputrec *ir,
+                           t_nrnb *nrnb,
+                           gmx_localtop_t *top,
+                           matrix box, rvec x[],
+                           gmx_enerdata_t *enerd,
+                           real *lambda,
+                           t_forcerec *fr)
+{
+    t_pbc pbc;
+    real  v, dvdl;
+    int   i;
+
+    /* Position restraints always require full pbc */
+    set_pbc(&pbc, ir->ePBC, box);
+    dvdl = 0;
+    v    = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms,
+                  top->idef.iparams_posres,
+                  (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres,
+                  ir->ePBC == epbcNONE ? NULL : &pbc,
+                  lambda[efptRESTRAINT], &dvdl,
+                  fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
+    if (bSepDVDL)
+    {
+        gmx_print_sepdvdl(fplog, interaction_function[F_POSRES].longname, v, dvdl);
+    }
+    enerd->term[F_POSRES] += v;
+    /* If just the force constant changes, the FEP term is linear,
+     * but if k changes, it is not.
+     * NOTE(review): the sentence above is kept from the original; its two
+     * halves appear to describe the same quantity - confirm intended wording.
+     */
+    enerd->dvdl_nonlin[efptRESTRAINT] += dvdl;
+    inc_nrnb(nrnb, eNR_POSRES, top->idef.il[F_POSRES].nr/2);
+
+    if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
+    {
+        for (i = 0; i < enerd->n_lambda; i++)
+        {
+            real dvdl_dum, lambda_dum;
+
+            /* Index 0 is the current lambda, the rest are the foreign ones */
+            lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]);
+            /* Only the energy is wanted here: the derivative goes into a
+             * scratch variable so the value accumulated above is untouched.
+             */
+            dvdl_dum   = 0;
+            v          = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms,
+                                top->idef.iparams_posres,
+                                (const rvec*)x, NULL, NULL,
+                                ir->ePBC == epbcNONE ? NULL : &pbc, lambda_dum, &dvdl_dum,
+                                fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
+            enerd->enerpart_lambda[i] += v;
+        }
+    }
+}
+
+/* Evaluates the flat-bottomed position restraints of top: forces go to
+ * fr->f_novirsum, the virial diagonal to fr->vir_diag_posres, and the energy
+ * is added to enerd->term[F_FBPOSRES]. The flop counter is updated.
+ */
+static void fbposres_wrapper(t_inputrec *ir,
+                             t_nrnb *nrnb,
+                             gmx_localtop_t *top,
+                             matrix box, rvec x[],
+                             gmx_enerdata_t *enerd,
+                             t_forcerec *fr)
+{
+    t_pbc  pbc_full;
+    t_pbc *pbc_arg;
+    real   e_restraint;
+
+    /* Flat-bottomed position restraints always require full pbc */
+    set_pbc(&pbc_full, ir->ePBC, box);
+    pbc_arg = NULL;
+    if (ir->ePBC != epbcNONE)
+    {
+        pbc_arg = &pbc_full;
+    }
+
+    e_restraint = fbposres(top->idef.il[F_FBPOSRES].nr, top->idef.il[F_FBPOSRES].iatoms,
+                           top->idef.iparams_fbposres,
+                           (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres,
+                           pbc_arg,
+                           fr->rc_scaling, fr->ePBC, fr->posres_com);
+    enerd->term[F_FBPOSRES] += e_restraint;
+    inc_nrnb(nrnb, eNR_FBPOSRES, top->idef.il[F_FBPOSRES].nr/2);
+}
+
+/* Applies the COM pulling potential: the value returned by pull_potential()
+ * is added to enerd->term[F_COM_PULL] and its lambda derivative to
+ * enerd->dvdl_lin[efptRESTRAINT]. Forces f and virial vir_force are passed
+ * on to pull_potential(). With bSepDVDL the accumulated energy term and the
+ * derivative are printed to fplog.
+ */
+static void pull_potential_wrapper(FILE *fplog,
+                                   gmx_bool bSepDVDL,
+                                   t_commrec *cr,
+                                   t_inputrec *ir,
+                                   matrix box, rvec x[],
+                                   rvec f[],
+                                   tensor vir_force,
+                                   t_mdatoms *mdatoms,
+                                   gmx_enerdata_t *enerd,
+                                   real *lambda,
+                                   double t)
+{
+    t_pbc pull_pbc;
+    real  dvdl_pull = 0;
+
+    /* Calculate the center of mass forces, this requires communication,
+     * which is why pull_potential is called close to other communication.
+     * The virial contribution is calculated directly,
+     * which is why we call pull_potential after calc_virial.
+     */
+    set_pbc(&pull_pbc, ir->ePBC, box);
+    enerd->term[F_COM_PULL] +=
+        pull_potential(ir->ePull, ir->pull, mdatoms, &pull_pbc,
+                       cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl_pull);
+    if (bSepDVDL)
+    {
+        gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl_pull);
+    }
+    enerd->dvdl_lin[efptRESTRAINT] += dvdl_pull;
+}
+
+/* Receives the mesh results through gmx_pme_receive_f() and accumulates
+ * them: forces into fr->f_novirsum, virials into fr->vir_el_recip and
+ * fr->vir_lj_recip, energies into the F_COUL_RECIP / F_LJ_RECIP terms of
+ * enerd and the lambda derivatives into enerd->dvdl_lin. The wait is timed
+ * with the ewcPP_PMEWAITRECVF cycle counter and the cycle counts are passed
+ * to dd_cycles_add().
+ */
+static void pme_receive_force_ener(FILE *fplog,
+                                   gmx_bool bSepDVDL,
+                                   t_commrec *cr,
+                                   gmx_wallcycle_t wcycle,
+                                   gmx_enerdata_t *enerd,
+                                   t_forcerec *fr)
+{
+    real  e_coul, e_vdw;
+    real  dvdl_coul = 0, dvdl_vdw = 0;
+    float c_pp_during_pme, c_pme_only;
+
+    c_pp_during_pme = wallcycle_stop(wcycle, ewcPPDURINGPME);
+    dd_cycles_add(cr->dd, c_pp_during_pme, ddCyclPPduringPME);
+
+    /* In case of node-splitting, the PP nodes receive the long-range
+     * forces, virial and energy from the PME nodes here.
+     */
+    wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
+    gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_coul,
+                      fr->vir_lj_recip, &e_vdw, &dvdl_coul, &dvdl_vdw,
+                      &c_pme_only);
+    if (bSepDVDL)
+    {
+        gmx_print_sepdvdl(fplog, "Electrostatic PME mesh", e_coul, dvdl_coul);
+        gmx_print_sepdvdl(fplog, "Lennard-Jones PME mesh", e_vdw, dvdl_vdw);
+    }
+
+    /* Coulomb part */
+    enerd->term[F_COUL_RECIP] += e_coul;
+    enerd->dvdl_lin[efptCOUL] += dvdl_coul;
+    /* Lennard-Jones part */
+    enerd->term[F_LJ_RECIP]   += e_vdw;
+    enerd->dvdl_lin[efptVDW]  += dvdl_vdw;
+
+    if (wcycle)
+    {
+        dd_cycles_add(cr->dd, c_pme_only, ddCyclPME);
+    }
+    wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
+}
+
+/* Prints one line to fp for every home atom (0 .. md->homenr-1) whose force
+ * norm is at least pforce, or whose squared force norm is NaN. Each line
+ * holds the step, the atom number from ddglatnr(), the position and the
+ * force norm.
+ */
+static void print_large_forces(FILE *fp, t_mdatoms *md, t_commrec *cr,
+                               gmx_int64_t step, real pforce, rvec *x, rvec *f)
+{
+    char stepstr[STEPSTRSIZE];
+    real limit_sq, f_sq;
+    int  at;
+
+    limit_sq = sqr(pforce);
+    for (at = 0; at < md->homenr; at++)
+    {
+        f_sq = norm2(f[at]);
+        /* We also catch NAN, if the compiler does not optimize this away. */
+        if (!(f_sq >= limit_sq || f_sq != f_sq))
+        {
+            continue;
+        }
+        fprintf(fp, "step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
+                gmx_step_str(step, stepstr),
+                ddglatnr(cr->dd, at), x[at][XX], x[at][YY], x[at][ZZ], sqrt(f_sq));
+    }
+}
+
+/* Final treatment of the forces after the force calculation.
+ *
+ * When fr->bF_NoVirSum is set, the separately stored forces in
+ * fr->f_novirsum are spread from virtual sites (if vsite is given) and, when
+ * GMX_FORCE_VIRIAL is set in flags, added to f; the mesh virials are then
+ * added to vir_force for full electrostatics and for LJ-PME. Independently,
+ * large forces are reported on stderr when fr->print_force >= 0.
+ */
+static void post_process_forces(t_commrec *cr,
+                                gmx_int64_t step,
+                                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+                                gmx_localtop_t *top,
+                                matrix box, rvec x[],
+                                rvec f[],
+                                tensor vir_force,
+                                t_mdatoms *mdatoms,
+                                t_graph *graph,
+                                t_forcerec *fr, gmx_vsite_t *vsite,
+                                int flags)
+{
+    const int virial_flag = (flags & GMX_FORCE_VIRIAL);
+    int       n_sum;
+
+    if (fr->bF_NoVirSum && vsite)
+    {
+        /* Spread the mesh force on virtual sites to the other particles...
+         * This is parallellized. MPI communication is performed
+         * if the constructing atoms aren't local.
+         */
+        wallcycle_start(wcycle, ewcVSITESPREAD);
+        spread_vsite_f(vsite, x, fr->f_novirsum, NULL,
+                       virial_flag, fr->vir_el_recip,
+                       nrnb,
+                       &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
+        wallcycle_stop(wcycle, ewcVSITESPREAD);
+    }
+
+    if (fr->bF_NoVirSum && virial_flag)
+    {
+        /* Now add the forces, this is local */
+        n_sum = fr->bDomDec ? fr->f_novirsum_n : mdatoms->homenr;
+        sum_forces(0, n_sum, f, fr->f_novirsum);
+
+        if (EEL_FULL(fr->eeltype))
+        {
+            /* Add the mesh contribution to the virial */
+            m_add(vir_force, fr->vir_el_recip, vir_force);
+        }
+        if (EVDW_PME(fr->vdwtype))
+        {
+            /* Add the mesh contribution to the virial */
+            m_add(vir_force, fr->vir_lj_recip, vir_force);
+        }
+        if (debug)
+        {
+            pr_rvecs(debug, 0, "vir_force", vir_force, DIM);
+        }
+    }
+
+    if (fr->print_force >= 0)
+    {
+        print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f);
+    }
+}
+
+/* Runs (or, for CUDA, launches) the Verlet-scheme non-bonded kernel for the
+ * pair-list group fr->nbv->grp[ilocality] and updates the flop counters.
+ *
+ * Returns immediately when GMX_FORCE_NONBONDED is not set in flags. The
+ * kernel is chosen by the group's kernel_type; CPU kernels are timed with
+ * the ewcsNONBONDED sub-counter. Energies are written to the Coulomb group
+ * array and to the Buckingham or LJ group array, depending on fr->bBHAM.
+ * An unknown kernel type or a non-Verlet cut-off scheme is reported through
+ * gmx_incons().
+ */
+static void do_nb_verlet(t_forcerec *fr,
+                         interaction_const_t *ic,
+                         gmx_enerdata_t *enerd,
+                         int flags, int ilocality,
+                         int clearF,
+                         t_nrnb *nrnb,
+                         gmx_wallcycle_t wcycle)
+{
+    nonbonded_verlet_group_t *grp;
+    gmx_bool                  bOnGPU, bAnalyticalEwald;
+    int                       e_offset, enr_ljc, enr_lj;
+    real                     *e_coul, *e_vdw;
+
+    if (!(flags & GMX_FORCE_NONBONDED))
+    {
+        /* skip non-bonded calculation */
+        return;
+    }
+
+    grp = &fr->nbv->grp[ilocality];
+
+    if (fr->cutoff_scheme != ecutsVERLET)
+    {
+        gmx_incons("Invalid cut-off scheme passed!");
+    }
+
+    bOnGPU = (grp->kernel_type == nbnxnk8x8x8_CUDA);
+
+    /* Energy-group output arrays shared by all CPU kernels */
+    e_coul = enerd->grpp.ener[egCOULSR];
+    e_vdw  = fr->bBHAM ? enerd->grpp.ener[egBHAMSR] : enerd->grpp.ener[egLJSR];
+
+    /* CUDA kernel launch overhead is already timed separately */
+    if (!bOnGPU)
+    {
+        wallcycle_sub_start(wcycle, ewcsNONBONDED);
+    }
+    switch (grp->kernel_type)
+    {
+        case nbnxnk4x4_PlainC:
+            nbnxn_kernel_ref(&grp->nbl_lists,
+                             grp->nbat, ic,
+                             fr->shift_vec,
+                             flags,
+                             clearF,
+                             fr->fshift[0],
+                             e_coul,
+                             e_vdw);
+            break;
+
+        case nbnxnk4xN_SIMD_4xN:
+            nbnxn_kernel_simd_4xn(&grp->nbl_lists,
+                                  grp->nbat, ic,
+                                  grp->ewald_excl,
+                                  fr->shift_vec,
+                                  flags,
+                                  clearF,
+                                  fr->fshift[0],
+                                  e_coul,
+                                  e_vdw);
+            break;
+
+        case nbnxnk4xN_SIMD_2xNN:
+            nbnxn_kernel_simd_2xnn(&grp->nbl_lists,
+                                   grp->nbat, ic,
+                                   grp->ewald_excl,
+                                   fr->shift_vec,
+                                   flags,
+                                   clearF,
+                                   fr->fshift[0],
+                                   e_coul,
+                                   e_vdw);
+            break;
+
+        case nbnxnk8x8x8_CUDA:
+            nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, grp->nbat, flags, ilocality);
+            break;
+
+        case nbnxnk8x8x8_PlainC:
+            nbnxn_kernel_gpu_ref(grp->nbl_lists.nbl[0],
+                                 grp->nbat, ic,
+                                 fr->shift_vec,
+                                 flags,
+                                 clearF,
+                                 grp->nbat->out[0].f,
+                                 fr->fshift[0],
+                                 e_coul,
+                                 e_vdw);
+            break;
+
+        default:
+            gmx_incons("Invalid nonbonded kernel type passed!");
+
+    }
+    if (!bOnGPU)
+    {
+        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
+    }
+
+    /* Pick the flop counter matching the electrostatics treatment */
+    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
+    {
+        enr_ljc = eNR_NBNXN_LJ_RF;
+    }
+    else
+    {
+        if (bOnGPU)
+        {
+            bAnalyticalEwald = nbnxn_cuda_is_kernel_ewald_analytical(fr->nbv->cu_nbv);
+        }
+        else
+        {
+            bAnalyticalEwald = (grp->ewald_excl == ewaldexclAnalytical);
+        }
+        enr_ljc = bAnalyticalEwald ? eNR_NBNXN_LJ_EWALD : eNR_NBNXN_LJ_TAB;
+    }
+
+    /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
+    e_offset = (flags & GMX_FORCE_ENERGY) ? 1 : 0;
+    enr_ljc += e_offset;
+    enr_lj   = eNR_NBNXN_LJ + e_offset;
+
+    inc_nrnb(nrnb, enr_ljc,
+             grp->nbl_lists.natpair_ljq);
+    inc_nrnb(nrnb, enr_lj,
+             grp->nbl_lists.natpair_lj);
+    /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */
+    inc_nrnb(nrnb, enr_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
+             grp->nbl_lists.natpair_q);
+
+    /* The costs of the LJ modifiers and of LJ Ewald are added separately */
+    if (ic->vdw_modifier == eintmodFORCESWITCH)
+    {
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+e_offset,
+                 grp->nbl_lists.natpair_ljq + grp->nbl_lists.natpair_lj);
+    }
+    if (ic->vdw_modifier == eintmodPOTSWITCH)
+    {
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+e_offset,
+                 grp->nbl_lists.natpair_ljq + grp->nbl_lists.natpair_lj);
+    }
+    if (ic->vdwtype == evdwPME)
+    {
+        inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+e_offset,
+                 grp->nbl_lists.natpair_ljq + grp->nbl_lists.natpair_lj);
+    }
+}
+
+/* Runs the free-energy non-bonded kernel over the perturbed pair lists
+ * nbl_lists->nbl_fep[0 .. nnbl-1], one list per OpenMP thread.
+ *
+ * The kernel flags are derived from the GMX_FORCE_* bits in flags. The
+ * resulting dV/dlambda for VdW and Coulomb is added to enerd->dvdl_nonlin
+ * when fepvals->sc_alpha is non-zero and to enerd->dvdl_lin otherwise.
+ * When foreign lambdas are requested (GMX_FORCE_DHDL, n_lambda > 0) and
+ * sc_alpha is non-zero, the kernel is run again without forces for each of
+ * the enerd->n_lambda lambda sets and the potential energy is added to
+ * enerd->enerpart_lambda. The whole routine is timed with ewcsNONBONDED.
+ */
+static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
+ t_forcerec *fr,
+ rvec x[],
+ rvec f[],
+ t_mdatoms *mdatoms,
+ t_lambda *fepvals,
+ real *lambda,
+ gmx_enerdata_t *enerd,
+ int flags,
+ t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle)
+{
+ int donb_flags;
+ nb_kernel_data_t kernel_data;
+ real lam_i[efptNR];
+ real dvdl_nb[efptNR];
+ int th;
+ int i, j;
+
+ donb_flags = 0;
+ /* Add short-range interactions */
+ donb_flags |= GMX_NONBONDED_DO_SR;
+
+ /* Currently all group scheme kernels always calculate (shift-)forces */
+ if (flags & GMX_FORCE_FORCES)
+ {
+ donb_flags |= GMX_NONBONDED_DO_FORCE;
+ }
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
+ }
+ if (flags & GMX_FORCE_ENERGY)
+ {
+ donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
+ }
+ if (flags & GMX_FORCE_DO_LR)
+ {
+ donb_flags |= GMX_NONBONDED_DO_LR;
+ }
+
+ /* The same kernel_data (and thus the same dvdl_nb array) is handed to
+ * every thread in the parallel loops below.
+ */
+ kernel_data.flags = donb_flags;
+ kernel_data.lambda = lambda;
+ kernel_data.dvdl = dvdl_nb;
+
+ kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR];
+ kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR];
+
+ /* reset free energy components */
+ for (i = 0; i < efptNR; i++)
+ {
+ dvdl_nb[i] = 0;
+ }
+
+ /* One perturbed pair list per non-bonded thread is expected */
+ assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl);
+
+ wallcycle_sub_start(wcycle, ewcsNONBONDED);
+#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
+ for (th = 0; th < nbl_lists->nnbl; th++)
+ {
+ gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ x, f, fr, mdatoms, &kernel_data, nrnb);
+ }
+
+ /* With soft-core (sc_alpha != 0) the derivatives are stored as non-linear
+ * contributions, otherwise as linear ones.
+ */
+ if (fepvals->sc_alpha != 0)
+ {
+ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
+ enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
+ }
+ else
+ {
+ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
+ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
+ }
+
+ /* If we do foreign lambda and we have soft-core interactions
+ * we have to recalculate the (non-linear) energies contributions.
+ */
+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
+ {
+ /* Energies only: force and shift-force output is switched off and the
+ * results go to the foreign energy-group arrays.
+ */
+ kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
+ kernel_data.lambda = lam_i;
+ kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR];
+ kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR];
+ /* Note that we add to kernel_data.dvdl, but ignore the result */
+
+ for (i = 0; i < enerd->n_lambda; i++)
+ {
+ /* Index 0 is the current lambda, the others are the foreign sets */
+ for (j = 0; j < efptNR; j++)
+ {
+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
+ }
+ reset_foreign_enerdata(enerd);
+#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
+ for (th = 0; th < nbl_lists->nnbl; th++)
+ {
+ gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ x, f, fr, mdatoms, &kernel_data, nrnb);
+ }
+
+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
+ }
+ }
+
+ wallcycle_sub_stop(wcycle, ewcsNONBONDED);
+}
+
+void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
+ t_inputrec *inputrec,
+ gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ gmx_localtop_t *top,
+ gmx_groups_t gmx_unused *groups,
+ matrix box, rvec x[], history_t *hist,
+ rvec f[],
+ tensor vir_force,
+ t_mdatoms *mdatoms,
+ gmx_enerdata_t *enerd, t_fcdata *fcd,
+ real *lambda, t_graph *graph,
+ t_forcerec *fr, interaction_const_t *ic,
+ gmx_vsite_t *vsite, rvec mu_tot,
+ double t, FILE *field, gmx_edsam_t ed,
+ gmx_bool bBornRadii,
+ int flags)
+{
+ int cg0, cg1, i, j;
+ int start, homenr;
+ int nb_kernel_type;
+ double mu[2*DIM];
+ gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
+ gmx_bool bDoLongRange, bDoForces, bSepLRF, bUseGPU, bUseOrEmulGPU;
+ gmx_bool bDiffKernels = FALSE;
+ matrix boxs;
+ rvec vzero, box_diag;
+ real e, v, dvdl;
+ float cycles_pme, cycles_force, cycles_wait_gpu;
+ nonbonded_verlet_t *nbv;
+
+ cycles_force = 0;
+ cycles_wait_gpu = 0;
+ nbv = fr->nbv;
+ nb_kernel_type = fr->nbv->grp[0].kernel_type;
+
+ start = 0;
+ homenr = mdatoms->homenr;
+
+ bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
+
+ clear_mat(vir_force);
+
+ cg0 = 0;
+ if (DOMAINDECOMP(cr))
+ {
+ cg1 = cr->dd->ncg_tot;
+ }
+ else
+ {
+ cg1 = top->cgs.nr;
+ }
+ if (fr->n_tpi > 0)
+ {
+ cg1--;
+ }
+
+ bStateChanged = (flags & GMX_FORCE_STATECHANGED);
+ bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
+ bFillGrid = (bNS && bStateChanged);
+ bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
+ bDoLongRange = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR));
+ bDoForces = (flags & GMX_FORCE_FORCES);
+ bSepLRF = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
+ bUseGPU = fr->nbv->bUseGPU;
+ bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbnxnk8x8x8_PlainC);
+
+ if (bStateChanged)
+ {
+ update_forcerec(fr, box);
+
+ if (NEED_MUTOT(*inputrec))
+ {
+ /* Calculate total (local) dipole moment in a temporary common array.
+ * This makes it possible to sum them over nodes faster.
+ */
+ calc_mu(start, homenr,
+ x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
+ mu, mu+DIM);
+ }
+ }
+
+ if (fr->ePBC != epbcNONE)
+ {
+ /* Compute shift vectors every step,
+ * because of pressure coupling or box deformation!
+ */
+ if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
+ {
+ calc_shifts(box, fr->shift_vec);
+ }
+
+ if (bCalcCGCM)
+ {
+ put_atoms_in_box_omp(fr->ePBC, box, homenr, x);
+ inc_nrnb(nrnb, eNR_SHIFTX, homenr);
+ }
+ else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
+ {
+ unshift_self(graph, box, x);
+ }
+ }
+
+ nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
+ fr->shift_vec, nbv->grp[0].nbat);
+
+#ifdef GMX_MPI
+ if (!(cr->duty & DUTY_PME))
+ {
+ /* Send particle coordinates to the pme nodes.
+ * Since this is only implemented for domain decomposition
+ * and domain decomposition does not use the graph,
+ * we do not need to worry about shifting.
+ */
+
+ int pme_flags = 0;
+
+ wallcycle_start(wcycle, ewcPP_PMESENDX);
+
+ bBS = (inputrec->nwall == 2);
+ if (bBS)
+ {
+ copy_mat(box, boxs);
+ svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
+ }
+
+ if (EEL_PME(fr->eeltype))
+ {
+ pme_flags |= GMX_PME_DO_COULOMB;
+ }
+
+ if (EVDW_PME(fr->vdwtype))
+ {
+ pme_flags |= GMX_PME_DO_LJ;
+ }
+
+ gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
+ mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
+ (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
+ pme_flags, step);
+
+ wallcycle_stop(wcycle, ewcPP_PMESENDX);
+ }
+#endif /* GMX_MPI */
+
+ /* do gridding for pair search */
+ if (bNS)
+ {
+ if (graph && bStateChanged)
+ {
+ /* Calculate intramolecular shift vectors to make molecules whole */
+ mk_mshift(fplog, graph, fr->ePBC, box, x);
+ }
+
+ clear_rvec(vzero);
+ box_diag[XX] = box[XX][XX];
+ box_diag[YY] = box[YY][YY];
+ box_diag[ZZ] = box[ZZ][ZZ];
+
+ wallcycle_start(wcycle, ewcNS);
+ if (!fr->bDomDec)
+ {
+ wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
+ nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box,
+ 0, vzero, box_diag,
+ 0, mdatoms->homenr, -1, fr->cginfo, x,
+ 0, NULL,
+ nbv->grp[eintLocal].kernel_type,
+ nbv->grp[eintLocal].nbat);
+ wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
+ }
+ else
+ {
+ wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
+ nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd),
+ fr->cginfo, x,
+ nbv->grp[eintNonlocal].kernel_type,
+ nbv->grp[eintNonlocal].nbat);
+ wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
+ }
+
+ if (nbv->ngrp == 1 ||
+ nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
+ {
+ nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatAll,
+ nbv->nbs, mdatoms, fr->cginfo);
+ }
+ else
+ {
+ nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatLocal,
+ nbv->nbs, mdatoms, fr->cginfo);
+ nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat, eatAll,
+ nbv->nbs, mdatoms, fr->cginfo);
+ }
+ wallcycle_stop(wcycle, ewcNS);
+ }
+
+ /* initialize the GPU atom data and copy shift vector */
+ if (bUseGPU)
+ {
+ if (bNS)
+ {
+ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
+ nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
+ wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
+ }
+
+ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
+ nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
+ wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
+ }
+
+ /* do local pair search */
+ if (bNS)
+ {
+ wallcycle_start_nocount(wcycle, ewcNS);
+ wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
+ nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintLocal].nbat,
+ &top->excls,
+ ic->rlist,
+ nbv->min_ci_balanced,
+ &nbv->grp[eintLocal].nbl_lists,
+ eintLocal,
+ nbv->grp[eintLocal].kernel_type,
+ nrnb);
+ wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
+
+ if (bUseGPU)
+ {
+ /* initialize local pair-list on the GPU */
+ nbnxn_cuda_init_pairlist(nbv->cu_nbv,
+ nbv->grp[eintLocal].nbl_lists.nbl[0],
+ eintLocal);
+ }
+ wallcycle_stop(wcycle, ewcNS);
+ }
+ else
+ {
+ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
+ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, x,
+ nbv->grp[eintLocal].nbat);
+ wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
+ wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+ }
+
+ if (bUseGPU)
+ {
+ wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
+ /* launch local nonbonded F on GPU */
+ do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
+ nrnb, wcycle);
+ wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
+ }
+
+ /* Communicate coordinates and sum dipole if necessary +
+ do non-local pair search */
+ if (DOMAINDECOMP(cr))
+ {
+ bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
+ nbv->grp[eintLocal].kernel_type);
+
+ if (bDiffKernels)
+ {
+ /* With GPU+CPU non-bonded calculations we need to copy
+ * the local coordinates to the non-local nbat struct
+ * (in CPU format) as the non-local kernel call also
+ * calculates the local - non-local interactions.
+ */
+ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
+ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, TRUE, x,
+ nbv->grp[eintNonlocal].nbat);
+ wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
+ wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+ }
+
+ if (bNS)
+ {
+ wallcycle_start_nocount(wcycle, ewcNS);
+ wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
+
+ if (bDiffKernels)
+ {
+ nbnxn_grid_add_simple(nbv->nbs, nbv->grp[eintNonlocal].nbat);
+ }
+
+ nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintNonlocal].nbat,
+ &top->excls,
+ ic->rlist,
+ nbv->min_ci_balanced,
+ &nbv->grp[eintNonlocal].nbl_lists,
+ eintNonlocal,
+ nbv->grp[eintNonlocal].kernel_type,
+ nrnb);
+
+ wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
+
+ if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_CUDA)
+ {
+ /* initialize non-local pair-list on the GPU */
+ nbnxn_cuda_init_pairlist(nbv->cu_nbv,
+ nbv->grp[eintNonlocal].nbl_lists.nbl[0],
+ eintNonlocal);
+ }
+ wallcycle_stop(wcycle, ewcNS);
+ }
+ else
+ {
+ wallcycle_start(wcycle, ewcMOVEX);
+ dd_move_x(cr->dd, box, x);
+
+ /* When we don't need the total dipole we sum it in global_stat */
+ if (bStateChanged && NEED_MUTOT(*inputrec))
+ {
+ gmx_sumd(2*DIM, mu, cr);
+ }
+ wallcycle_stop(wcycle, ewcMOVEX);
+
+ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
+ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, x,
+ nbv->grp[eintNonlocal].nbat);
+ wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
+ cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+ }
+
+ if (bUseGPU && !bDiffKernels)
+ {
+ wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
+ /* launch non-local nonbonded F on GPU */
+ do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
+ nrnb, wcycle);
+ cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
+ }
+ }
+
+ if (bUseGPU)
+ {
+ /* launch D2H copy-back F */
+ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
+ if (DOMAINDECOMP(cr) && !bDiffKernels)
+ {
+ nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat,
+ flags, eatNonlocal);
+ }
+ nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat,
+ flags, eatLocal);
+ cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
+ }
+
+ if (bStateChanged && NEED_MUTOT(*inputrec))
+ {
+ if (PAR(cr))
+ {
+ gmx_sumd(2*DIM, mu, cr);
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < DIM; j++)
+ {
+ fr->mu_tot[i][j] = mu[i*DIM + j];
+ }
+ }
+ }
+ if (fr->efep == efepNO)
+ {
+ copy_rvec(fr->mu_tot[0], mu_tot);
+ }
+ else
+ {
+ for (j = 0; j < DIM; j++)
+ {
+ mu_tot[j] =
+ (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
+ lambda[efptCOUL]*fr->mu_tot[1][j];
+ }
+ }
+
+ /* Reset energies */
+ reset_enerdata(fr, bNS, enerd, MASTER(cr));
+ clear_rvecs(SHIFTS, fr->fshift);
+
+ if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
+ {
+ wallcycle_start(wcycle, ewcPPDURINGPME);
+ dd_force_flop_start(cr->dd, nrnb);
+ }
+
+ if (inputrec->bRot)
+ {
+ /* Enforced rotation has its own cycle counter that starts after the collective
+ * coordinates have been communicated. It is added to ddCyclF to allow
+ * for proper load-balancing */
+ wallcycle_start(wcycle, ewcROT);
+ do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
+ wallcycle_stop(wcycle, ewcROT);
+ }
+
+ /* Start the force cycle counter.
+ * This counter is stopped in do_force_lowlevel.
+ * No parallel communication should occur while this counter is running,
+ * since that will interfere with the dynamic load balancing.
+ */
+ wallcycle_start(wcycle, ewcFORCE);
+ if (bDoForces)
+ {
+ /* Reset forces for which the virial is calculated separately:
+ * PME/Ewald forces if necessary */
+ if (fr->bF_NoVirSum)
+ {
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ fr->f_novirsum = fr->f_novirsum_alloc;
+ if (fr->bDomDec)
+ {
+ clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
+ }
+ else
+ {
+ clear_rvecs(homenr, fr->f_novirsum+start);
+ }
+ }
+ else
+ {
+ /* We are not calculating the pressure so we do not need
+ * a separate array for forces that do not contribute
+ * to the pressure.
+ */
+ fr->f_novirsum = f;
+ }
+ }
+
+ /* Clear the short- and long-range forces */
+ clear_rvecs(fr->natoms_force_constr, f);
+ if (bSepLRF && do_per_step(step, inputrec->nstcalclr))
+ {
+ clear_rvecs(fr->natoms_force_constr, fr->f_twin);
+ }
+
+ clear_rvec(fr->vir_diag_posres);
+ }
+
+ if (inputrec->ePull == epullCONSTRAINT)
+ {
+ clear_pull_forces(inputrec->pull);
+ }
+
+ /* We calculate the non-bonded forces, when done on the CPU, here.
+ * We do this before calling do_force_lowlevel, as in there bondeds
+ * forces are calculated before PME, which does communication.
+ * With this order, non-bonded and bonded force calculation imbalance
+ * can be balanced out by the domain decomposition load balancing.
+ */
+
+ if (!bUseOrEmulGPU)
+ {
+ /* Maybe we should move this into do_force_lowlevel */
+ do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
+ nrnb, wcycle);
+ }
+
+ if (fr->efep != efepNO)
+ {
+ /* Calculate the local and non-local free energy interactions here.
+ * Happens here on the CPU both with and without GPU.
+ */
+ if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ {
+ do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists,
+ fr, x, f, mdatoms,
+ inputrec->fepvals, lambda,
+ enerd, flags, nrnb, wcycle);
+ }
+
+ if (DOMAINDECOMP(cr) &&
+ fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ {
+ do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists,
+ fr, x, f, mdatoms,
+ inputrec->fepvals, lambda,
+ enerd, flags, nrnb, wcycle);
+ }
+ }
+
+ if (!bUseOrEmulGPU || bDiffKernels)
+ {
+ int aloc;
+
+ if (DOMAINDECOMP(cr))
+ {
+ do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
+ bDiffKernels ? enbvClearFYes : enbvClearFNo,
+ nrnb, wcycle);
+ }
+
+ if (!bUseOrEmulGPU)
+ {
+ aloc = eintLocal;
+ }
+ else
+ {
+ aloc = eintNonlocal;
+ }
+
+ /* Add all the non-bonded force to the normal force array.
+ * This can be split into a local and a non-local part when overlapping
+ * communication with calculation with domain decomposition.
+ */
+ cycles_force += wallcycle_stop(wcycle, ewcFORCE);
+ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->grp[aloc].nbat, f);
+ wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
+ cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+ wallcycle_start_nocount(wcycle, ewcFORCE);
+
+ /* if there are multiple fshift output buffers reduce them */
+ if ((flags & GMX_FORCE_VIRIAL) &&
+ nbv->grp[aloc].nbl_lists.nnbl > 1)
+ {
+ nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
+ fr->fshift);
+ }
+ }
+
+ /* update QMMMrec, if necessary */
+ if (fr->bQMMM)
+ {
+ update_QMMMrec(cr, fr, x, mdatoms, box, top);
+ }
+
+ if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
+ {
+ posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
+ enerd, lambda, fr);
+ }
+
+ if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
+ {
+ fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
+ }
+
+ /* Compute the bonded and non-bonded energies and optionally forces */
+ do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
+ cr, nrnb, wcycle, mdatoms,
+ x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
+ &(top->atomtypes), bBornRadii, box,
+ inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
+ flags, &cycles_pme);
+
+ if (bSepLRF)
+ {
+ if (do_per_step(step, inputrec->nstcalclr))
+ {
+ /* Add the long range forces to the short range forces */
+ for (i = 0; i < fr->natoms_force_constr; i++)
+ {
+ rvec_add(fr->f_twin[i], f[i], f[i]);
+ }
+ }
+ }
+
+ cycles_force += wallcycle_stop(wcycle, ewcFORCE);
+
+ if (ed)
+ {
+ do_flood(cr, inputrec, x, f, ed, box, step, bNS);
+ }
+
+ if (bUseOrEmulGPU && !bDiffKernels)
+ {
+ /* wait for non-local forces (or calculate in emulation mode) */
+ if (DOMAINDECOMP(cr))
+ {
+ if (bUseGPU)
+ {
+ float cycles_tmp;
+
+ wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
+ nbnxn_cuda_wait_gpu(nbv->cu_nbv,
+ nbv->grp[eintNonlocal].nbat,
+ flags, eatNonlocal,
+ enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
+ fr->fshift);
+ cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
+ cycles_wait_gpu += cycles_tmp;
+ cycles_force += cycles_tmp;
+ }
+ else
+ {
+ wallcycle_start_nocount(wcycle, ewcFORCE);
+ do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
+ nrnb, wcycle);
+ cycles_force += wallcycle_stop(wcycle, ewcFORCE);
+ }
+ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+ /* skip the reduction if there was no non-local work to do */
+ if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
+ {
+ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
+ nbv->grp[eintNonlocal].nbat, f);
+ }
+ wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
+ cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+ }
+ }
+
+ if (bDoForces && DOMAINDECOMP(cr))
+ {
+ /* Communicate the forces */
+ wallcycle_start(wcycle, ewcMOVEF);
+ dd_move_f(cr->dd, f, fr->fshift);
+ /* Do we need to communicate the separate force array
+ * for terms that do not contribute to the single sum virial?
+ * Position restraints and electric fields do not introduce
+ * inter-cg forces, only full electrostatics methods do.
+ * When we do not calculate the virial, fr->f_novirsum = f,
+ * so we have already communicated these forces.
+ */
+ if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
+ (flags & GMX_FORCE_VIRIAL))
+ {
+ dd_move_f(cr->dd, fr->f_novirsum, NULL);
+ }
+ if (bSepLRF)
+ {
+ /* We should not update the shift forces here,
+ * since f_twin is already included in f.
+ */
+ dd_move_f(cr->dd, fr->f_twin, NULL);
+ }
+ wallcycle_stop(wcycle, ewcMOVEF);
+ }
+
+ if (bUseOrEmulGPU)
+ {
+ /* wait for local forces (or calculate in emulation mode) */
+ if (bUseGPU)
+ {
+ wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
+ nbnxn_cuda_wait_gpu(nbv->cu_nbv,
+ nbv->grp[eintLocal].nbat,
+ flags, eatLocal,
+ enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
+ fr->fshift);
+ cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
+
+ /* now clear the GPU outputs while we finish the step on the CPU */
+
+ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
+ nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags);
+ wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
+ }
+ else
+ {
+ wallcycle_start_nocount(wcycle, ewcFORCE);
+ do_nb_verlet(fr, ic, enerd, flags, eintLocal,
+ DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
+ nrnb, wcycle);
+ wallcycle_stop(wcycle, ewcFORCE);
+ }
+ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
+ wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
+ if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
+ {
+ /* skip the reduction if there was no non-local work to do */
+ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
+ nbv->grp[eintLocal].nbat, f);
+ }
+ wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
+ wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ dd_force_flop_stop(cr->dd, nrnb);
+ if (wcycle)
+ {
+ dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
+ if (bUseGPU)
+ {
+ dd_cycles_add(cr->dd, cycles_wait_gpu, ddCyclWaitGPU);
+ }
+ }
+ }
+
+ if (bDoForces)
+ {
+ if (IR_ELEC_FIELD(*inputrec))
+ {
+ /* Compute forces due to electric field */
+ calc_f_el(MASTER(cr) ? field : NULL,
+ start, homenr, mdatoms->chargeA, fr->f_novirsum,
+ inputrec->ex, inputrec->et, t);
+ }
+
+ /* If we have NoVirSum forces, but we do not calculate the virial,
+ * we sum fr->f_novirsum=f later.
+ */
+ if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
+ {
+ wallcycle_start(wcycle, ewcVSITESPREAD);
+ spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb,
+ &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
+ wallcycle_stop(wcycle, ewcVSITESPREAD);
+
+ if (bSepLRF)
+ {
+ wallcycle_start(wcycle, ewcVSITESPREAD);
+ spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL,
+ nrnb,
+ &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
+ wallcycle_stop(wcycle, ewcVSITESPREAD);
+ }
+ }
+
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ /* Calculation of the virial must be done after vsites! */
+ calc_virial(0, mdatoms->homenr, x, f,
+ vir_force, graph, box, nrnb, fr, inputrec->ePBC);
+ }
+ }
+
+ if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
+ {
+ pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
+ f, vir_force, mdatoms, enerd, lambda, t);
+ }
+
+ /* Add the forces from enforced rotation potentials (if any) */
+ if (inputrec->bRot)
+ {
+ wallcycle_start(wcycle, ewcROTadd);
+ enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
+ wallcycle_stop(wcycle, ewcROTadd);
+ }
+
+ /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
+ IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
+
+ if (PAR(cr) && !(cr->duty & DUTY_PME))
+ {
+ /* In case of node-splitting, the PP nodes receive the long-range
+ * forces, virial and energy from the PME nodes here.
+ */
+ pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
+ }
+
+ if (bDoForces)
+ {
+ post_process_forces(cr, step, nrnb, wcycle,
+ top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
+ flags);
+ }
+
+ /* Sum the potential energy terms from group contributions */
+ sum_epot(&(enerd->grpp), enerd->term);
+}
+
+ /* Force and energy driver for the group cut-off scheme.
+ *
+ * do_force() dispatches here when inputrec->cutoff_scheme == ecutsGROUP.
+ * The sequence visible in this function is:
+ *  - zero vir_force; when the state changed, update the forcerec and
+ *    compute the local dipole with calc_mu;
+ *  - with PBC, recompute the shift vectors for a dynamic box and, when a
+ *    grid fill is due without domain decomposition, put the charge groups
+ *    in the box; without PBC compute the charge-group centers instead;
+ *  - in GMX_MPI builds, send the coordinates to the PME ranks when this
+ *    rank does not have DUTY_PME set;
+ *  - communicate coordinates with domain decomposition (dd_move_x);
+ *  - update the AdResS weights, sum the dipole over ranks, reset the
+ *    energies and the shift forces;
+ *  - run the neighbour search (ns) when requested;
+ *  - clear the force arrays, apply (flat-bottomed) position restraints
+ *    and call do_force_lowlevel;
+ *  - communicate forces (dd_move_f), spread virtual-site forces and
+ *    compute the virial;
+ *  - add pull, enforced-rotation and IMD forces, receive the PME results
+ *    on ranks without DUTY_PME, post-process the forces and sum the
+ *    potential energy terms.
+ *
+ * flags is a bit mask; the bits tested here are GMX_FORCE_STATECHANGED,
+ * GMX_FORCE_NS, GMX_FORCE_FORCES, GMX_FORCE_ENERGY, GMX_FORCE_VIRIAL,
+ * GMX_FORCE_DYNAMICBOX, GMX_FORCE_BONDED, GMX_FORCE_SEPLRF and
+ * GMX_FORCE_DO_LR.
+ * vir_force is zeroed on entry and later handed to calc_virial,
+ * pull_potential_wrapper and post_process_forces.
+ * mu_tot is written only when NEED_MUTOT(*inputrec) is true.
+ * f is cleared (fr->natoms_force_constr entries) when GMX_FORCE_FORCES is set.
+ */ void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
+ t_inputrec *inputrec,
+ gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ gmx_localtop_t *top,
+ gmx_groups_t *groups,
+ matrix box, rvec x[], history_t *hist,
+ rvec f[],
+ tensor vir_force,
+ t_mdatoms *mdatoms,
+ gmx_enerdata_t *enerd, t_fcdata *fcd,
+ real *lambda, t_graph *graph,
+ t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot,
+ double t, FILE *field, gmx_edsam_t ed,
+ gmx_bool bBornRadii,
+ int flags)
+{
+ int cg0, cg1, i, j;
+ int start, homenr;
+ double mu[2*DIM]; /* two DIM-vectors: mu and mu+DIM are filled by calc_mu and copied to fr->mu_tot[0] and fr->mu_tot[1] */
+ gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; /* bBS is assigned only inside the GMX_MPI section below */
+ gmx_bool bDoLongRangeNS, bDoForces, bDoPotential, bSepLRF;
+ gmx_bool bDoAdressWF;
+ matrix boxs;
+ rvec vzero, box_diag; /* NOTE(review): not referenced in this function */
+ real e, v, dvdlambda[efptNR]; /* NOTE(review): not referenced in this function */
+ t_pbc pbc;
+ float cycles_pme, cycles_force;
+
+ start = 0;
+ homenr = mdatoms->homenr;
+
+ bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
+
+ clear_mat(vir_force);
+
+ cg0 = 0; /* [cg0, cg1) is the charge-group range passed to the helpers below */
+ if (DOMAINDECOMP(cr))
+ {
+ cg1 = cr->dd->ncg_tot;
+ }
+ else
+ {
+ cg1 = top->cgs.nr;
+ }
+ if (fr->n_tpi > 0)
+ {
+ cg1--; /* with fr->n_tpi > 0 the last charge group is left out of the range */
+ }
+
+ bStateChanged = (flags & GMX_FORCE_STATECHANGED);
+ bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
+ /* Should we update the long-range neighborlists at this step? */
+ bDoLongRangeNS = fr->bTwinRange && bNS;
+ /* Should we perform the long-range nonbonded evaluation inside the neighborsearching?
+ * NOTE(review): the assignments below set bFillGrid and bCalcCGCM; this
+ * question does not obviously describe them - possibly a stale comment. */
+ bFillGrid = (bNS && bStateChanged);
+ bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
+ bDoForces = (flags & GMX_FORCE_FORCES);
+ bDoPotential = (flags & GMX_FORCE_ENERGY); /* NOTE(review): assigned but not read again in this function */
+ bSepLRF = ((inputrec->nstcalclr > 1) && bDoForces &&
+ (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR));
+
+ /* should probably move this to the forcerec since it doesn't change */
+ bDoAdressWF = ((fr->adress_type != eAdressOff));
+
+ if (bStateChanged)
+ {
+ update_forcerec(fr, box);
+
+ if (NEED_MUTOT(*inputrec))
+ {
+ /* Calculate total (local) dipole moment in a temporary common array.
+ * This makes it possible to sum them over nodes faster.
+ */
+ calc_mu(start, homenr,
+ x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
+ mu, mu+DIM);
+ }
+ }
+
+ if (fr->ePBC != epbcNONE)
+ {
+ /* Compute shift vectors every step,
+ * because of pressure coupling or box deformation!
+ */
+ if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
+ {
+ calc_shifts(box, fr->shift_vec);
+ }
+
+ if (bCalcCGCM)
+ {
+ put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box,
+ &(top->cgs), x, fr->cg_cm);
+ inc_nrnb(nrnb, eNR_CGCM, homenr);
+ inc_nrnb(nrnb, eNR_RESETX, cg1-cg0);
+ }
+ else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
+ {
+ unshift_self(graph, box, x);
+ }
+ }
+ else if (bCalcCGCM)
+ {
+ calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm);
+ inc_nrnb(nrnb, eNR_CGCM, homenr);
+ }
+
+ if (bCalcCGCM && gmx_debug_at)
+ {
+ pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr);
+ }
+
+#ifdef GMX_MPI
+ if (!(cr->duty & DUTY_PME))
+ {
+ /* Send particle coordinates to the pme nodes.
+ * Since this is only implemented for domain decomposition
+ * and domain decomposition does not use the graph,
+ * we do not need to worry about shifting.
+ */
+
+ int pme_flags = 0;
+
+ wallcycle_start(wcycle, ewcPP_PMESENDX);
+
+ bBS = (inputrec->nwall == 2); /* with two walls a copy of the box with boxs[ZZ] scaled by wall_ewald_zfac is sent instead of box */
+ if (bBS)
+ {
+ copy_mat(box, boxs);
+ svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
+ }
+
+ if (EEL_PME(fr->eeltype))
+ {
+ pme_flags |= GMX_PME_DO_COULOMB;
+ }
+
+ if (EVDW_PME(fr->vdwtype))
+ {
+ pme_flags |= GMX_PME_DO_LJ;
+ }
+
+ gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
+ mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
+ (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
+ pme_flags, step);
+
+ wallcycle_stop(wcycle, ewcPP_PMESENDX);
+ }
+#endif /* GMX_MPI */
+
+ /* Communicate coordinates and sum dipole if necessary */
+ if (DOMAINDECOMP(cr))
+ {
+ wallcycle_start(wcycle, ewcMOVEX);
+ dd_move_x(cr->dd, box, x);
+ wallcycle_stop(wcycle, ewcMOVEX);
+ }
+
+ /* update adress weight beforehand */
+ if (bStateChanged && bDoAdressWF)
+ {
+ /* need pbc for adress weight calculation with pbc_dx
+ * NOTE(review): this is the only place in this function where pbc is
+ * initialised, yet &pbc is also passed to adress_thermo_force further
+ * down without a bStateChanged check - confirm that AdResS callers
+ * always set GMX_FORCE_STATECHANGED. */
+ set_pbc(&pbc, inputrec->ePBC, box);
+ if (fr->adress_site == eAdressSITEcog)
+ {
+ update_adress_weights_cog(top->idef.iparams, top->idef.il, x, fr, mdatoms,
+ inputrec->ePBC == epbcNONE ? NULL : &pbc);
+ }
+ else if (fr->adress_site == eAdressSITEcom)
+ {
+ update_adress_weights_com(fplog, cg0, cg1, &(top->cgs), x, fr, mdatoms,
+ inputrec->ePBC == epbcNONE ? NULL : &pbc);
+ }
+ else if (fr->adress_site == eAdressSITEatomatom)
+ {
+ update_adress_weights_atom_per_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms,
+ inputrec->ePBC == epbcNONE ? NULL : &pbc);
+ }
+ else
+ {
+ update_adress_weights_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms,
+ inputrec->ePBC == epbcNONE ? NULL : &pbc);
+ }
+ }
+
+ if (NEED_MUTOT(*inputrec))
+ {
+
+ if (bStateChanged)
+ {
+ if (PAR(cr))
+ {
+ gmx_sumd(2*DIM, mu, cr);
+ }
+ for (i = 0; i < 2; i++)
+ {
+ for (j = 0; j < DIM; j++)
+ {
+ fr->mu_tot[i][j] = mu[i*DIM + j];
+ }
+ }
+ }
+ if (fr->efep == efepNO)
+ {
+ copy_rvec(fr->mu_tot[0], mu_tot);
+ }
+ else
+ {
+ for (j = 0; j < DIM; j++)
+ {
+ mu_tot[j] = /* linear interpolation between fr->mu_tot[0] and fr->mu_tot[1] with weight lambda[efptCOUL] */
+ (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
+ }
+ }
+ }
+
+ /* Reset energies */
+ reset_enerdata(fr, bNS, enerd, MASTER(cr));
+ clear_rvecs(SHIFTS, fr->fshift);
+
+ if (bNS)
+ {
+ wallcycle_start(wcycle, ewcNS);
+
+ if (graph && bStateChanged)
+ {
+ /* Calculate intramolecular shift vectors to make molecules whole */
+ mk_mshift(fplog, graph, fr->ePBC, box, x);
+ }
+
+ /* Do the actual neighbour searching */
+ ns(fplog, fr, box,
+ groups, top, mdatoms,
+ cr, nrnb, bFillGrid,
+ bDoLongRangeNS);
+
+ wallcycle_stop(wcycle, ewcNS);
+ }
+
+ if (inputrec->implicit_solvent && bNS)
+ {
+ make_gb_nblist(cr, inputrec->gb_algorithm,
+ x, box, fr, &top->idef, graph, fr->born);
+ }
+
+ if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME))
+ {
+ wallcycle_start(wcycle, ewcPPDURINGPME);
+ dd_force_flop_start(cr->dd, nrnb);
+ }
+
+ if (inputrec->bRot)
+ {
+ /* Enforced rotation has its own cycle counter that starts after the collective
+ * coordinates have been communicated. It is added to ddCyclF to allow
+ * for proper load-balancing */
+ wallcycle_start(wcycle, ewcROT);
+ do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
+ wallcycle_stop(wcycle, ewcROT);
+ }
+
+ /* Start the force cycle counter.
+ * This counter is stopped further down in this function, after
+ * do_force_lowlevel and the optional long-range force sum.
+ * No parallel communication should occur while this counter is running,
+ * since that will interfere with the dynamic load balancing.
+ */
+ wallcycle_start(wcycle, ewcFORCE);
+
+ if (bDoForces)
+ {
+ /* Reset forces for which the virial is calculated separately:
+ * PME/Ewald forces if necessary */
+ if (fr->bF_NoVirSum)
+ {
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ fr->f_novirsum = fr->f_novirsum_alloc;
+ if (fr->bDomDec)
+ {
+ clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
+ }
+ else
+ {
+ clear_rvecs(homenr, fr->f_novirsum+start);
+ }
+ }
+ else
+ {
+ /* We are not calculating the pressure so we do not need
+ * a separate array for forces that do not contribute
+ * to the pressure.
+ */
+ fr->f_novirsum = f;
+ }
+ }
+
+ /* Clear the short- and long-range forces */
+ clear_rvecs(fr->natoms_force_constr, f);
+ if (bSepLRF && do_per_step(step, inputrec->nstcalclr))
+ {
+ clear_rvecs(fr->natoms_force_constr, fr->f_twin);
+ }
+
+ clear_rvec(fr->vir_diag_posres);
+ }
+ if (inputrec->ePull == epullCONSTRAINT)
+ {
+ clear_pull_forces(inputrec->pull);
+ }
+
+ /* update QMMMrec, if necessary */
+ if (fr->bQMMM)
+ {
+ update_QMMMrec(cr, fr, x, mdatoms, box, top);
+ }
+
+ if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
+ {
+ posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
+ enerd, lambda, fr);
+ }
+
+ if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
+ {
+ fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
+ }
+
+ /* Compute the bonded and non-bonded energies and optionally forces.
+ * The second force argument is fr->f_twin when bSepLRF is set and f
+ * otherwise; &cycles_pme is passed as the last argument. */
+ do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
+ cr, nrnb, wcycle, mdatoms,
+ x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
+ &(top->atomtypes), bBornRadii, box,
+ inputrec->fepvals, lambda,
+ graph, &(top->excls), fr->mu_tot,
+ flags,
+ &cycles_pme);
+
+ if (bSepLRF)
+ {
+ if (do_per_step(step, inputrec->nstcalclr))
+ {
+ /* Add the long range forces to the short range forces */
+ for (i = 0; i < fr->natoms_force_constr; i++)
+ {
+ rvec_add(fr->f_twin[i], f[i], f[i]);
+ }
+ }
+ }
+
+ cycles_force = wallcycle_stop(wcycle, ewcFORCE);
+
+ if (ed)
+ {
+ do_flood(cr, inputrec, x, f, ed, box, step, bNS);
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ dd_force_flop_stop(cr->dd, nrnb);
+ if (wcycle)
+ {
+ dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF); /* presumably cycles_pme was filled in by do_force_lowlevel - TODO confirm it is always set */
+ }
+ }
+
+ if (bDoForces)
+ {
+ if (IR_ELEC_FIELD(*inputrec))
+ {
+ /* Compute forces due to electric field */
+ calc_f_el(MASTER(cr) ? field : NULL,
+ start, homenr, mdatoms->chargeA, fr->f_novirsum,
+ inputrec->ex, inputrec->et, t);
+ }
+
+ if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
+ {
+ /* Compute thermodynamic force in hybrid AdResS region */
+ adress_thermo_force(start, homenr, &(top->cgs), x, fr->f_novirsum, fr, mdatoms,
+ inputrec->ePBC == epbcNONE ? NULL : &pbc);
+ }
+
+ /* Communicate the forces */
+ if (DOMAINDECOMP(cr))
+ {
+ wallcycle_start(wcycle, ewcMOVEF);
+ dd_move_f(cr->dd, f, fr->fshift);
+ /* Do we need to communicate the separate force array
+ * for terms that do not contribute to the single sum virial?
+ * Position restraints and electric fields do not introduce
+ * inter-cg forces, only full electrostatics methods do.
+ * When we do not calculate the virial, fr->f_novirsum = f,
+ * so we have already communicated these forces.
+ */
+ if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
+ (flags & GMX_FORCE_VIRIAL))
+ {
+ dd_move_f(cr->dd, fr->f_novirsum, NULL);
+ }
+ if (bSepLRF)
+ {
+ /* We should not update the shift forces here,
+ * since f_twin is already included in f.
+ */
+ dd_move_f(cr->dd, fr->f_twin, NULL);
+ }
+ wallcycle_stop(wcycle, ewcMOVEF);
+ }
+
+ /* If we have NoVirSum forces, but we do not calculate the virial,
+ * we sum fr->f_novirsum=f later.
+ */
+ if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
+ {
+ wallcycle_start(wcycle, ewcVSITESPREAD);
+ spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb,
+ &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
+ wallcycle_stop(wcycle, ewcVSITESPREAD);
+
+ if (bSepLRF)
+ {
+ wallcycle_start(wcycle, ewcVSITESPREAD);
+ spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL,
+ nrnb,
+ &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
+ wallcycle_stop(wcycle, ewcVSITESPREAD);
+ }
+ }
+
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ /* Calculation of the virial must be done after vsites! */
+ calc_virial(0, mdatoms->homenr, x, f,
+ vir_force, graph, box, nrnb, fr, inputrec->ePBC);
+ }
+ }
+
+ if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
+ {
+ pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
+ f, vir_force, mdatoms, enerd, lambda, t);
+ }
+
+ /* Add the forces from enforced rotation potentials (if any) */
+ if (inputrec->bRot)
+ {
+ wallcycle_start(wcycle, ewcROTadd);
+ enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
+ wallcycle_stop(wcycle, ewcROTadd);
+ }
+
+ /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. */
+ IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle);
+
+ if (PAR(cr) && !(cr->duty & DUTY_PME))
+ {
+ /* In case of node-splitting, the PP nodes receive the long-range
+ * forces, virial and energy from the PME nodes here.
+ */
+ pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
+ }
+
+ if (bDoForces)
+ {
+ post_process_forces(cr, step, nrnb, wcycle,
+ top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
+ flags);
+ }
+
+ /* Sum the potential energy terms from group contributions */
+ sum_epot(&(enerd->grpp), enerd->term);
+}
+
+ /* Top-level force routine: forwards all arguments to the driver that
+  * matches inputrec->cutoff_scheme.
+  *
+  * When fr->bNonbonded is false, GMX_FORCE_NONBONDED is removed from the
+  * flags before they are forwarded. The Verlet driver additionally receives
+  * fr->ic. Any scheme other than ecutsVERLET or ecutsGROUP is reported
+  * through gmx_incons().
+  */
+ void do_force(FILE *fplog, t_commrec *cr,
+               t_inputrec *inputrec,
+               gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+               gmx_localtop_t *top,
+               gmx_groups_t *groups,
+               matrix box, rvec x[], history_t *hist,
+               rvec f[],
+               tensor vir_force,
+               t_mdatoms *mdatoms,
+               gmx_enerdata_t *enerd, t_fcdata *fcd,
+               real *lambda, t_graph *graph,
+               t_forcerec *fr,
+               gmx_vsite_t *vsite, rvec mu_tot,
+               double t, FILE *field, gmx_edsam_t ed,
+               gmx_bool bBornRadii,
+               int flags)
+{
+     /* Mask out the non-bonded request when non-bondeds are switched off */
+     const int force_flags = fr->bNonbonded ? flags : (flags & ~GMX_FORCE_NONBONDED);
+
+     if (inputrec->cutoff_scheme == ecutsVERLET)
+     {
+         do_force_cutsVERLET(fplog, cr, inputrec,
+                             step, nrnb, wcycle,
+                             top, groups,
+                             box, x, hist,
+                             f, vir_force,
+                             mdatoms,
+                             enerd, fcd,
+                             lambda, graph,
+                             fr, fr->ic,
+                             vsite, mu_tot,
+                             t, field, ed,
+                             bBornRadii,
+                             force_flags);
+     }
+     else if (inputrec->cutoff_scheme == ecutsGROUP)
+     {
+         do_force_cutsGROUP(fplog, cr, inputrec,
+                            step, nrnb, wcycle,
+                            top, groups,
+                            box, x, hist,
+                            f, vir_force,
+                            mdatoms,
+                            enerd, fcd,
+                            lambda, graph,
+                            fr, vsite, mu_tot,
+                            t, field, ed,
+                            bBornRadii,
+                            force_flags);
+     }
+     else
+     {
+         gmx_incons("Invalid cut-off scheme passed!");
+     }
+}
+
+
+ /* Constrain the starting configuration at step ir->init_step.
+  *
+  * 1. state->x is constrained in place.
+  * 2. For EI_VV integrators, state->v is constrained as well (econqVeloc).
+  * 3. For integrators with state velocities other than eiVV, the velocities
+  *    are reversed, positions x - dt*v are built in a scratch array and
+  *    constrained with state->x as reference (state->v is handed to
+  *    constrain() in that call), after which the velocities get their
+  *    original sign back.
+  *
+  * Progress messages go to fplog when it is non-NULL.
+  */
+ void do_constrain_first(FILE *fplog, gmx_constr_t constr,
+                         t_inputrec *ir, t_mdatoms *md,
+                         t_state *state, t_commrec *cr, t_nrnb *nrnb,
+                         t_forcerec *fr, gmx_localtop_t *top)
+{
+     const real   dt = ir->delta_t;
+     int          first_atom, last_atom;   /* home atom range [first_atom, last_atom) */
+     int          atom, d;
+     gmx_int64_t  step;
+     real         dvdl_dum;
+     rvec        *savex;
+
+     snew(savex, state->natoms);
+
+     first_atom = 0;
+     last_atom  = md->homenr;
+
+     if (debug)
+     {
+         fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n",
+                 first_atom, md->homenr, last_atom);
+     }
+
+     /* First constraining pass: reset the particle positions */
+     step = ir->init_step;
+     if (fplog)
+     {
+         char stepstr[STEPSTRSIZE];
+         fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
+                 gmx_step_str(step, stepstr));
+     }
+
+     /* Positions at t0, constrained onto themselves */
+     dvdl_dum = 0;
+     constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+               ir, NULL, cr, step, 0, md,
+               state->x, state->x, NULL,
+               fr->bMolPBC, state->box,
+               state->lambda[efptBONDED], &dvdl_dum,
+               NULL, NULL, nrnb, econqCoord,
+               ir->epc == epcMTTK, state->veta, state->veta);
+
+     if (EI_VV(ir->eI))
+     {
+         /* Velocity Verlet: constrain the initial velocities in place.
+          * This may also be useful when the half-step ekin is needed;
+          * veta might not be treated correctly yet.
+          */
+         constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+                   ir, NULL, cr, step, 0, md,
+                   state->x, state->v, state->v,
+                   fr->bMolPBC, state->box,
+                   state->lambda[efptBONDED], &dvdl_dum,
+                   NULL, NULL, nrnb, econqVeloc,
+                   ir->epc == epcMTTK, state->veta, state->veta);
+     }
+
+     /* Constrain the initial velocities at t-dt/2 */
+     if (EI_STATE_VELOCITY(ir->eI) && ir->eI != eiVV)
+     {
+         for (atom = first_atom; atom < last_atom; atom++)
+         {
+             for (d = 0; d < DIM; d++)
+             {
+                 /* Flip the velocity and step the position back by dt */
+                 const real v_rev = -state->v[atom][d];
+
+                 state->v[atom][d] = v_rev;
+                 savex[atom][d]    = state->x[atom][d] + dt*v_rev;
+             }
+         }
+
+         /* Shake the positions at t=-dt, using the positions at t=0
+          * as reference coordinates.
+          */
+         if (fplog)
+         {
+             char stepstr[STEPSTRSIZE];
+             fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
+                     gmx_step_str(step, stepstr));
+         }
+         dvdl_dum = 0;
+         constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+                   ir, NULL, cr, step, -1, md,
+                   state->x, savex, NULL,
+                   fr->bMolPBC, state->box,
+                   state->lambda[efptBONDED], &dvdl_dum,
+                   state->v, NULL, nrnb, econqCoord,
+                   ir->epc == epcMTTK, state->veta, state->veta);
+
+         /* Give the velocities their original sign back */
+         for (atom = first_atom; atom < last_atom; atom++)
+         {
+             for (d = 0; d < DIM; d++)
+             {
+                 state->v[atom][d] = -state->v[atom][d];
+             }
+         }
+     }
+
+     sfree(savex);
+}
+
+
+ /* Integrate a tabulated cubic-spline VdW function over the table points
+  * rstart <= ri < rend and return the energy and virial integrals.
+  *
+  * vdwtab   table data; point ri has its four spline coefficients at
+  *          vdwtab[8*ri + offstart] .. vdwtab[8*ri + offstart + 3]
+  * scale    table scale, the point spacing is 1/scale
+  * offstart offset of the wanted function inside each group of 8 entries;
+  *          0 selects the factor 6 (dispersion), anything else the
+  *          factor 12 (repulsion)
+  * enerout  receives 4*pi*tabfactor times the energy sum
+  * virout   receives 4*pi*tabfactor times the virial sum
+  *
+  * The summation follows from the cubic spline definition (Numerical
+  * Recipes in C, second edition, p. 113-116) and is exact for the spline.
+  * We first calculate the negative of the energy from rvdw to rvdw_switch,
+  * assuming that g(r)=1; the caller then adds the standard abrupt cut-off
+  * correction to obtain the long-range correction for a switched function.
+  * Energy and pressure sums share one loop, as the cost is low.
+  */
+ static void
+ integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
+                 double *enerout, double *virout)
+{
+     /* The dispersion table has been scaled down by a factor 6.0 and the
+      * repulsion table by a factor 12.0 to compensate for the c6/c12
+      * parameters inside nbfp[] being scaled up (to save flops in the
+      * kernels); correct for that here.
+      */
+     const double tabfactor = (offstart == 0) ? 6.0 : 12.0;
+     const double h1        = 1.0/scale;
+     const double h2        = h1*h1;
+     const double h3        = h1*h2;
+     double       esum      = 0.0;
+     double       vsum      = 0.0;
+     int          point;
+
+     for (point = rstart; point < rend; ++point)
+     {
+         const double r  = point*h1;
+
+         /* Powers of the spacing combined with r, for the energy ... */
+         const double ea = h3;
+         const double eb = 2.0*h2*r;
+         const double ec = h1*r*r;
+
+         /* ... and for the virial */
+         const double pa = h3;
+         const double pb = 3.0*h2*r;
+         const double pc = 3.0*h1*r*r;
+         const double pd = r*r*r;
+
+         /* The stride of 8 comes from the packing of the vdwtab array */
+         const int    base = 8*point + offstart;
+         const double y0   = vdwtab[base];
+         const double f    = vdwtab[base+1];
+         const double g    = vdwtab[base+2];
+         const double h    = vdwtab[base+3];
+
+         /* Analytic weights of the four spline coefficients */
+         const double ew0 = ea/3 + eb/2 + ec;
+         const double ew1 = ea/4 + eb/3 + ec/2;
+         const double ew2 = ea/5 + eb/4 + ec/3;
+         const double ew3 = ea/6 + eb/5 + ec/4;
+         const double vw1 = pa/4 + pb/3 + pc/2 + pd;
+         const double vw2 = pa/5 + pb/4 + pc/3 + pd/2;
+         const double vw3 = pa/6 + pb/5 + pc/4 + pd/3;
+
+         esum += y0*ew0 + f*ew1 + g*ew2 + h*ew3;
+         vsum += f*vw1 + 2*g*vw2 + 3*h*vw3;
+     }
+
+     *enerout = 4.0*M_PI*esum*tabfactor;
+     *virout  = 4.0*M_PI*vsum*tabfactor;
+}
+
+void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr)
+{ /* Set the dispersion-correction energy and virial terms stored in fr.
+ *
+ * fr->enershiftsix, enershifttwelve, enerdiffsix, enerdifftwelve,
+ * virdiffsix and virdifftwelve are first reset to zero and are only
+ * recomputed when eDispCorr != edispcNO. Index 0 of the local eners[]
+ * and virs[] arrays holds the dispersion (r^-6) term, index 1 the
+ * repulsion (r^-12) term.
+ *
+ * fplog is only used for the user-table warning and may be NULL.
+ * Unsupported vdw types end in gmx_fatal().
+ *
+ * NOTE(review): this span carries '-' and '++' diff markers from two
+ * revisions of the function; the '-' lines are not part of the newer
+ * revision - confirm which revision is intended before editing code.
+ */
- if (fr->vdwtype == evdwSWITCH || fr->vdwtype == evdwSHIFT ||
- fr->vdw_modifier == eintmodPOTSWITCH ||
- fr->vdw_modifier == eintmodFORCESWITCH)
++ double eners[2], virs[2], enersum, virsum, y0, f, g, h;
++ double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd;
++ double invscale, invscale2, invscale3;
++ int ri0, ri1, ri, i, offstart, offset;
++ real scale, *vdwtab, tabfactor, tmp;
+
+ fr->enershiftsix = 0;
+ fr->enershifttwelve = 0;
+ fr->enerdiffsix = 0;
+ fr->enerdifftwelve = 0;
+ fr->virdiffsix = 0;
+ fr->virdifftwelve = 0;
+
+ if (eDispCorr != edispcNO)
+ {
+ for (i = 0; i < 2; i++)
+ {
+ eners[i] = 0;
+ virs[i] = 0;
+ }
- if (fr->rvdw_switch == 0)
++ if ((fr->vdw_modifier == eintmodPOTSHIFT) ||
++ (fr->vdw_modifier == eintmodPOTSWITCH) ||
++ (fr->vdw_modifier == eintmodFORCESWITCH) ||
++ (fr->vdwtype == evdwSHIFT) ||
++ (fr->vdwtype == evdwSWITCH))
+ {
- scale = fr->nblists[0].table_elec_vdw.scale;
++ if (((fr->vdw_modifier == eintmodPOTSWITCH) ||
++ (fr->vdw_modifier == eintmodFORCESWITCH) ||
++ (fr->vdwtype == evdwSWITCH)) && fr->rvdw_switch == 0)
+ {
+ gmx_fatal(FARGS,
+ "With dispersion correction rvdw-switch can not be zero "
+ "for vdw-type = %s", evdw_names[fr->vdwtype]);
+ }
+
- if (fr->vdwtype == evdwSHIFT ||
- fr->vdw_modifier == eintmodFORCESWITCH)
++ scale = fr->nblists[0].table_vdw.scale;
+ vdwtab = fr->nblists[0].table_vdw.data;
+
+ /* Round the cut-offs to exact table values for precision */
+ ri0 = floor(fr->rvdw_switch*scale);
+ ri1 = ceil(fr->rvdw*scale);
++
++ /* The code below has some support for handling force-switching, i.e.
++ * when the force (instead of potential) is switched over a limited
++ * region. This leads to a constant shift in the potential inside the
++ * switching region, which we can handle by adding a constant energy
++ * term in the force-switch case just like when we do potential-shift.
++ *
++ * For now this is not enabled, but to keep the functionality in the
++ * code we check separately for switch and shift. When we do force-switch
++ * the shifting point is rvdw_switch, while it is the cutoff when we
++ * have a classical potential-shift.
++ *
++ * For a pure potential-shift the potential has a constant shift
++ * all the way out to the cutoff, and that is it. For other forms
++ * we need to calculate the constant shift up to the point where we
++ * start modifying the potential.
++ */
++ ri0 = (fr->vdw_modifier == eintmodPOTSHIFT) ? ri1 : ri0;
++
+ r0 = ri0/scale;
+ r1 = ri1/scale;
+ rc3 = r0*r0*r0;
+ rc9 = rc3*rc3*rc3;
+
- /* now add the correction for rvdw_switch to infinity */
++ if ((fr->vdw_modifier == eintmodFORCESWITCH) ||
++ (fr->vdwtype == evdwSHIFT))
+ {
+ /* Determine the constant energy shift below rvdw_switch.
+ * Table has a scale factor since we have scaled it down to compensate
+ * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
+ */
+ fr->enershiftsix = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
+ fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
+ }
++ else if (fr->vdw_modifier == eintmodPOTSHIFT)
++ {
++ fr->enershiftsix = (real)(-1.0/(rc3*rc3));
++ fr->enershifttwelve = (real)( 1.0/(rc9*rc3));
++ }
++
+ /* Add the constant part from 0 to rvdw_switch.
+ * This integration from 0 to rvdw_switch overcounts the number
+ * of interactions by 1, as it also counts the self interaction.
+ * We will correct for this later.
+ */
+ eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
+ eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
++
++ /* Calculate the contribution in the range [r0,r1] where we
++ * modify the potential. For a pure potential-shift modifier we will
++ * have ri0==ri1, and there will not be any contribution here.
++ */
+ for (i = 0; i < 2; i++)
+ {
+ enersum = 0;
+ virsum = 0;
+ integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
+ eners[i] -= enersum;
+ virs[i] -= virsum;
+ }
+
- /* TODO: remove this code once we have group LJ-PME kernels
- * that calculate the exact, full LJ param C6/r^6 within the cut-off,
- * as the current nbnxn kernels do.
- */
++ /* Alright: Above we compensated by REMOVING the parts outside r0
++ * corresponding to the ideal VdW 1/r6 and /r12 potentials.
++ *
++ * Regardless of whether r0 is the point where we start switching,
++ * or the cutoff where we calculated the constant shift, we include
++ * all the parts we are missing out to infinity from r0 by
++ * calculating the analytical dispersion correction.
++ */
+ eners[0] += -4.0*M_PI/(3.0*rc3);
+ eners[1] += 4.0*M_PI/(9.0*rc9);
+ virs[0] += 8.0*M_PI/rc3;
+ virs[1] += -16.0*M_PI/(3.0*rc9);
+ }
+ else if (fr->vdwtype == evdwCUT ||
+ EVDW_PME(fr->vdwtype) ||
+ fr->vdwtype == evdwUSER)
+ {
+ if (fr->vdwtype == evdwUSER && fplog)
+ {
+ fprintf(fplog,
+ "WARNING: using dispersion correction with user tables\n");
+ }
+
+ /* Note that with LJ-PME, the dispersion correction is multiplied
+ * by the difference between the actual C6 and the value of C6
+ * that would produce the combination rule.
+ * This means the normal energy and virial difference formulas
+ * can be used here.
+ */
+
+ rc3 = fr->rvdw*fr->rvdw*fr->rvdw;
+ rc9 = rc3*rc3*rc3;
+ /* Contribution beyond the cut-off */
+ eners[0] += -4.0*M_PI/(3.0*rc3);
+ eners[1] += 4.0*M_PI/(9.0*rc9);
+ if (fr->vdw_modifier == eintmodPOTSHIFT)
+ {
+ /* Contribution within the cut-off */
+ eners[0] += -4.0*M_PI/(3.0*rc3);
+ eners[1] += 4.0*M_PI/(3.0*rc9);
+ }
+ /* Contribution beyond the cut-off */
+ virs[0] += 8.0*M_PI/rc3;
+ virs[1] += -16.0*M_PI/(3.0*rc9);
+ }
+ else
+ {
+ gmx_fatal(FARGS,
+ "Dispersion correction is not implemented for vdw-type = %s",
+ evdw_names[fr->vdwtype]);
+ }
+
++ /* When we deprecate the group kernels the code below can go too */
+ if (fr->vdwtype == evdwPME && fr->cutoff_scheme == ecutsGROUP)
+ {
+ /* Calculate self-interaction coefficient (assuming that
+ * the reciprocal-space contribution is constant in the
+ * region that contributes to the self-interaction).
+ */
+ fr->enershiftsix = pow(fr->ewaldcoeff_lj, 6) / 6.0;
+
+ eners[0] += -pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3)/3.0;
+ virs[0] += pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3);
+ }
+
+ fr->enerdiffsix = eners[0];
+ fr->enerdifftwelve = eners[1];
+ /* The 0.5 is due to the Gromacs definition of the virial */
+ fr->virdiffsix = 0.5*virs[0];
+ fr->virdifftwelve = 0.5*virs[1];
+ }
+}
+
+/* Compute the long-range dispersion correction for the current box.
+ *
+ * The outputs *prescorr, *enercorr and *dvdlcorr and the tensors pres and
+ * virial are always cleared first; nothing else happens when
+ * ir->eDispCorr == edispcNO. Otherwise the energy correction is built from
+ * the per-pair terms stored in fr (enerdiffsix/enershiftsix and, for the
+ * "All" variants, their twelve counterparts), the virial and pressure
+ * corrections are added to the diagonals of virial and pres when a
+ * pressure variant is selected, and the lambda derivative is accumulated
+ * when free-energy perturbation is active.
+ */
+void calc_dispcorr(FILE *fplog, t_inputrec *ir, t_forcerec *fr,
+                   gmx_int64_t step, int natoms,
+                   matrix box, real lambda, tensor pres, tensor virial,
+                   real *prescorr, real *enercorr, real *dvdlcorr)
+{
+    gmx_bool bAllTerms, bPressure;
+    real     dvdl, inv_volume, density, npairs, c6_avg, c12_avg, ediff;
+    real     vir_corr = 0, pres_corr = 0;
+    int      d;
+
+    *prescorr = 0;
+    *enercorr = 0;
+    *dvdlcorr = 0;
+
+    clear_mat(virial);
+    clear_mat(pres);
+
+    if (ir->eDispCorr == edispcNO)
+    {
+        return;
+    }
+
+    bAllTerms = (ir->eDispCorr == edispcAllEner ||
+                 ir->eDispCorr == edispcAllEnerPres);
+    bPressure = (ir->eDispCorr == edispcEnerPres ||
+                 ir->eDispCorr == edispcAllEnerPres);
+
+    inv_volume = 1/det(box);
+    if (fr->n_tpi)
+    {
+        /* Only correct for the interactions with the inserted molecule */
+        density = (natoms - fr->n_tpi)*inv_volume;
+        npairs  = fr->n_tpi;
+    }
+    else
+    {
+        density = natoms*inv_volume;
+        npairs  = 0.5*natoms;
+    }
+
+    if (ir->efep != efepNO)
+    {
+        /* Linear interpolation between the A and B state averages */
+        c6_avg  = (1 - lambda)*fr->avcsix[0] + lambda*fr->avcsix[1];
+        c12_avg = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
+    }
+    else
+    {
+        c6_avg  = fr->avcsix[0];
+        c12_avg = fr->avctwelve[0];
+    }
+
+    /* Dispersion term */
+    ediff      = npairs*(density*fr->enerdiffsix - fr->enershiftsix);
+    *enercorr += c6_avg*ediff;
+    dvdl       = 0.0;
+    if (ir->efep != efepNO)
+    {
+        dvdl += (fr->avcsix[1] - fr->avcsix[0])*ediff;
+    }
+
+    /* Repulsion term, only for the "All" variants */
+    if (bAllTerms)
+    {
+        ediff      = npairs*(density*fr->enerdifftwelve - fr->enershifttwelve);
+        *enercorr += c12_avg*ediff;
+        if (fr->efep != efepNO)
+        {
+            dvdl += (fr->avctwelve[1] - fr->avctwelve[0])*ediff;
+        }
+    }
+
+    if (bPressure)
+    {
+        vir_corr = npairs*density*c6_avg*fr->virdiffsix/3.0;
+        /* Inside this branch bAllTerms is true exactly for edispcAllEnerPres */
+        if (bAllTerms)
+        {
+            vir_corr += npairs*density*c12_avg*fr->virdifftwelve/3.0;
+        }
+        /* The factor 2 is because of the Gromacs virial definition */
+        pres_corr = -2.0*inv_volume*vir_corr*PRESFAC;
+
+        for (d = 0; d < DIM; d++)
+        {
+            virial[d][d] += vir_corr;
+            pres[d][d]   += pres_corr;
+        }
+        *prescorr += pres_corr;
+    }
+
+    /* There is currently no control over when this prints, so it is
+     * only printed when debugging.
+     */
+    if (debug)
+    {
+        if (bAllTerms)
+        {
+            fprintf(debug, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
+                    c6_avg, c12_avg);
+        }
+        if (bPressure)
+        {
+            fprintf(debug,
+                    "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
+                    *enercorr, pres_corr, vir_corr);
+        }
+        else
+        {
+            fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
+        }
+    }
+
+    if (fr->bSepDVDL && do_per_step(step, ir->nstlog))
+    {
+        gmx_print_sepdvdl(fplog, "Dispersion correction", *enercorr, dvdl);
+    }
+    if (fr->efep != efepNO)
+    {
+        *dvdlcorr += dvdl;
+    }
+}
+
+/* Compute the shift vectors for box into fr->shift_vec and, when a graph
+ * is supplied, shift the coordinates x with it. Progress messages go to
+ * fplog when it is non-NULL.
+ */
+void do_pbc_first(FILE *fplog, matrix box, t_forcerec *fr,
+                  t_graph *graph, rvec x[])
+{
+    if (fplog != NULL)
+    {
+        fprintf(fplog, "Removing pbc first time\n");
+    }
+
+    calc_shifts(box, fr->shift_vec);
+
+    if (graph != NULL)
+    {
+        mk_mshift(fplog, graph, fr->ePBC, box, x);
+        if (gmx_debug_at)
+        {
+            p_graph(debug, "do_pbc_first 1", graph);
+        }
+        shift_self(graph, box, x);
+
+        /* By doing an extra mk_mshift the molecules that are broken
+         * because they were e.g. imported from another software
+         * will be made whole again. Such are the healing powers
+         * of GROMACS.
+         */
+        mk_mshift(fplog, graph, fr->ePBC, box, x);
+        if (gmx_debug_at)
+        {
+            p_graph(debug, "do_pbc_first 2", graph);
+        }
+    }
+
+    if (fplog != NULL)
+    {
+        fprintf(fplog, "Done rmpbc\n");
+    }
+}
+
+/* Walk over all molecule blocks of mtop and shift the coordinates of every
+ * molecule with a graph built from its molecule type. x holds the
+ * coordinates of all molecules in block order. Blocks of single-atom
+ * molecules are skipped, as are (when bFirst is FALSE) blocks whose
+ * molecule type has a single charge group.
+ */
+static void low_do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
+                            gmx_mtop_t *mtop, rvec x[],
+                            gmx_bool bFirst)
+{
+    t_graph *graph;
+    int      block, mol;
+    int      first_atom = 0; /* index in x of the current molecule */
+
+    if (bFirst && fplog)
+    {
+        fprintf(fplog, "Removing pbc first time\n");
+    }
+
+    snew(graph, 1);
+    for (block = 0; block < mtop->nmolblock; block++)
+    {
+        gmx_molblock_t *molb = &mtop->molblock[block];
+
+        if (molb->natoms_mol == 1 ||
+            (!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
+        {
+            /* Just one atom or charge group in the molecule, no PBC required */
+            first_atom += molb->nmol*molb->natoms_mol;
+            continue;
+        }
+
+        /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
+        mk_graph_ilist(NULL, mtop->moltype[molb->type].ilist,
+                       0, molb->natoms_mol, FALSE, FALSE, graph);
+
+        for (mol = 0; mol < molb->nmol; mol++)
+        {
+            mk_mshift(fplog, graph, ePBC, box, x+first_atom);
+
+            /* The molecule is whole after this shift. The second
+             * mk_mshift call of do_pbc_first is not needed here,
+             * since this graph is no longer used afterwards.
+             */
+            shift_self(graph, box, x+first_atom);
+
+            first_atom += molb->natoms_mol;
+        }
+        done_graph(graph);
+    }
+    sfree(graph);
+}
+
+/* Entry point for the initial pass: forwards to low_do_pbc_mtop() with
+ * bFirst set to TRUE.
+ */
+void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
+                       gmx_mtop_t *mtop, rvec x[])
+{
+    const gmx_bool bFirst = TRUE;
+
+    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, bFirst);
+}
+
+/* Entry point for later passes: forwards to low_do_pbc_mtop() with
+ * bFirst set to FALSE.
+ */
+void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
+                 gmx_mtop_t *mtop, rvec x[])
+{
+    const gmx_bool bFirst = FALSE;
+
+    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, bFirst);
+}
+
+/* Collect and print the end-of-run accounting.
+ *
+ * The cycle counters are summed with wallcycle_sum(). With more than one
+ * rank the flop counters are summed into a temporary t_nrnb (only when
+ * built with GMX_MPI) and the elapsed times are reduced over the ranks of
+ * the simulation. The simulation master prints the flop count, the cycle
+ * table and the performance summary: to fplog when it is non-NULL, and to
+ * stderr when bWriteStat is set. Ranks with particle-particle duty print
+ * the domain decomposition statistics when domain decomposition is active.
+ */
+void finish_run(FILE *fplog, t_commrec *cr,
+                t_inputrec *inputrec,
+                t_nrnb nrnb[], gmx_wallcycle_t wcycle,
+                gmx_walltime_accounting_t walltime_accounting,
+                wallclock_gpu_t *gputimes,
+                gmx_bool bWriteStat)
+{
+    t_nrnb *nrnb_tot = NULL;
+    real    delta_t;
+    /* nbfs and mflop are set by print_flop() and only read on the master */
+    double  nbfs, mflop;
+    double  elapsed_time,
+            elapsed_time_over_all_ranks,
+            elapsed_time_over_all_threads,
+            elapsed_time_over_all_threads_over_all_ranks;
+
+    wallcycle_sum(cr, wcycle);
+
+    if (cr->nnodes > 1)
+    {
+        /* Temporary total, freed again after printing */
+        snew(nrnb_tot, 1);
+#ifdef GMX_MPI
+        MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+#endif
+    }
+    else
+    {
+        nrnb_tot = nrnb;
+    }
+
+    elapsed_time                                 = walltime_accounting_get_elapsed_time(walltime_accounting);
+    elapsed_time_over_all_ranks                  = elapsed_time;
+    elapsed_time_over_all_threads                = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
+    elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
+#ifdef GMX_MPI
+    if (cr->nnodes > 1)
+    {
+        /* Reduce elapsed_time over all MPI ranks in the current simulation;
+         * the sum is turned into an average.
+         */
+        MPI_Allreduce(&elapsed_time,
+                      &elapsed_time_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+        elapsed_time_over_all_ranks /= cr->nnodes;
+        /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
+         * current simulation; this one stays a sum.
+         */
+        MPI_Allreduce(&elapsed_time_over_all_threads,
+                      &elapsed_time_over_all_threads_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+    }
+#endif
+
+    if (SIMMASTER(cr))
+    {
+        print_flop(fplog, nrnb_tot, &nbfs, &mflop);
+    }
+    if (cr->nnodes > 1)
+    {
+        sfree(nrnb_tot);
+    }
+
+    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
+    {
+        print_dd_statistics(cr, inputrec, fplog);
+    }
+
+    if (SIMMASTER(cr))
+    {
+        wallcycle_print(fplog, cr->nnodes, cr->npmenodes,
+                        elapsed_time_over_all_ranks,
+                        wcycle, gputimes);
+
+        /* Only dynamical integrators have a time step to report */
+        if (EI_DYNAMICS(inputrec->eI))
+        {
+            delta_t = inputrec->delta_t;
+        }
+        else
+        {
+            delta_t = 0;
+        }
+
+        if (fplog)
+        {
+            print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+        if (bWriteStat)
+        {
+            print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+    }
+}
+
+/* Set the initial lambda vector (efptNR components).
+ *
+ * Without free-energy perturbation and without simulated tempering all
+ * components of lambda (and of lam0, when given) are set to zero and the
+ * function returns without logging. Otherwise *fep_state is taken from
+ * the input record and every component is set either to init_lambda
+ * (when that is >= 0) or to the all_lambda entry of the current state;
+ * lam0, when given, receives a copy. With simulated tempering the
+ * positive reference temperatures are replaced by the temperature of the
+ * current state. The resulting vector is written to fplog when non-NULL.
+ */
+extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, real *lambda, double *lam0)
+{
+    /* NOTE: this works, but could use a logic rewrite to keep all the
+     * different types of efep straight.
+     */
+    t_lambda *fep = ir->fepvals;
+    int       c, g;
+
+    if ((ir->efep == efepNO) && !ir->bSimTemp)
+    {
+        for (c = 0; c < efptNR; c++)
+        {
+            lambda[c] = 0.0;
+            if (lam0)
+            {
+                lam0[c] = 0.0;
+            }
+        }
+        return;
+    }
+
+    /* This might overwrite the checkpoint if checkpoint is set -- a kludge
+     * is in for now to prevent this.
+     */
+    *fep_state = fep->init_fep_state;
+
+    for (c = 0; c < efptNR; c++)
+    {
+        /* For backwards compatibility init_lambda overrides the lambda
+         * state; a negative value means it was never initialized.
+         */
+        if (fep->init_lambda >= 0)
+        {
+            lambda[c] = fep->init_lambda;
+        }
+        else
+        {
+            lambda[c] = fep->all_lambda[c][*fep_state];
+        }
+        if (lam0)
+        {
+            lam0[c] = lambda[c];
+        }
+    }
+
+    if (ir->bSimTemp)
+    {
+        /* Need to rescale control temperatures to match current state */
+        for (g = 0; g < ir->opts.ngtc; g++)
+        {
+            if (ir->opts.ref_t[g] > 0)
+            {
+                ir->opts.ref_t[g] = ir->simtempvals->temperatures[*fep_state];
+            }
+        }
+    }
+
+    /* Send to the log the information on the current lambdas */
+    if (fplog != NULL)
+    {
+        fprintf(fplog, "Initial vector of lambda components:[ ");
+        for (c = 0; c < efptNR; c++)
+        {
+            fprintf(fplog, "%10.4f ", lambda[c]);
+        }
+        fprintf(fplog, "]\n");
+    }
+}
+
+
+/* Initialize the state needed before the MD loop starts.
+ *
+ * Sets *t and *t0 to ir->init_t, sets *bSimAnn when any temperature
+ * coupling group is annealed (and then updates the annealing target
+ * temperatures), initializes the lambda vector, and creates the update
+ * (when upd is non-NULL) and center-of-mass removal (when vcm is non-NULL)
+ * data. For dynamical integrators that do not append to existing files
+ * the thermostat citations are requested. Output files and the energy
+ * bookkeeping are only set up when nfile != -1; *outf and *mdebin are left
+ * untouched otherwise. Finally force_vir, shake_vir and mu_tot are cleared.
+ */
+void init_md(FILE *fplog,
+             t_commrec *cr, t_inputrec *ir, const output_env_t oenv,
+             double *t, double *t0,
+             real *lambda, int *fep_state, double *lam0,
+             t_nrnb *nrnb, gmx_mtop_t *mtop,
+             gmx_update_t *upd,
+             int nfile, const t_filenm fnm[],
+             gmx_mdoutf_t *outf, t_mdebin **mdebin,
+             tensor force_vir, tensor shake_vir, rvec mu_tot,
+             gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags)
+{
+    int i;
+
+    /* Initial values */
+    *t = *t0 = ir->init_t;
+
+    *bSimAnn = FALSE;
+    for (i = 0; i < ir->opts.ngtc; i++)
+    {
+        /* set bSimAnn if any group is being annealed */
+        if (ir->opts.annealing[i] != eannNO)
+        {
+            *bSimAnn = TRUE;
+        }
+    }
+    if (*bSimAnn)
+    {
+        update_annealing_target_temp(&(ir->opts), ir->init_t);
+    }
+
+    /* Initialize lambda variables */
+    initialize_lambdas(fplog, ir, fep_state, lambda, lam0);
+
+    if (upd)
+    {
+        *upd = init_update(ir);
+    }
+
+    if (vcm != NULL)
+    {
+        *vcm = init_vcm(fplog, &mtop->groups, ir);
+    }
+
+    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
+    {
+        if (ir->etc == etcBERENDSEN)
+        {
+            please_cite(fplog, "Berendsen84a");
+        }
+        if (ir->etc == etcVRESCALE)
+        {
+            please_cite(fplog, "Bussi2007a");
+        }
+    }
+
+    init_nrnb(nrnb);
+
+    if (nfile != -1)
+    {
+        *outf = init_mdoutf(fplog, nfile, fnm, Flags, cr, ir, mtop, oenv);
+
+        /* When appending, no energy file pointer is passed on */
+        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : mdoutf_get_fp_ene(*outf),
+                              mtop, ir, mdoutf_get_fp_dhdl(*outf));
+    }
+
+    if (ir->bAdress)
+    {
+        please_cite(fplog, "Fritsch12");
+        please_cite(fplog, "Junghans10");
+    }
+    /* Initiate variables */
+    clear_mat(force_vir);
+    clear_mat(shake_vir);
+    clear_rvec(mu_tot);
+
+    debug_gmx();
+}
--- /dev/null
- static void fill_table(t_tabledata *td, int tp, const t_forcerec *fr)
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "gromacs/math/utilities.h"
+#include "typedefs.h"
+#include "names.h"
+#include "gromacs/utility/smalloc.h"
+#include "gmx_fatal.h"
+#include "gromacs/fileio/futil.h"
+#include "xvgr.h"
+#include "vec.h"
+#include "main.h"
+#include "network.h"
+#include "physics.h"
+#include "force.h"
+#include "gromacs/fileio/gmxfio.h"
+#include "macros.h"
+#include "tables.h"
+
+/* All the possible (implemented) table functions.
+ * tprops[] is declared with etabNR entries and lists the name of each
+ * value in this same order, so the two must be kept in sync.
+ */
+enum {
+ etabLJ6,
+ etabLJ12,
+ etabLJ6Shift,
+ etabLJ12Shift,
+ etabShift,
+ etabRF,
+ etabRF_ZERO,
+ etabCOUL,
+ etabEwald,
+ etabEwaldSwitch,
+ etabEwaldUser,
+ etabEwaldUserSwitch,
+ etabLJ6Ewald,
+ etabLJ6Switch,
+ etabLJ12Switch,
+ etabCOULSwitch,
+ etabLJ6Encad,
+ etabLJ12Encad,
+ etabCOULEncad,
+ etabEXPMIN,
+ etabUSER,
+ etabNR /* number of values listed above */
+};
+
+/** Evaluates to true if the table type contains user data,
+ * i.e. when e is etabUSER, etabEwaldUser or etabEwaldUserSwitch.
+ * The argument is expanded three times. */
+#define ETAB_USER(e) ((e) == etabUSER || \
+ (e) == etabEwaldUser || (e) == etabEwaldUserSwitch)
+
+typedef struct {
+ const char *name; /* printable name of the table function */
+ gmx_bool bCoulomb; /* TRUE for Coulomb type functions */
+} t_tab_props;
+
+/* This array holds, for every table function, its name and a flag
+ that tells whether this is a Coulomb type function. The entries are
+ listed in the order of the etab enum values. */
+static const t_tab_props tprops[etabNR] = {
+ { "LJ6", FALSE },
+ { "LJ12", FALSE },
+ { "LJ6Shift", FALSE },
+ { "LJ12Shift", FALSE },
+ { "Shift", TRUE },
+ { "RF", TRUE },
+ { "RF-zero", TRUE },
+ { "COUL", TRUE },
+ { "Ewald", TRUE },
+ { "Ewald-Switch", TRUE },
+ { "Ewald-User", TRUE },
+ { "Ewald-User-Switch", TRUE },
+ { "LJ6Ewald", FALSE },
+ { "LJ6Switch", FALSE },
+ { "LJ12Switch", FALSE },
+ { "COULSwitch", TRUE },
+ { "LJ6-Encad shift", FALSE },
+ { "LJ12-Encad shift", FALSE },
+ { "COUL-Encad shift", TRUE },
+ { "EXPMIN", FALSE },
+ { "USER", FALSE },
+};
+
+/* Index in the table that says which function to use;
+ * etiNR is the number of indices.
+ */
+enum {
+ etiCOUL, etiLJ6, etiLJ12, etiNR
+};
+
+typedef struct {
+ int nx, nx0; /* nx: number of points in x, v and f; nx0: as passed to init_table() (read_tables() passes the index of the first non-zero entry it found) */
+ double tabscale; /* number of points per unit of x: init_table() sets x[i] = i/tabscale */
+ double *x, *v, *f; /* abscissa, potential and force values, nx entries each */
+} t_tabledata;
+
+#define pow2(x) ((x)*(x)) /* integer powers by repeated multiplication; the argument is expanded once per factor, so it must be free of side effects */
+#define pow3(x) ((x)*(x)*(x))
+#define pow4(x) ((x)*(x)*(x)*(x))
+#define pow5(x) ((x)*(x)*(x)*(x)*(x))
+
+/* Long-range Ewald electrostatics potential erf(beta*r)/r.
+ * At r == 0 the expression is 0/0, so its limit 2*beta/sqrt(pi) is
+ * returned instead.
+ */
+double v_q_ewald_lr(double beta, double r)
+{
+    return (r == 0) ? beta*2/sqrt(M_PI) : gmx_erfd(beta*r)/r;
+}
+
+/* Long-range Lennard-Jones Ewald potential
+ *   (1 - exp(-(beta*r)^2)*(1 + (beta*r)^2 + (beta*r)^4/2))/r^6.
+ * At r == 0 the expression is 0/0, so its limit beta^6/6 is returned
+ * instead.
+ */
+double v_lj_ewald_lr(double beta, double r)
+{
+    double x, x2, x4, r6;
+
+    if (r == 0)
+    {
+        return pow(beta, 6)/6;
+    }
+
+    x  = beta*r;
+    x2 = x*x;
+    x4 = x2*x2;
+    r6 = pow(r, 6.0);
+
+    return (1.0 - exp(-x2)*(1 + x2 + 0.5*x4))/r6;
+}
+
+void table_spline3_fill_ewald_lr(real *table_f,
+ real *table_v,
+ real *table_fdv0,
+ int ntab,
+ real dx,
+ real beta,
+ real_space_grid_contribution_computer v_lr)
+{ /* Fill force (and optionally potential) tables on a regular grid.
+ *
+ * table_f     receives ntab force values; always written
+ * table_v     when non-NULL, receives ntab potential values
+ * table_fdv0  when non-NULL and table_v is non-NULL, receives 4*ntab
+ *             values packed per point as: F, next F minus F, V, 0
+ *             (for the last point the second value is minus F)
+ * ntab        number of table points; fewer than 2 is a fatal error
+ * dx          table spacing; point i is at distance i*dx
+ * beta        passed on as first argument of v_lr
+ * v_lr        function returning the potential for (beta, distance)
+ *
+ * The table is built from the last point towards the first.
+ */
+ real tab_max;
+ int i, i_inrange;
+ double dc, dc_new;
+ gmx_bool bOutOfRange;
+ double v_r0, v_r1, v_inrange, vi, a0, a1, a2dx;
+ double x_r0;
+
+ /* This function is called using either v_q_ewald_lr or v_lj_ewald_lr as a function argument
+ * depending on whether we should create electrostatic or Lennard-Jones Ewald tables.
+ */
+
+ if (ntab < 2)
+ {
+ gmx_fatal(FARGS, "Can not make a spline table with less than 2 points");
+ }
+
+ /* We need some margin to be able to divide table values by r
+ * in the kernel and also to do the integration arithmetics
+ * without going out of range. Furthermore, we divide by dx below.
+ */
+ tab_max = GMX_REAL_MAX*0.0001;
+
+ /* This function produces a table with:
+ * maximum energy error: V'''/(6*12*sqrt(3))*dx^3
+ * maximum force error: V'''/(6*4)*dx^2
+ * The rms force error is the max error times 1/sqrt(5)=0.45.
+ */
+
+ bOutOfRange = FALSE;
+ i_inrange = ntab;
+ v_inrange = 0;
+ dc = 0;
+ for (i = ntab-1; i >= 0; i--)
+ {
+ x_r0 = i*dx;
+
+ v_r0 = (*v_lr)(beta, x_r0);
+
+ if (!bOutOfRange)
+ {
+ i_inrange = i;
+ v_inrange = v_r0;
+
+ vi = v_r0;
+ }
+ else
+ {
+ /* Linear continuation for the last point in range */
+ vi = v_inrange - dc*(i - i_inrange)*dx;
+ }
+
+ if (table_v != NULL)
+ {
+ table_v[i] = vi;
+ }
+
+ if (i == 0)
+ {
+ continue;
+ }
+
+ /* Get the potential at table point i-1 */
+ v_r1 = (*v_lr)(beta, (i-1)*dx);
+
+ /* v_r1 != v_r1 is true only for NaN */
+ if (v_r1 != v_r1 || v_r1 < -tab_max || v_r1 > tab_max)
+ {
+ bOutOfRange = TRUE;
+ }
+
+ if (!bOutOfRange)
+ {
+ /* Calculate the average second derivative times dx over interval i-1 to i.
+ * Using the function values at the end points and in the middle.
+ */
+ a2dx = (v_r0 + v_r1 - 2*(*v_lr)(beta, x_r0-0.5*dx))/(0.25*dx);
+ /* Set the derivative of the spline to match the difference in potential
+ * over the interval plus the average effect of the quadratic term.
+ * This is the essential step for minimizing the error in the force.
+ */
+ dc = (v_r0 - v_r1)/dx + 0.5*a2dx;
+ }
+
+ if (i == ntab - 1)
+ {
+ /* Fill the table with the force, minus the derivative of the spline */
+ table_f[i] = -dc;
+ }
+ else
+ {
+ /* tab[i] will contain the average of the splines over the two intervals;
+ * the other half was stored by the previous iteration (see below).
+ */
+ table_f[i] += -0.5*dc;
+ }
+
+ if (!bOutOfRange)
+ {
+ /* Make spline s(x) = a0 + a1*(x - xr) + 0.5*a2*(x - xr)^2
+ * matching the potential at the two end points
+ * and the derivative dc at the end point xr.
+ */
+ a0 = v_r0;
+ a1 = dc;
+ a2dx = (a1*dx + v_r1 - a0)*2/dx;
+
+ /* Set dc to the derivative at the next point */
+ dc_new = a1 - a2dx;
+
+ /* dc_new != dc_new is true only for NaN */
+ if (dc_new != dc_new || dc_new < -tab_max || dc_new > tab_max)
+ {
+ bOutOfRange = TRUE;
+ }
+ else
+ {
+ dc = dc_new;
+ }
+ }
+
+ /* First half of the force at point i-1; completed in the next iteration */
+ table_f[(i-1)] = -0.5*dc;
+ }
+ /* Currently the last value only contains half the force: double it */
+ table_f[0] *= 2;
+
+ if (table_v != NULL && table_fdv0 != NULL)
+ {
+ /* Copy to FDV0 table too. Allocation occurs in forcerec.c,
+ * init_ewald_f_table().
+ */
+ for (i = 0; i < ntab-1; i++)
+ {
+ table_fdv0[4*i] = table_f[i];
+ table_fdv0[4*i+1] = table_f[i+1]-table_f[i];
+ table_fdv0[4*i+2] = table_v[i];
+ table_fdv0[4*i+3] = 0.0;
+ }
+ table_fdv0[4*(ntab-1)] = table_f[(ntab-1)];
+ table_fdv0[4*(ntab-1)+1] = -table_f[(ntab-1)];
+ table_fdv0[4*(ntab-1)+2] = table_v[(ntab-1)];
+ table_fdv0[4*(ntab-1)+3] = 0.0;
+ }
+}
+
+/* Return the scale (1/spacing) for third order spline interpolation
+ * of the Ewald mesh contribution which needs to be subtracted
+ * from the non-bonded interactions.
+ *
+ * Two candidate scales are computed, one from a force tolerance and one
+ * from an energy tolerance; the larger of the two is returned.
+ */
+real ewald_spline3_table_scale(real ewaldcoeff, real rc)
+{
+    const double d3_max    = 1.0522;        /* max of (erf(x)/x)''' */
+    const double force_tol = GMX_FLOAT_EPS; /* single precision accuracy */
+    double       energy_tol;
+    double       scale_force, scale_energy;
+
+    scale_force = sqrt(d3_max/(6*4*force_tol*ewaldcoeff))*ewaldcoeff;
+
+    /* Energy tolerance: 10x more accurate than the cut-off jump,
+     * but never below GMX_REAL_EPS.
+     */
+    energy_tol   = 0.1*gmx_erfc(ewaldcoeff*rc);
+    energy_tol   = max(energy_tol, GMX_REAL_EPS);
+    scale_energy = pow(d3_max/(6*12*sqrt(3)*energy_tol), 1.0/3.0)*ewaldcoeff;
+
+    return max(scale_force, scale_energy);
+}
+
+/* Evaluate a cubic spline table at distance r, in the same way as it
+ * is done in the inner loop.
+ *
+ * VFtab     table data
+ * offset    index of the first coefficient of the wanted table at point 0;
+ *           to evaluate table no N, set offset to 4*N
+ * stride    number of reals between consecutive table points
+ * tabscale  number of table points per unit of r
+ * y         receives the table value at r
+ * yp        receives the table derivative at r (scaled by tabscale)
+ *
+ * NOTE(review): the previous comment stated that stride is 4 for a
+ * Buckingham table and 3 otherwise - confirm against the callers.
+ *
+ * Normal precision is used on purpose, since that is what the inner
+ * loops use.
+ */
+static void evaluate_table(real VFtab[], int offset, int stride,
+                           real tabscale, real r, real *y, real *yp)
+{
+    int  point, base;
+    real pos, frac, frac2;
+    real c0, c1, c2frac, c3frac2, slope;
+
+    /* Split the table position into a point index and a fraction */
+    pos   = r*tabscale;
+    point = (int)pos;
+    frac  = pos - point;
+    frac2 = frac*frac;
+
+    base    = offset+stride*point;
+    c0      = VFtab[base];
+    c1      = VFtab[base+1];
+    c2frac  = frac*VFtab[base+2];
+    c3frac2 = frac2*VFtab[base+3];
+
+    slope = c1+c2frac+c3frac2;
+    *y    = c0+frac*slope;
+    *yp   = (slope+c2frac+2.0*c3frac2)*tabscale;
+}
+
+/* Convert n tabulated potential/force pairs into cubic spline coefficients.
+ *
+ * For point i the four values V, F, G, H, each multiplied by scalefactor,
+ * are stored in dest starting at index offset + i*stride. The last point
+ * has no following interval and gets a linear potential, reusing the
+ * spacing of the previous interval.
+ *
+ * Double precision is used for the intermediate variables and for the
+ * x/Vtab/Ftab input to avoid unnecessary loss of precision.
+ */
+static void copy2table(int n, int offset, int stride,
+                       double x[], double Vtab[], double Ftab[], real scalefactor,
+                       real dest[])
+{
+    int    i;
+    double spacing = 0;
+    double F, G, H;
+
+    for (i = 0; i < n; i++)
+    {
+        real *entry = dest + offset + i*stride;
+
+        if (i == n-1)
+        {
+            /* Fill the last entry with a linear potential,
+             * this is mainly for rounding issues with angle and dihedral potentials.
+             */
+            F = -Ftab[i]*spacing;
+            G = 0;
+            H = 0;
+        }
+        else
+        {
+            spacing = x[i+1] - x[i];
+            F       = -Ftab[i]*spacing;
+            G       = 3*(Vtab[i+1] - Vtab[i]) + (Ftab[i+1] + 2*Ftab[i])*spacing;
+            H       = -2*(Vtab[i+1] - Vtab[i]) - (Ftab[i+1] + Ftab[i])*spacing;
+        }
+
+        entry[0] = scalefactor*Vtab[i];
+        entry[1] = scalefactor*F;
+        entry[2] = scalefactor*G;
+        entry[3] = scalefactor*H;
+    }
+}
+
+/* Set up td for n points with the given nx0 and tabscale.
+ * With bAlloc set, the x, v and f arrays are allocated with n entries;
+ * otherwise they must already exist. x is filled with i/tabscale.
+ */
+static void init_table(int n, int nx0,
+                       double tabscale, t_tabledata *td, gmx_bool bAlloc)
+{
+    int point;
+
+    td->nx       = n;
+    td->nx0      = nx0;
+    td->tabscale = tabscale;
+
+    if (bAlloc)
+    {
+        snew(td->x, td->nx);
+        snew(td->v, td->nx);
+        snew(td->f, td->nx);
+    }
+
+    for (point = 0; point < td->nx; point++)
+    {
+        td->x[point] = point/tabscale;
+    }
+}
+
+static void spline_forces(int nx, double h, double v[], gmx_bool bS3, gmx_bool bE3,
+ double f[])
+{ /* Compute forces f[] for a cubic spline through the nx equally spaced
+ * potential values v[], with spacing h.
+ *
+ * bS3  TRUE: fit the third derivative of V at the start and compute f[0];
+ *      FALSE: f[0] is used as given and computation starts at index 1
+ * bE3  TRUE: fit the third derivative of V at the end and compute f[nx-1];
+ *      FALSE: f[nx-1] is used as given and computation stops at nx-2
+ *
+ * The tridiagonal system is solved by forward elimination followed by
+ * back substitution. Fewer than 4 points with a third derivative
+ * boundary condition is a fatal error.
+ */
+ int start, end, i;
+ double v3, b_s, b_e, b;
+ double beta, *gamma;
+
+ /* Formulas can be found in:
+ * H.J.C. Berendsen, Simulating the Physical World, Cambridge 2007
+ */
+
+ if (nx < 4 && (bS3 || bE3))
+ {
+ gmx_fatal(FARGS, "Can not generate splines with third derivative boundary conditions with less than 4 (%d) points", nx);
+ }
+
+ /* To make life easy we initially set the spacing to 1
+ * and correct for this at the end.
+ */
+ beta = 2; /* overwritten below before it is read */
+ if (bS3)
+ {
+ /* Fit V''' at the start */
+ v3 = v[3] - 3*v[2] + 3*v[1] - v[0];
+ if (debug)
+ {
+ fprintf(debug, "The left third derivative is %g\n", v3/(h*h*h));
+ }
+ b_s = 2*(v[1] - v[0]) + v3/6;
+ start = 0;
+
+ if (FALSE) /* never executed: alternative V'' boundary condition */
+ {
+ /* Fit V'' at the start */
+ real v2;
+
+ v2 = -v[3] + 4*v[2] - 5*v[1] + 2*v[0];
+ /* v2 = v[2] - 2*v[1] + v[0]; */
+ if (debug)
+ {
+ fprintf(debug, "The left second derivative is %g\n", v2/(h*h));
+ }
+ b_s = 3*(v[1] - v[0]) - v2/2;
+ start = 0;
+ }
+ }
+ else
+ {
+ /* Use the supplied f[0] as boundary value */
+ b_s = 3*(v[2] - v[0]) + f[0]*h;
+ start = 1;
+ }
+ if (bE3)
+ {
+ /* Fit V''' at the end */
+ v3 = v[nx-1] - 3*v[nx-2] + 3*v[nx-3] - v[nx-4];
+ if (debug)
+ {
+ fprintf(debug, "The right third derivative is %g\n", v3/(h*h*h));
+ }
+ b_e = 2*(v[nx-1] - v[nx-2]) + v3/6;
+ end = nx;
+ }
+ else
+ {
+ /* V'=0 at the end */
+ b_e = 3*(v[nx-1] - v[nx-3]) + f[nx-1]*h;
+ end = nx - 1;
+ }
+
+ snew(gamma, nx);
+ beta = (bS3 ? 1 : 4);
+
+ /* For V'' fitting */
+ /* beta = (bS3 ? 2 : 4); */
+
+ /* Forward elimination */
+ f[start] = b_s/beta;
+ for (i = start+1; i < end; i++)
+ {
+ gamma[i] = 1/beta;
+ beta = 4 - gamma[i];
+ b = 3*(v[i+1] - v[i-1]);
+ f[i] = (b - f[i-1])/beta;
+ }
+ gamma[end-1] = 1/beta;
+ beta = (bE3 ? 1 : 4) - gamma[end-1];
+ f[end-1] = (b_e - f[end-2])/beta;
+
+ /* Back substitution */
+ for (i = end-2; i >= start; i--)
+ {
+ f[i] -= gamma[i+1]*f[i+1];
+ }
+ sfree(gamma);
+
+ /* Correct for the minus sign and the spacing */
+ for (i = start; i < end; i++)
+ {
+ f[i] = -f[i]/h;
+ }
+}
+
+/* Generate the forces f[] for a table that only contains potential values.
+ *
+ * Leading and trailing zero entries of v[] are left out of the spline
+ * fit. The start always uses a third derivative boundary condition; the
+ * end uses one only when the fitted range runs up to the last point, and
+ * V'=0 otherwise. Dihedral tables (angle == 2) are a fatal error.
+ * v[] must contain at least one non-zero value, as the scans for the
+ * first and last non-zero entry are not bounded.
+ */
+static void set_forces(FILE *fp, int angle,
+                       int nx, double h, double v[], double f[],
+                       int table)
+{
+    int      first, last;
+    gmx_bool bEndV3;
+
+    if (angle == 2)
+    {
+        gmx_fatal(FARGS,
+                  "Force generation for dihedral tables is not (yet) implemented");
+    }
+
+    /* Index of the first non-zero potential value */
+    for (first = 0; v[first] == 0; first++)
+    {
+    }
+
+    /* One past the last non-zero potential value */
+    for (last = nx; v[last-1] == 0; last--)
+    {
+    }
+    /* Either fit up to the table end, or include one zero point */
+    last   = (last > nx - 2) ? nx : last + 1;
+    bEndV3 = (last == nx);
+
+    if (fp)
+    {
+        fprintf(fp, "Generating forces for table %d, boundary conditions: V''' at %g, %s at %g\n",
+                table+1, first*h, bEndV3 ? "V'''" : "V'=0", (last-1)*h);
+    }
+    spline_forces(last-first, h, v+first, TRUE, bEndV3, f+first);
+}
+
+static void read_tables(FILE *fp, const char *fn,
+ int ntab, int angle, t_tabledata td[])
+{ /* Read ntab user tables from the xvg file fn into td[0..ntab-1].
+ *
+ * The file is located with gmxlibfn() and must contain 2*ntab+1 columns:
+ * x followed by a potential and a force column per table.
+ * angle selects the required x range: 0 for a table that must start at
+ * x = 0, 1 for 0 to 180, any other value for -180 to 180.
+ * The x values must be equally spaced and all values must lie within
+ * 1% of GMX_REAL_MAX; violations are fatal errors. A table with a
+ * non-zero potential but an all-zero force column gets its forces
+ * generated by set_forces(); otherwise the forces are compared against
+ * the numerical derivative of the potential and a warning is issued
+ * when they deviate by more than 20% on average.
+ * fp, when non-NULL, receives informational messages. The x, v and f
+ * arrays of every td[k] are allocated here.
+ */
+ char *libfn;
+ char buf[STRLEN];
+ double **yy = NULL, start, end, dx0, dx1, ssd, vm, vp, f, numf;
+ int k, i, nx, nx0 = 0, ny, nny, ns;
+ gmx_bool bAllZero, bZeroV, bZeroF;
+ double tabscale;
+
+ nny = 2*ntab+1;
+ libfn = gmxlibfn(fn);
+ nx = read_xvg(libfn, &yy, &ny);
+ if (ny != nny)
+ {
+ gmx_fatal(FARGS, "Trying to read file %s, but nr columns = %d, should be %d",
+ libfn, ny, nny);
+ }
+ if (angle == 0)
+ {
+ if (yy[0][0] != 0.0)
+ {
+ gmx_fatal(FARGS,
+ "The first distance in file %s is %f nm instead of %f nm",
+ libfn, yy[0][0], 0.0);
+ }
+ }
+ else
+ {
+ if (angle == 1)
+ {
+ start = 0.0;
+ }
+ else
+ {
+ start = -180.0;
+ }
+ end = 180.0;
+ if (yy[0][0] != start || yy[0][nx-1] != end)
+ {
+ gmx_fatal(FARGS, "The angles in file %s should go from %f to %f instead of %f to %f\n",
+ libfn, start, end, yy[0][0], yy[0][nx-1]);
+ }
+ }
+
+ /* Number of points per unit of x */
+ tabscale = (nx-1)/(yy[0][nx-1] - yy[0][0]);
+
+ if (fp)
+ {
+ fprintf(fp, "Read user tables from %s with %d data points.\n", libfn, nx);
+ if (angle == 0)
+ {
+ fprintf(fp, "Tabscale = %g points/nm\n", tabscale);
+ }
+ }
+
+ /* bAllZero stays TRUE until the first non-zero value in any table;
+ * nx0 records the index of that first value.
+ */
+ bAllZero = TRUE;
+ for (k = 0; k < ntab; k++)
+ {
+ bZeroV = TRUE;
+ bZeroF = TRUE;
+ for (i = 0; (i < nx); i++)
+ {
+ if (i >= 2)
+ {
+ dx0 = yy[0][i-1] - yy[0][i-2];
+ dx1 = yy[0][i] - yy[0][i-1];
+ /* Check for 1% deviation in spacing */
+ if (fabs(dx1 - dx0) >= 0.005*(fabs(dx0) + fabs(dx1)))
+ {
+ gmx_fatal(FARGS, "In table file '%s' the x values are not equally spaced: %f %f %f", fn, yy[0][i-2], yy[0][i-1], yy[0][i]);
+ }
+ }
+ /* Potential column of table k */
+ if (yy[1+k*2][i] != 0)
+ {
+ bZeroV = FALSE;
+ if (bAllZero)
+ {
+ bAllZero = FALSE;
+ nx0 = i;
+ }
+ if (yy[1+k*2][i] > 0.01*GMX_REAL_MAX ||
+ yy[1+k*2][i] < -0.01*GMX_REAL_MAX)
+ {
+ gmx_fatal(FARGS, "Out of range potential value %g in file '%s'",
+ yy[1+k*2][i], fn);
+ }
+ }
+ /* Force column of table k */
+ if (yy[1+k*2+1][i] != 0)
+ {
+ bZeroF = FALSE;
+ if (bAllZero)
+ {
+ bAllZero = FALSE;
+ nx0 = i;
+ }
+ if (yy[1+k*2+1][i] > 0.01*GMX_REAL_MAX ||
+ yy[1+k*2+1][i] < -0.01*GMX_REAL_MAX)
+ {
+ gmx_fatal(FARGS, "Out of range force value %g in file '%s'",
+ yy[1+k*2+1][i], fn);
+ }
+ }
+ }
+
+ if (!bZeroV && bZeroF)
+ {
+ /* Only a potential was supplied: generate the forces */
+ set_forces(fp, angle, nx, 1/tabscale, yy[1+k*2], yy[1+k*2+1], k);
+ }
+ else
+ {
+ /* Check if the second column is close to minus the numerical
+ * derivative of the first column.
+ */
+ ssd = 0;
+ ns = 0;
+ for (i = 1; (i < nx-1); i++)
+ {
+ vm = yy[1+2*k][i-1];
+ vp = yy[1+2*k][i+1];
+ f = yy[1+2*k+1][i];
+ if (vm != 0 && vp != 0 && f != 0)
+ {
+ /* Take the centered difference */
+ numf = -(vp - vm)*0.5*tabscale;
+ ssd += fabs(2*(f - numf)/(f + numf));
+ ns++;
+ }
+ }
+ if (ns > 0)
+ {
+ ssd /= ns;
+ /* NOTE(review): unbounded sprintf into buf[STRLEN]; the message includes libfn - confirm STRLEN is large enough for any path */
+ sprintf(buf, "For the %d non-zero entries for table %d in %s the forces deviate on average %d%% from minus the numerical derivative of the potential\n", ns, k, libfn, (int)(100*ssd+0.5));
+ if (debug)
+ {
+ fprintf(debug, "%s", buf);
+ }
+ if (ssd > 0.2)
+ {
+ if (fp)
+ {
+ fprintf(fp, "\nWARNING: %s\n", buf);
+ }
+ fprintf(stderr, "\nWARNING: %s\n", buf);
+ }
+ }
+ }
+ }
+ if (bAllZero && fp)
+ {
+ fprintf(fp, "\nNOTE: All elements in table %s are zero\n\n", libfn);
+ }
+
+ /* Copy the columns into freshly allocated table data; the x values
+ * set by init_table() are overwritten with the values from the file.
+ */
+ for (k = 0; (k < ntab); k++)
+ {
+ init_table(nx, nx0, tabscale, &(td[k]), TRUE);
+ for (i = 0; (i < nx); i++)
+ {
+ td[k].x[i] = yy[0][i];
+ td[k].v[i] = yy[2*k+1][i];
+ td[k].f[i] = yy[2*k+2][i];
+ }
+ }
+ for (i = 0; (i < ny); i++)
+ {
+ sfree(yy[i]);
+ }
+ sfree(yy);
+ sfree(libfn);
+}
+
+/*
+ * Release the x, v and f arrays held by one table data entry.
+ * A NULL td is accepted and ignored. The t_tabledata struct itself is
+ * not freed here; the caller keeps ownership of it.
+ */
+static void done_tabledata(t_tabledata *td)
+{
+ if (!td)
+ {
+ return;
+ }
+
+ sfree(td->x);
+ sfree(td->v);
+ sfree(td->f);
+}
+
- double r, r2, r6, rc6;
++/*
++ * Fill td->v and td->f for table type tp on the grid td->x, for indices
++ * td->nx0 up to td->nx-1, and then continue the table linearly from nx0
++ * down to index 0.
++ *
++ * td      - table data; x must already be set. For user table types
++ *           (ETAB_USER) the existing v and f entries are added to the
++ *           analytical values.
++ * tp      - table type (one of the etab values)
++ * fr      - force record supplying cut-offs, switch radii, Ewald
++ *           coefficients, reaction-field constants, reppow and modifiers
++ * b14only - when set, no potential-switch, force-switch or potential-shift
++ *           is applied
++ *
++ * Calls gmx_fatal for table types that are not implemented, and for table
++ * types that cannot be combined with the potential-shift modifier.
++ */
++static void fill_table(t_tabledata *td, int tp, const t_forcerec *fr,
++ gmx_bool b14only)
+{
+ /* Fill the table according to the formulas in the manual.
+ * In principle, we only need the potential and the second
+ * derivative, but then we would have to do lots of calculations
+ * in the inner loop. By precalculating some terms (see manual)
+ * we get better eventual performance, despite a larger table.
+ *
+ * Since some of these higher-order terms are very small,
+ * we always use double precision to calculate them here, in order
+ * to avoid unnecessary loss of precision.
+ */
+#ifdef DEBUG_SWITCH
+ FILE *fp;
+#endif
+ int i;
+ double reppow, p;
+ double r1, rc, r12, r13;
- gmx_bool bSwitch, bShift;
++ double r, r2, r6, rc2, rc6, rc12;
+ double expr, Vtab, Ftab;
+ /* Parameters for David's function */
+ double A = 0, B = 0, C = 0, A_3 = 0, B_4 = 0;
+ /* Parameters for the switching function */
+ double ksw, swi, swi1;
+ /* Temporary parameters */
- bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
- (tp == etabCOULSwitch) ||
- (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
-
- bShift = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) ||
- (tp == etabShift));
++ gmx_bool bPotentialSwitch, bForceSwitch, bPotentialShift;
+ double ewc = fr->ewaldcoeff_q;
+ double ewclj = fr->ewaldcoeff_lj;
++ double Vcut = 0;
+
- if (bSwitch)
++ if (b14only)
++ {
++ bPotentialSwitch = FALSE;
++ bForceSwitch = FALSE;
++ bPotentialShift = FALSE;
++ }
++ else
++ {
++ bPotentialSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
++ (tp == etabCOULSwitch) ||
++ (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch) ||
++ (tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodPOTSWITCH)) ||
++ (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodPOTSWITCH)));
++ bForceSwitch = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) ||
++ (tp == etabShift) ||
++ (tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodFORCESWITCH)) ||
++ (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodFORCESWITCH)));
++ bPotentialShift = ((tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodPOTSHIFT)) ||
++ (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodPOTSHIFT)));
++ }
+
+ reppow = fr->reppow;
+
+ if (tprops[tp].bCoulomb)
+ {
+ r1 = fr->rcoulomb_switch;
+ rc = fr->rcoulomb;
+ }
+ else
+ {
+ r1 = fr->rvdw_switch;
+ rc = fr->rvdw;
+ }
- if (bShift)
++ if (bPotentialSwitch)
+ {
+ ksw = 1.0/(pow5(rc-r1));
+ }
+ else
+ {
+ ksw = 0.0;
+ }
- if (bSwitch)
++ if (bForceSwitch)
+ {
+ if (tp == etabShift)
+ {
+ p = 1;
+ }
+ else if (tp == etabLJ6Shift)
+ {
+ p = 6;
+ }
+ else
+ {
+ p = reppow;
+ }
+
+ A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc, p+2)*pow2(rc-r1));
+ B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc, p+2)*pow3(rc-r1));
+ C = 1.0/pow(rc, p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
+ if (tp == etabLJ6Shift)
+ {
+ A = -A;
+ B = -B;
+ C = -C;
+ }
+ A_3 = A/3.0;
+ B_4 = B/4.0;
+ }
+ if (debug)
+ {
+ fprintf(debug, "Setting up tables\n"); fflush(debug);
+ }
+
+#ifdef DEBUG_SWITCH
+ fp = xvgropen("switch.xvg", "switch", "r", "s");
+#endif
+
++ if (bPotentialShift)
++ {
++ /* Pre-compute Vcut, the value of the unmodified potential at the
++ * cut-off. The main loop below subtracts it for r < rc.
++ */
++ rc2 = rc*rc;
++ rc6 = 1.0/(rc2*rc2*rc2);
++ if (gmx_within_tol(reppow, 12.0, 10*GMX_DOUBLE_EPS))
++ {
++ rc12 = rc6*rc6;
++ }
++ else
++ {
++ rc12 = pow(rc, -reppow);
++ }
++
++ switch (tp)
++ {
++ case etabLJ6:
++ /* Dispersion */
++ Vcut = -rc6;
++ break;
++ case etabLJ6Ewald:
++ Vcut = -rc6*exp(-ewclj*ewclj*rc2)*(1 + ewclj*ewclj*rc2 + pow4(ewclj)*rc2*rc2/2);
++ break;
++ case etabLJ12:
++ /* Repulsion */
++ Vcut = rc12;
++ break;
++ case etabCOUL:
++ Vcut = 1.0/rc;
++ break;
++ case etabEwald:
++ case etabEwaldSwitch:
++ /* Must be stored in Vcut: Vtab is reset for every table point
++ * in the loop below, so a value stored there would be lost and
++ * the shift would silently be zero.
++ */
++ Vcut = gmx_erfc(ewc*rc)/rc;
++ break;
++ case etabEwaldUser:
++ /* Only calculate minus the reciprocal space contribution */
++ Vcut = -gmx_erf(ewc*rc)/rc;
++ break;
++ case etabRF:
++ case etabRF_ZERO:
++ /* No need for preventing the usage of modifiers with RF */
++ Vcut = 0.0;
++ break;
++ case etabEXPMIN:
++ Vcut = exp(-rc);
++ break;
++ default:
++ gmx_fatal(FARGS, "Cannot apply new potential-shift modifier to interaction type '%s' yet. (%s,%d)",
++ tprops[tp].name, __FILE__, __LINE__);
++ }
++ }
++
+ for (i = td->nx0; (i < td->nx); i++)
+ {
+ r = td->x[i];
+ r2 = r*r;
+ r6 = 1.0/(r2*r2*r2);
+ if (gmx_within_tol(reppow, 12.0, 10*GMX_DOUBLE_EPS))
+ {
+ r12 = r6*r6;
+ }
+ else
+ {
+ r12 = pow(r, -reppow);
+ }
+ Vtab = 0.0;
+ Ftab = 0.0;
- if (bShift)
++ if (bPotentialSwitch)
+ {
+ /* swi is function, swi1 1st derivative and swi2 2nd derivative */
+ /* The switch function is 1 for r<r1, 0 for r>rc, and smooth for
+ * r1<=r<=rc. The 1st and 2nd derivatives are both zero at
+ * r1 and rc.
+ * ksw is just the constant 1/(rc-r1)^5, to save some calculations...
+ */
+ if (r <= r1)
+ {
+ swi = 1.0;
+ swi1 = 0.0;
+ }
+ else if (r >= rc)
+ {
+ swi = 0.0;
+ swi1 = 0.0;
+ }
+ else
+ {
+ swi = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1)
+ + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw;
+ swi1 = -30*pow2(r-r1)*ksw*pow2(rc-r1)
+ + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw;
+ }
+ }
+ else /* not really needed, but avoids compiler warnings... */
+ {
+ swi = 1.0;
+ swi1 = 0.0;
+ }
+#ifdef DEBUG_SWITCH
++ /* NOTE(review): swi2 is not declared in this function, so the line
++ * below cannot compile when DEBUG_SWITCH is defined - confirm and fix.
++ */
+ fprintf(fp, "%10g %10g %10g %10g\n", r, swi, swi1, swi2);
+#endif
+
+ rc6 = rc*rc*rc;
+ rc6 = 1.0/(rc6*rc6);
+
+ switch (tp)
+ {
+ case etabLJ6:
+ /* Dispersion */
+ Vtab = -r6;
+ Ftab = 6.0*Vtab/r;
+ break;
+ case etabLJ6Switch:
+ case etabLJ6Shift:
+ /* Dispersion */
+ if (r < rc)
+ {
+ Vtab = -r6;
+ Ftab = 6.0*Vtab/r;
+ break;
+ }
+ break;
+ case etabLJ12:
+ /* Repulsion */
+ Vtab = r12;
+ Ftab = reppow*Vtab/r;
+ break;
+ case etabLJ12Switch:
+ case etabLJ12Shift:
+ /* Repulsion */
+ if (r < rc)
+ {
+ Vtab = r12;
+ Ftab = reppow*Vtab/r;
+ }
+ break;
+ case etabLJ6Encad:
+ if (r < rc)
+ {
+ Vtab = -(r6-6.0*(rc-r)*rc6/rc-rc6);
+ Ftab = -(6.0*r6/r-6.0*rc6/rc);
+ }
+ else /* r>rc */
+ {
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ case etabLJ12Encad:
++ /* NOTE(review): this branch is identical to the etabLJ6Encad
++ * branch above, including the r6 terms and the sign; a repulsion
++ * table would be expected to use r12 - confirm.
++ */
+ if (r < rc)
+ {
+ Vtab = -(r6-6.0*(rc-r)*rc6/rc-rc6);
+ Ftab = -(6.0*r6/r-6.0*rc6/rc);
+ }
+ else /* r>rc */
+ {
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ case etabCOUL:
+ Vtab = 1.0/r;
+ Ftab = 1.0/r2;
+ break;
+ case etabCOULSwitch:
+ case etabShift:
+ if (r < rc)
+ {
+ Vtab = 1.0/r;
+ Ftab = 1.0/r2;
+ }
+ break;
+ case etabEwald:
+ case etabEwaldSwitch:
+ Vtab = gmx_erfc(ewc*r)/r;
+ Ftab = gmx_erfc(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
+ break;
+ case etabEwaldUser:
+ case etabEwaldUserSwitch:
+ /* Only calculate the negative of the reciprocal space contribution */
+ Vtab = -gmx_erf(ewc*r)/r;
+ Ftab = -gmx_erf(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r;
+ break;
+ case etabLJ6Ewald:
+ Vtab = -r6*exp(-ewclj*ewclj*r2)*(1 + ewclj*ewclj*r2 + pow4(ewclj)*r2*r2/2);
+ Ftab = 6.0*Vtab/r - r6*exp(-ewclj*ewclj*r2)*pow5(ewclj)*ewclj*r2*r2*r;
+ break;
+ case etabRF:
+ case etabRF_ZERO:
+ Vtab = 1.0/r + fr->k_rf*r2 - fr->c_rf;
+ Ftab = 1.0/r2 - 2*fr->k_rf*r;
+ if (tp == etabRF_ZERO && r >= rc)
+ {
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ case etabEXPMIN:
+ expr = exp(-r);
+ Vtab = expr;
+ Ftab = expr;
+ break;
+ case etabCOULEncad:
+ if (r < rc)
+ {
+ Vtab = 1.0/r-(rc-r)/(rc*rc)-1.0/rc;
+ Ftab = 1.0/r2-1.0/(rc*rc);
+ }
+ else /* r>rc */
+ {
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ default:
+ gmx_fatal(FARGS, "Table type %d not implemented yet. (%s,%d)",
+ tp, __FILE__, __LINE__);
+ }
- if ((r > r1) && bSwitch)
++ if (bForceSwitch)
+ {
+ /* Normal coulomb with cut-off correction for potential */
+ if (r < rc)
+ {
+ Vtab -= C;
+ /* If in Shifting range add something to it */
+ if (r > r1)
+ {
+ r12 = (r-r1)*(r-r1);
+ r13 = (r-r1)*r12;
+ Vtab += -A_3*r13 - B_4*r12*r12;
+ Ftab += A*r12 + B*r13;
+ }
+ }
++ else
++ {
++ /* Make sure interactions are zero outside cutoff with modifiers */
++ Vtab = 0;
++ Ftab = 0;
++ }
++ }
++ if (bPotentialShift)
++ {
++ if (r < rc)
++ {
++ Vtab -= Vcut;
++ }
++ else
++ {
++ /* Make sure interactions are zero outside cutoff with modifiers */
++ Vtab = 0;
++ Ftab = 0;
++ }
+ }
+
+ if (ETAB_USER(tp))
+ {
+ Vtab += td->v[i];
+ Ftab += td->f[i];
+ }
+
- Ftab = Ftab*swi - Vtab*swi1;
- Vtab = Vtab*swi;
++ if (bPotentialSwitch)
+ {
-
++ if (r >= rc)
++ {
++ /* Make sure interactions are zero outside cutoff with modifiers */
++ Vtab = 0;
++ Ftab = 0;
++ }
++ else if (r > r1)
++ {
++ Ftab = Ftab*swi - Vtab*swi1;
++ Vtab = Vtab*swi;
++ }
+ }
- switch (fr->vdw_modifier)
+ /* Convert to single precision when we store to mem */
+ td->v[i] = Vtab;
+ td->f[i] = Ftab;
+ }
+
+ /* Continue the table linearly from nx0 to 0.
+ * These values are only required for energy minimization with overlap or TPI.
+ */
+ for (i = td->nx0-1; i >= 0; i--)
+ {
+ td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]);
+ td->f[i] = td->f[i+1];
+ }
+
+#ifdef DEBUG_SWITCH
+ gmx_fio_fclose(fp);
+#endif
+}
+/*
+ * Choose the table type for each of the three interaction tables and
+ * store it in tabsel[etiCOUL], tabsel[etiLJ6] and tabsel[etiLJ12].
+ *
+ * tabsel  - output array indexed by etiCOUL, etiLJ6 and etiLJ12
+ * fr      - force record; eeltype, vdwtype, vdw_modifier, bBHAM and the
+ *           Coulomb cut-off and switch radius are read
+ * b14only - when set, the electrostatics type is reduced to eelRF, eelUSER
+ *           or eelCUT, Van der Waals is reduced to evdwCUT unless it is
+ *           evdwUSER, and both bBHAM and the vdw modifier are ignored
+ *
+ * Calls gmx_fatal for an unknown eeltype or vdwtype and gmx_incons for an
+ * unsupported modifier combination.
+ */
+static void set_table_type(int tabsel[], const t_forcerec *fr, gmx_bool b14only)
+{
+ int eltype, vdwtype;
+
+ /* Set the different table indices.
+ * Coulomb first.
+ */
+
+
+ if (b14only)
+ {
+ switch (fr->eeltype)
+ {
+ case eelRF_NEC:
+ eltype = eelRF;
+ break;
+ case eelUSER:
+ case eelPMEUSER:
+ case eelPMEUSERSWITCH:
+ eltype = eelUSER;
+ break;
+ default:
+ eltype = eelCUT;
+ }
+ }
+ else
+ {
+ eltype = fr->eeltype;
+ }
+
+ switch (eltype)
+ {
+ case eelCUT:
+ tabsel[etiCOUL] = etabCOUL;
+ break;
+ case eelPOISSON:
+ tabsel[etiCOUL] = etabShift;
+ break;
+ case eelSHIFT:
+ if (fr->rcoulomb > fr->rcoulomb_switch) /* only use the shift table when the switching range is non-empty */
+ {
+ tabsel[etiCOUL] = etabShift;
+ }
+ else
+ {
+ tabsel[etiCOUL] = etabCOUL;
+ }
+ break;
+ case eelEWALD:
+ case eelPME:
+ case eelP3M_AD:
+ tabsel[etiCOUL] = etabEwald;
+ break;
+ case eelPMESWITCH:
+ tabsel[etiCOUL] = etabEwaldSwitch;
+ break;
+ case eelPMEUSER:
+ tabsel[etiCOUL] = etabEwaldUser;
+ break;
+ case eelPMEUSERSWITCH:
+ tabsel[etiCOUL] = etabEwaldUserSwitch;
+ break;
+ case eelRF:
+ case eelGRF:
+ case eelRF_NEC:
+ tabsel[etiCOUL] = etabRF;
+ break;
+ case eelRF_ZERO:
+ tabsel[etiCOUL] = etabRF_ZERO;
+ break;
+ case eelSWITCH:
+ tabsel[etiCOUL] = etabCOULSwitch;
+ break;
+ case eelUSER:
+ tabsel[etiCOUL] = etabUSER;
+ break;
+ case eelENCADSHIFT:
+ tabsel[etiCOUL] = etabCOULEncad;
+ break;
+ default:
+ gmx_fatal(FARGS, "Invalid eeltype %d", eltype);
+ }
+
+ /* Van der Waals time */
+ if (fr->bBHAM && !b14only) /* bBHAM: dispersion table plus etabEXPMIN in the repulsion slot */
+ {
+ tabsel[etiLJ6] = etabLJ6;
+ tabsel[etiLJ12] = etabEXPMIN;
+ }
+ else
+ {
+ if (b14only && fr->vdwtype != evdwUSER)
+ {
+ vdwtype = evdwCUT;
+ }
+ else
+ {
+ vdwtype = fr->vdwtype;
+ }
+
+ switch (vdwtype)
+ {
+ case evdwSWITCH:
+ tabsel[etiLJ6] = etabLJ6Switch;
+ tabsel[etiLJ12] = etabLJ12Switch;
+ break;
+ case evdwSHIFT:
+ tabsel[etiLJ6] = etabLJ6Shift;
+ tabsel[etiLJ12] = etabLJ12Shift;
+ break;
+ case evdwUSER:
+ tabsel[etiLJ6] = etabUSER;
+ tabsel[etiLJ12] = etabUSER;
+ break;
+ case evdwCUT:
+ tabsel[etiLJ6] = etabLJ6;
+ tabsel[etiLJ12] = etabLJ12;
+ break;
+ case evdwENCADSHIFT:
+ tabsel[etiLJ6] = etabLJ6Encad;
+ tabsel[etiLJ12] = etabLJ12Encad;
+ break;
+ case evdwPME:
+ tabsel[etiLJ6] = etabLJ6Ewald;
+ tabsel[etiLJ12] = etabLJ12;
+ break;
+ default:
+ gmx_fatal(FARGS, "Invalid vdwtype %d in %s line %d", vdwtype,
+ __FILE__, __LINE__);
+ }
+
+ if (!b14only && fr->vdw_modifier != eintmodNONE)
+ {
+ if (fr->vdw_modifier != eintmodPOTSHIFT &&
+ fr->vdwtype != evdwCUT)
+ {
+ gmx_incons("Potential modifiers other than potential-shift are only implemented for LJ cut-off");
+ }
+
- case eintmodNONE:
- case eintmodPOTSHIFT:
- case eintmodEXACTCUTOFF:
- /* No modification */
- break;
- case eintmodPOTSWITCH:
- tabsel[etiLJ6] = etabLJ6Switch;
- tabsel[etiLJ12] = etabLJ12Switch;
- break;
- case eintmodFORCESWITCH:
- tabsel[etiLJ6] = etabLJ6Shift;
- tabsel[etiLJ12] = etabLJ12Shift;
- break;
- default:
- gmx_incons("Unsupported vdw_modifier");
++ /* LJ-PME and other (shift-only) modifiers are handled by applying the modifiers
++ * to the original interaction forms when we fill the table, so we only check cutoffs here.
++ */
++ if (fr->vdwtype == evdwCUT)
+ {
- fill_table(&(td[k]), tabsel[k], fr);
++ switch (fr->vdw_modifier)
++ {
++ case eintmodNONE:
++ case eintmodPOTSHIFT:
++ case eintmodEXACTCUTOFF:
++ /* No modification */
++ break;
++ case eintmodPOTSWITCH:
++ tabsel[etiLJ6] = etabLJ6Switch;
++ tabsel[etiLJ12] = etabLJ12Switch;
++ break;
++ case eintmodFORCESWITCH:
++ tabsel[etiLJ6] = etabLJ6Shift;
++ tabsel[etiLJ12] = etabLJ12Shift;
++ break;
++ default:
++ gmx_incons("Unsupported vdw_modifier");
++ }
+ }
+ }
+ }
+}
+/*
+ * Build the combined Coulomb / dispersion / repulsion force table.
+ *
+ * out      - log stream, may be NULL
+ * oenv     - output environment, passed to xvgropen for the debug plots
+ * fr       - force record
+ * bVerbose - together with bDebugMode, enables writing each table to an
+ *            xvg file
+ * fn       - user table file, read when any selected table type is a user
+ *            type
+ * rtab     - requested table length; when a user table is read and rtab is
+ *            0 or GMX_MAKETABLES_14ONLY is set, the length of the user
+ *            table is used instead
+ * flags    - GMX_MAKETABLES_14ONLY and GMX_MAKETABLES_FORCEUSER are tested
+ *
+ * Returns the table struct. table.data is allocated with snew_aligned and
+ * is filled through copy2table with 4 values per point for each of the 3
+ * interactions (stride 12).
+ */
+t_forcetable make_tables(FILE *out, const output_env_t oenv,
+ const t_forcerec *fr,
+ gmx_bool bVerbose, const char *fn,
+ real rtab, int flags)
+{
+ const char *fns[3] = { "ctab.xvg", "dtab.xvg", "rtab.xvg" };
+ const char *fns14[3] = { "ctab14.xvg", "dtab14.xvg", "rtab14.xvg" };
+ FILE *fp;
+ t_tabledata *td;
+ gmx_bool b14only, bReadTab, bGenTab;
+ real x0, y0, yp;
+ int i, j, k, nx, nx0, tabsel[etiNR]; /* NOTE(review): j is not used in this function */
+ real scalefactor;
+
+ t_forcetable table;
+
+ b14only = (flags & GMX_MAKETABLES_14ONLY);
+
+ if (flags & GMX_MAKETABLES_FORCEUSER)
+ {
+ tabsel[etiCOUL] = etabUSER;
+ tabsel[etiLJ6] = etabUSER;
+ tabsel[etiLJ12] = etabUSER;
+ }
+ else
+ {
+ set_table_type(tabsel, fr, b14only);
+ }
+ snew(td, etiNR);
+ table.r = rtab;
+ table.scale = 0;
+ table.n = 0;
+ table.scale_exp = 0;
+ nx0 = 10;
+ nx = 0;
+
+ table.interaction = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+ table.format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+ table.formatsize = 4;
+ table.ninteractions = 3;
+ table.stride = table.formatsize*table.ninteractions;
+
+ /* Check whether we have to read or generate */
+ bReadTab = FALSE;
+ bGenTab = FALSE;
+ for (i = 0; (i < etiNR); i++)
+ {
+ if (ETAB_USER(tabsel[i]))
+ {
+ bReadTab = TRUE;
+ }
+ if (tabsel[i] != etabUSER)
+ {
+ bGenTab = TRUE;
+ }
+ }
+ if (bReadTab)
+ {
+ read_tables(out, fn, etiNR, 0, td);
+ if (rtab == 0 || (flags & GMX_MAKETABLES_14ONLY))
+ {
+ rtab = td[0].x[td[0].nx-1]; /* NOTE(review): table.r was assigned from rtab before this update and is not refreshed - confirm this is intended */
+ table.n = td[0].nx;
+ nx = table.n;
+ }
+ else
+ {
+ if (td[0].x[td[0].nx-1] < rtab)
+ {
+ gmx_fatal(FARGS, "Tables in file %s not long enough for cut-off:\n"
+ "\tshould be at least %f nm\n", fn, rtab);
+ }
+ nx = table.n = (int)(rtab*td[0].tabscale + 0.5);
+ }
+ table.scale = td[0].tabscale;
+ nx0 = td[0].nx0;
+ }
+ if (bGenTab)
+ {
+ if (!bReadTab)
+ {
+#ifdef GMX_DOUBLE
+ table.scale = 2000.0;
+#else
+ table.scale = 500.0;
+#endif
+ nx = table.n = rtab*table.scale;
+ }
+ }
+ if (fr->bBHAM)
+ {
+ if (fr->bham_b_max != 0)
+ {
+ table.scale_exp = table.scale/fr->bham_b_max;
+ }
+ else
+ {
+ table.scale_exp = table.scale;
+ }
+ }
+
+ /* Each table type (e.g. coul,lj6,lj12) requires four
+ * numbers per nx+1 data points. For performance reasons we want
+ * the table data to be aligned to 16-byte.
+ * NOTE(review): the call below passes 32 as its last argument, which is
+ * presumably the alignment in bytes, not 16 - confirm.
+ */
+ snew_aligned(table.data, 12*(nx+1)*sizeof(real), 32); /* NOTE(review): the count is multiplied by sizeof(real) here, whereas the other snew_aligned calls in this file pass a plain count - confirm which is intended */
+
+ for (k = 0; (k < etiNR); k++)
+ {
+ if (tabsel[k] != etabUSER)
+ {
+ init_table(nx, nx0,
+ (tabsel[k] == etabEXPMIN) ? table.scale_exp : table.scale,
+ &(td[k]), !bReadTab);
++ fill_table(&(td[k]), tabsel[k], fr, b14only);
+ if (out)
+ {
+ fprintf(out, "%s table with %d data points for %s%s.\n"
+ "Tabscale = %g points/nm\n",
+ ETAB_USER(tabsel[k]) ? "Modified" : "Generated",
+ td[k].nx, b14only ? "1-4 " : "", tprops[tabsel[k]].name,
+ td[k].tabscale);
+ }
+ }
+
+ /* Set scalefactor for c6/c12 tables. This is because we save flops in the non-table kernels
+ * by including the derivative constants (6.0 or 12.0) in the parameters, since
+ * we no longer calculate force in most steps. This means the c6/c12 parameters
+ * have been scaled up, so we need to scale down the table interactions too.
+ * It comes here since we need to scale user tables too.
+ */
+ if (k == etiLJ6)
+ {
+ scalefactor = 1.0/6.0;
+ }
+ else if (k == etiLJ12 && tabsel[k] != etabEXPMIN)
+ {
+ scalefactor = 1.0/12.0;
+ }
+ else
+ {
+ scalefactor = 1.0;
+ }
+
+ copy2table(table.n, k*4, 12, td[k].x, td[k].v, td[k].f, scalefactor, table.data); /* offset k*4 within a stride of 12 interleaves the three tables */
+
+ if (bDebugMode() && bVerbose)
+ {
+ if (b14only)
+ {
+ fp = xvgropen(fns14[k], fns14[k], "r", "V", oenv);
+ }
+ else
+ {
+ fp = xvgropen(fns[k], fns[k], "r", "V", oenv);
+ }
+ /* plot the output 5 times denser than the table data */
+ for (i = 5*((nx0+1)/2); i < 5*table.n; i++)
+ {
+ x0 = i*table.r/(5*(table.n-1));
+ evaluate_table(table.data, 4*k, 12, table.scale, x0, &y0, &yp);
+ fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp);
+ }
+ gmx_fio_fclose(fp);
+ }
+ done_tabledata(&(td[k]));
+ }
+ sfree(td);
+
+ return table;
+}
+/*
+ * Build a single-interaction table for Generalized Born.
+ * The table length and spacing come from fr->gbtabr and fr->gbtabscale and
+ * the values are generated in this function rather than through fill_table:
+ *   V(r) = 1/sqrt(r^2 + exp(-r^2/4))
+ *   F(r) = (r - r*exp(-r^2/4)/4) / (r^2 + exp(-r^2/4))^(3/2)
+ * In debug mode the table is also written to gbctab.xvg.
+ *
+ * oenv - output environment for the debug plot
+ * fr   - force record; gbtabr and gbtabscale are read
+ *
+ * Returns the table struct with data allocated through snew_aligned.
+ */
+t_forcetable make_gb_table(const output_env_t oenv,
+ const t_forcerec *fr)
+{
+ const char *fns[3] = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" };
+ const char *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" };
+ FILE *fp;
+ t_tabledata *td;
+ gmx_bool bReadTab, bGenTab;
+ real x0, y0, yp;
+ int i, j, k, nx, nx0, tabsel[etiNR];
+ double r, r2, Vtab, Ftab, expterm;
+
+ t_forcetable table;
+
+ double abs_error_r, abs_error_r2; /* NOTE(review): the error variables declared here are only referenced by the commented-out block near the end */
+ double rel_error_r, rel_error_r2;
+ double rel_error_r_old = 0, rel_error_r2_old = 0;
+ double x0_r_error, x0_r2_error;
+
+
+ /* Only set a Coulomb table for GB */
+ /*
+ tabsel[0]=etabGB;
+ tabsel[1]=-1;
+ tabsel[2]=-1;
+ */
+
+ /* Set the table dimensions for GB, not really necessary to
+ * use etiNR (since we only have one table, but ...)
+ */
+ snew(td, 1);
+ table.interaction = GMX_TABLE_INTERACTION_ELEC;
+ table.format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+ table.r = fr->gbtabr;
+ table.scale = fr->gbtabscale;
+ table.scale_exp = 0;
+ table.n = table.scale*table.r;
+ table.formatsize = 4;
+ table.ninteractions = 1;
+ table.stride = table.formatsize*table.ninteractions;
+ nx0 = 0;
+ nx = table.scale*table.r;
+
+ /* Check whether we have to read or generate
+ * We will always generate a table, so remove the read code
+ * (Compare with original make_table function
+ */
+ bReadTab = FALSE;
+ bGenTab = TRUE;
+
+ /* Each table type (e.g. coul,lj6,lj12) requires four
+ * numbers per datapoint. For performance reasons we want
+ * the table data to be aligned to 16-byte. This is accomplished
+ * by allocating 16 bytes extra to a temporary pointer, and then
+ * calculating an aligned pointer. This new pointer must not be
+ * used in a free() call, but thankfully we're sloppy enough not
+ * to do this :-)
+ * NOTE(review): the description above looks stale. The code below calls
+ * snew_aligned with 32 as its last argument (presumably the alignment)
+ * and does no manual pointer adjustment - confirm and update.
+ */
+
+ snew_aligned(table.data, 4*nx, 32);
+
+ init_table(nx, nx0, table.scale, &(td[0]), !bReadTab);
+
+ /* Local implementation so we don't have to use the etabGB
+ * enum above, which will cause problems later when
+ * making the other tables (right now even though we are using
+ * GB, the normal Coulomb tables will be created, but this
+ * will cause a problem since fr->eeltype==etabGB which will not
+ * be defined in fill_table and set_table_type
+ */
+
+ for (i = nx0; i < nx; i++)
+ {
+ r = td->x[i];
+ r2 = r*r;
+ expterm = exp(-0.25*r2);
+
+ Vtab = 1/sqrt(r2+expterm);
+ Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm)); /* minus the derivative of Vtab with respect to r */
+
+ /* Convert to single precision when we store to mem */
+ td->v[i] = Vtab;
+ td->f[i] = Ftab;
+
+ }
+
+ copy2table(table.n, 0, 4, td[0].x, td[0].v, td[0].f, 1.0, table.data);
+
+ if (bDebugMode())
+ {
+ fp = xvgropen(fns[0], fns[0], "r", "V", oenv);
+ /* plot the output 5 times denser than the table data */
+ /* for(i=5*nx0;i<5*table.n;i++) */
+ for (i = nx0; i < table.n; i++)
+ {
+ /* x0=i*table.r/(5*table.n); */
+ x0 = i*table.r/table.n;
+ evaluate_table(table.data, 0, 4, table.scale, x0, &y0, &yp);
+ fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp);
+
+ }
+ gmx_fio_fclose(fp);
+ }
+
+ /*
+ for(i=100*nx0;i<99.81*table.n;i++)
+ {
+ r = i*table.r/(100*table.n);
+ r2 = r*r;
+ expterm = exp(-0.25*r2);
+
+ Vtab = 1/sqrt(r2+expterm);
+ Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
+
+
+ evaluate_table(table.data,0,4,table.scale,r,&y0,&yp);
+ printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
+
+ abs_error_r=fabs(y0-Vtab);
+ abs_error_r2=fabs(yp-(-1)*Ftab);
+
+ rel_error_r=abs_error_r/y0;
+ rel_error_r2=fabs(abs_error_r2/yp);
+
+
+ if(rel_error_r>rel_error_r_old)
+ {
+ rel_error_r_old=rel_error_r;
+ x0_r_error=x0;
+ }
+
+ if(rel_error_r2>rel_error_r2_old)
+ {
+ rel_error_r2_old=rel_error_r2;
+ x0_r2_error=x0;
+ }
+ }
+
+ printf("gb: MAX REL ERROR IN R=%15.15f, MAX REL ERROR IN R2=%15.15f\n",rel_error_r_old, rel_error_r2_old);
+ printf("gb: XO_R=%g, X0_R2=%g\n",x0_r_error, x0_r2_error);
+
+ exit(1); */
+ done_tabledata(&(td[0]));
+ sfree(td);
+
+ return table;
+
+
+}
+/*
+ * Read the AdResS therm force table from fn and copy it into a force table.
+ *
+ * out  - log stream passed on to read_tables, may be NULL
+ * oenv - output environment for the debug plot
+ * fr   - force record; only adress_type is read
+ * fn   - table file name
+ * box  - simulation box, used to determine the required table range:
+ *        half the box diagonal for eAdressSphere, half the x length
+ *        otherwise
+ *
+ * Calls gmx_fatal when the table read from fn is shorter than that range.
+ * Returns the table struct with data allocated through snew_aligned.
+ */
+t_forcetable make_atf_table(FILE *out, const output_env_t oenv,
+ const t_forcerec *fr,
+ const char *fn,
+ matrix box)
+{
+ const char *fns[3] = { "tf_tab.xvg", "atfdtab.xvg", "atfrtab.xvg" };
+ FILE *fp;
+ t_tabledata *td;
+ real x0, y0, yp, rtab;
+ int i, nx, nx0;
+ real rx, ry, rz, box_r;
+
+ t_forcetable table;
+
+
+ /* Set the table dimensions for ATF, not really necessary to
+ * use etiNR (since we only have one table, but ...)
+ */
+ snew(td, 1);
+
+ if (fr->adress_type == eAdressSphere)
+ {
+ /* take half box diagonal direction as tab range */
+ rx = 0.5*box[0][0]+0.5*box[1][0]+0.5*box[2][0];
+ ry = 0.5*box[0][1]+0.5*box[1][1]+0.5*box[2][1];
+ rz = 0.5*box[0][2]+0.5*box[1][2]+0.5*box[2][2];
+ box_r = sqrt(rx*rx+ry*ry+rz*rz);
+
+ }
+ else
+ {
+ /* xsplit: take half box x direction as tab range */
+ box_r = box[0][0]/2;
+ }
+ table.r = box_r;
+ table.scale = 0;
+ table.n = 0;
+ table.scale_exp = 0;
+ nx0 = 10;
+ nx = 0;
+
+ read_tables(out, fn, 1, 0, td);
+ rtab = td[0].x[td[0].nx-1]; /* last x value, i.e. the length of the table that was read */
+
+ if (fr->adress_type == eAdressXSplit && (rtab < box[0][0]/2))
+ {
+ gmx_fatal(FARGS, "AdResS full box therm force table in file %s extends to %f:\n"
+ "\tshould extend to at least half the length of the box in x-direction"
+ "%f\n", fn, rtab, box[0][0]/2);
+ }
+ if (rtab < box_r) /* NOTE(review): this check runs for every adress type although its message mentions spherical adress only */
+ {
+ gmx_fatal(FARGS, "AdResS full box therm force table in file %s extends to %f:\n"
+ "\tshould extend to at least for spherical adress"
+ "%f (=distance from center to furthermost point in box \n", fn, rtab, box_r);
+ }
+
+
+ table.n = td[0].nx;
+ nx = table.n;
+ table.scale = td[0].tabscale;
+ nx0 = td[0].nx0;
+
+ /* Each table type (e.g. coul,lj6,lj12) requires four
+ * numbers per datapoint. For performance reasons we want
+ * the table data to be aligned to 16-byte. This is accomplished
+ * by allocating 16 bytes extra to a temporary pointer, and then
+ * calculating an aligned pointer. This new pointer must not be
+ * used in a free() call, but thankfully we're sloppy enough not
+ * to do this :-)
+ * NOTE(review): the description above looks stale. The code below calls
+ * snew_aligned with 32 as its last argument (presumably the alignment)
+ * and does no manual pointer adjustment - confirm and update.
+ */
+
+ snew_aligned(table.data, 4*nx, 32);
+
+ copy2table(table.n, 0, 4, td[0].x, td[0].v, td[0].f, 1.0, table.data);
+
+ if (bDebugMode())
+ {
+ fp = xvgropen(fns[0], fns[0], "r", "V", oenv);
+ /* plot the output 5 times denser than the table data */
+ /* for(i=5*nx0;i<5*table.n;i++) */
+
+ for (i = 5*((nx0+1)/2); i < 5*table.n; i++)
+ {
+ /* x0=i*table.r/(5*table.n); */
+ x0 = i*table.r/(5*(table.n-1));
+ evaluate_table(table.data, 0, 4, table.scale, x0, &y0, &yp);
+ fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp);
+
+ }
+ gmx_ffclose(fp);
+ }
+
+ done_tabledata(&(td[0]));
+ sfree(td);
+
+ table.interaction = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+ table.format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+ table.formatsize = 4;
+ table.ninteractions = 3; /* NOTE(review): the data above was allocated and filled for one interaction with stride 4, yet the table is labelled as three interactions with stride 12 - confirm */
+ table.stride = table.formatsize*table.ninteractions;
+
+
+ return table;
+}
+
+/*
+ * Read a single tabulated bonded interaction from fn and convert it into
+ * a bondedtable_t.
+ *
+ * fplog - log stream passed on to read_tables, may be NULL
+ * fn    - table file name
+ * angle - passed to read_tables to select the expected x range
+ *         (0: starts at 0; 1: 0 to 180; otherwise: -180 to 180).
+ *         For angle > 0 the table is converted from degrees to radians:
+ *         x is scaled by DEG2RAD, f and the table scale by RAD2DEG.
+ *
+ * Returns the table; tab.data holds 4 values per point and is allocated
+ * with snew, so the caller owns it.
+ */
+bondedtable_t make_bonded_table(FILE *fplog, char *fn, int angle)
+{
+ t_tabledata td;
+ int i;
+ bondedtable_t tab;
+
+ /* The expected start of the x range is validated inside read_tables */
+ read_tables(fplog, fn, 1, angle, &td);
+ if (angle > 0)
+ {
+ /* Convert the table from degrees to radians */
+ for (i = 0; i < td.nx; i++)
+ {
+ td.x[i] *= DEG2RAD;
+ td.f[i] *= RAD2DEG;
+ }
+ td.tabscale *= RAD2DEG;
+ }
+ tab.n = td.nx;
+ tab.scale = td.tabscale;
+ snew(tab.data, tab.n*4);
+ copy2table(tab.n, 0, 4, td.x, td.v, td.f, 1.0, tab.data);
+ done_tabledata(&td);
+
+ return tab;
+}