From: Roland Schulz Date: Tue, 24 Jun 2014 01:52:58 +0000 (-0400) Subject: Merge release-4-6 into release-5-0 X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=b2b95f071d3522005949a60e77aa896b45cfc981;p=alexxy%2Fgromacs.git Merge release-4-6 into release-5-0 This merges commit dced970, which changes many free-energy, modifier and table-generation code paths, together with its fix 349d8056. Patch 349d8056 contains fixes to potential-shift and potential-switch, as well as to the shift/switch interactions in combination with free energy. Since 5.0 has undergone changes in the same areas (nbnxn free energy, LJ-PME and force-switch), this merge is a likely place for bugs to have been introduced, so we keep it as a separate commit. Uncrustified the result of the merge. Conflicts: src/gmxlib/nonbonded/nb_free_energy.c Resolved in favour of whichever branch seemed most right; changes from dced970, 99aa704d and 5f59569a8 were all relevant here. We have introduced some new LJ-PME-related variables so that the code path is reasonably similar to the coulomb path. We have also fixed a small bug where the LJ-PME self-energy (i==j for Verlet kernels) was not multiplied by 0.5. src/gromacs/gmxlib/nonbonded/nonbonded.c Resolved as for dced970 src/gromacs/gmxpreprocess/readir.c Resolved as for dced970 src/gromacs/mdlib/forcerec.c Resolved from both branches src/gromacs/mdlib/sim_util.c Resolved from both branches, and from 349d8056 src/gromacs/mdlib/tables.c Resolved from both branches, and added a few lines of code to make LJ-PME work with shift modifiers. As noted above, to avoid breaking the 5.0 branch, we have manually added the changes corresponding to 349d8056 to make sure force-switch (the same as vdwtype=shift) results in a correct dispersion correction, and we have added a fix for the sign of the LJ-PME grid c6 term in the generic nonbonded kernels. This means 349d8056 should not be merged into 5.0 again later. Change-Id: Ida29b143a1bcb727ff38f9c63bf133bf749477b1 --- b2b95f071d3522005949a60e77aa896b45cfc981 diff --cc src/gromacs/gmxlib/nonbonded/nb_free_energy.c index a2e145153b,0000000000..058b2a87eb mode 100644,000000..100644 --- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.c +++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.c @@@ -1,989 -1,0 +1,1066 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> + +#include "vec.h" +#include "typedefs.h" +#include "nonbonded.h" +#include "nb_kernel.h" +#include "nrnb.h" +#include "macros.h" +#include "nb_free_energy.h" + +#include "gmx_fatal.h" + +void +gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + const t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + +#define STATE_A 0 +#define STATE_B 1 +#define NSTATES 2 + int i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid; + real shX, shY, shZ; + real Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz; + real Vcoul[NSTATES], Vvdw[NSTATES]; + real rinv6, r, rt, rtC, rtV; + real iqA, iqB; + real qq[NSTATES], vctot, krsq; + int ntiA, ntiB, tj[NSTATES]; + real Vvdw6, Vvdw12, vvtot; + real ix, iy, iz, fix, fiy, fiz; + real dx, dy, dz, rsq, rinv; - real c6[NSTATES], c12[NSTATES], c6grid[NSTATES]; ++ real c6[NSTATES], c12[NSTATES], c6grid; + real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES]; + double dvdl_coul, dvdl_vdw; + real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES]; + real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min; + real rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; + real sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2; + int do_tab, tab_elemsize; + int n0, n1C, n1V, nnn; + real Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF; + int icoul, ivdw; + int nri; + const int * iinr; + const int * jindex; + const int * jjnr; + const int * shift; + const int * gid; + const int * typeA; + const int * typeB; + int ntype; + const real * shiftvec; + real dvdl_part; + real * fshift; + real tabscale = 0; + const real * VFtab = NULL; + const real * x; + real * f; + real facel, krf, crf; + const real * chargeA; + const real * chargeB; + real sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power; + real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj; + const real * nbfp, *nbfp_grid; + real * dvdl; + real * Vv; + real * Vc; + gmx_bool bDoForces, bDoShiftForces, bDoPotential; - real rcoulomb, sh_ewald; - real rvdw, sh_invrc6; - gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald; ++ real rcoulomb, rvdw, sh_invrc6; ++ gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll; ++ gmx_bool bEwald, bEwaldLJ; + real rcutoff_max2; - real rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr; - const real * tab_ewald_F; - const real * tab_ewald_V; + const real * tab_ewald_F_lj; + const real * tab_ewald_V_lj; - real tab_ewald_scale, tab_ewald_halfsp; ++ real d, d2, sw, dsw, rinvcorr; ++ real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4; ++ real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4; ++ gmx_bool bConvertEwaldToCoulomb, 
bConvertLJEwaldToLJ6; ++ gmx_bool bComputeVdwInteraction, bComputeElecInteraction; ++ const real * ewtab; ++ int ewitab; ++ real ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald; ++ ++ sh_ewald = fr->ic->sh_ewald; ++ ewtab = fr->ic->tabq_coul_FDV0; ++ ewtabscale = fr->ic->tabq_scale; ++ ewtabhalfspace = 0.5/ewtabscale; ++ tab_ewald_F_lj = fr->ic->tabq_vdw_F; ++ tab_ewald_V_lj = fr->ic->tabq_vdw_V; + + x = xx[0]; + f = ff[0]; + + fshift = fr->fshift[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + icoul = nlist->ielec; + ivdw = nlist->ivdw; + shift = nlist->shift; + gid = nlist->gid; + + shiftvec = fr->shift_vec[0]; + chargeA = mdatoms->chargeA; + chargeB = mdatoms->chargeB; + facel = fr->epsfac; + krf = fr->k_rf; + crf = fr->c_rf; + ewc_lj = fr->ewaldcoeff_lj; + Vc = kernel_data->energygrp_elec; + typeA = mdatoms->typeA; + typeB = mdatoms->typeB; + ntype = fr->ntype; + nbfp = fr->nbfp; + nbfp_grid = fr->ljpme_c6grid; + Vv = kernel_data->energygrp_vdw; + lambda_coul = kernel_data->lambda[efptCOUL]; + lambda_vdw = kernel_data->lambda[efptVDW]; + dvdl = kernel_data->dvdl; + alpha_coul = fr->sc_alphacoul; + alpha_vdw = fr->sc_alphavdw; + lam_power = fr->sc_power; + sc_r_power = fr->sc_r_power; + sigma6_def = fr->sc_sigma6_def; + sigma6_min = fr->sc_sigma6_min; + bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE; + bDoShiftForces = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE; + bDoPotential = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL; + + rcoulomb = fr->rcoulomb; + sh_ewald = fr->ic->sh_ewald; + rvdw = fr->rvdw; + sh_invrc6 = fr->ic->sh_invrc6; + - /* Ewald (PME) reciprocal force and energy quadratic spline tables */ - tab_ewald_F = fr->ic->tabq_coul_F; - tab_ewald_V = fr->ic->tabq_coul_V; - tab_ewald_scale = fr->ic->tabq_scale; - tab_ewald_F_lj = fr->ic->tabq_vdw_F; - tab_ewald_V_lj = fr->ic->tabq_vdw_V; - tab_ewald_halfsp = 0.5/tab_ewald_scale; ++ if (fr->coulomb_modifier == eintmodPOTSWITCH) ++ { ++ d = fr->rcoulomb-fr->rcoulomb_switch; ++ elec_swV3 = -10.0/(d*d*d); ++ elec_swV4 = 15.0/(d*d*d*d); ++ elec_swV5 = -6.0/(d*d*d*d*d); ++ elec_swF2 = -30.0/(d*d*d); ++ elec_swF3 = 60.0/(d*d*d*d); ++ elec_swF4 = -30.0/(d*d*d*d*d); ++ } ++ else ++ { ++ /* Avoid warnings from stupid compilers (looking at you, Clang!) */ ++ elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0; ++ } + - if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH) ++ if (fr->vdw_modifier == eintmodPOTSWITCH) + { - rcutoff = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw; - rcutoff2 = rcutoff*rcutoff; - rswitch = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch; - d = rcutoff-rswitch; - swV3 = -10.0/(d*d*d); - swV4 = 15.0/(d*d*d*d); - swV5 = -6.0/(d*d*d*d*d); - swF2 = -30.0/(d*d*d); - swF3 = 60.0/(d*d*d*d); - swF4 = -30.0/(d*d*d*d*d); ++ d = fr->rvdw-fr->rvdw_switch; ++ vdw_swV3 = -10.0/(d*d*d); ++ vdw_swV4 = 15.0/(d*d*d*d); ++ vdw_swV5 = -6.0/(d*d*d*d*d); ++ vdw_swF2 = -30.0/(d*d*d); ++ vdw_swF3 = 60.0/(d*d*d*d); ++ vdw_swF4 = -30.0/(d*d*d*d*d); + } + else + { - /* Stupid compilers dont realize these variables will not be used */ - rswitch = 0.0; - swV3 = 0.0; - swV4 = 0.0; - swV5 = 0.0; - swF2 = 0.0; - swF3 = 0.0; - swF4 = 0.0; ++ /* Avoid warnings from stupid compilers (looking at you, Clang!) 
*/ ++ vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0; + } + + if (fr->cutoff_scheme == ecutsVERLET) + { + const interaction_const_t *ic; + + ic = fr->ic; + if (EVDW_PME(ic->vdwtype)) + { + ivdw = GMX_NBKERNEL_VDW_LJEWALD; + } + else + { + ivdw = GMX_NBKERNEL_VDW_LENNARDJONES; + } + + if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype)) + { + icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD; + } + else if (EEL_PME_EWALD(ic->eeltype)) + { + icoul = GMX_NBKERNEL_ELEC_EWALD; + } + else + { + gmx_incons("Unsupported eeltype with Verlet and free-energy"); + } + + bExactElecCutoff = TRUE; + bExactVdwCutoff = TRUE; + } + else + { + bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO; + bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE); + } + + bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff); + rcutoff_max2 = max(fr->rcoulomb, fr->rvdw); + rcutoff_max2 = rcutoff_max2*rcutoff_max2; + + bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD); ++ bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD); ++ ++ /* For Ewald/PME interactions we cannot easily apply the soft-core component to ++ * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we ++ * can apply the small trick of subtracting the _reciprocal_ space contribution ++ * in this kernel, and instead apply the free energy interaction to the 1/r ++ * (standard coulomb) interaction. ++ * ++ * However, we cannot use this approach for switch-modified since we would then ++ * effectively end up evaluating a significantly different interaction here compared to the ++ * normal (non-free-energy) kernels, either by applying a cutoff at a different ++ * position than what the user requested, or by switching different ++ * things (1/r rather than short-range Ewald). For these settings, we just ++ * use the traditional short-range Ewald interaction in that case. ++ */ ++ bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH)); ++ /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0), ++ * but writing it this way means we stay in sync with coulomb, and it avoids future bugs. ++ */ ++ bConvertLJEwaldToLJ6 = (bEwaldLJ && (fr->vdw_modifier != eintmodPOTSWITCH)); + + /* fix compiler warnings */ + nj1 = 0; + n1C = n1V = 0; + epsC = epsV = 0; + eps2C = eps2V = 0; + + dvdl_coul = 0; + dvdl_vdw = 0; + + /* Lambda factor for state A, 1-lambda*/ + LFC[STATE_A] = 1.0 - lambda_coul; + LFV[STATE_A] = 1.0 - lambda_vdw; + + /* Lambda factor for state B, lambda*/ + LFC[STATE_B] = lambda_coul; + LFV[STATE_B] = lambda_vdw; + + /*derivative of the lambda factor for state A and B */ + DLF[STATE_A] = -1; + DLF[STATE_B] = 1; + + for (i = 0; i < NSTATES; i++) + { + lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i])); + dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1); + lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i])); + dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? 
(1-LFV[i]) : 1); + } + /* precalculate */ + sigma2_def = pow(sigma6_def, 1.0/3.0); + sigma2_min = pow(sigma6_min, 1.0/3.0); + + /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */ + + do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || + ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE); + if (do_tab) + { + tabscale = kernel_data->table_elec_vdw->scale; + VFtab = kernel_data->table_elec_vdw->data; + /* we always use the combined table here */ + tab_elemsize = 12; + } + + for (n = 0; (n < nri); n++) + { + int npair_within_cutoff; + + npair_within_cutoff = 0; + + is3 = 3*shift[n]; + shX = shiftvec[is3]; + shY = shiftvec[is3+1]; + shZ = shiftvec[is3+2]; + nj0 = jindex[n]; + nj1 = jindex[n+1]; + ii = iinr[n]; + ii3 = 3*ii; + ix = shX + x[ii3+0]; + iy = shY + x[ii3+1]; + iz = shZ + x[ii3+2]; + iqA = facel*chargeA[ii]; + iqB = facel*chargeB[ii]; + ntiA = 2*ntype*typeA[ii]; + ntiB = 2*ntype*typeB[ii]; + vctot = 0; + vvtot = 0; + fix = 0; + fiy = 0; + fiz = 0; + + for (k = nj0; (k < nj1); k++) + { + jnr = jjnr[k]; + j3 = 3*jnr; + dx = ix - x[j3]; + dy = iy - x[j3+1]; + dz = iz - x[j3+2]; + rsq = dx*dx + dy*dy + dz*dz; + + if (bExactCutoffAll && rsq >= rcutoff_max2) + { + /* We save significant time by skipping all code below. + * Note that with soft-core interactions, the actual cut-off + * check might be different. But since the soft-core distance + * is always larger than r, checking on r here is safe. + */ + continue; + } + npair_within_cutoff++; + + if (rsq > 0) + { + rinv = gmx_invsqrt(rsq); + r = rsq*rinv; + } + else + { + /* The force at r=0 is zero, because of symmetry. + * But note that the potential is in general non-zero, + * since the soft-cored r will be non-zero. + */ + rinv = 0; + r = 0; + } + + if (sc_r_power == 6.0) + { + rpm2 = rsq*rsq; /* r4 */ + rp = rpm2*rsq; /* r6 */ + } + else if (sc_r_power == 48.0) + { + rp = rsq*rsq*rsq; /* r6 */ + rp = rp*rp; /* r12 */ + rp = rp*rp; /* r24 */ + rp = rp*rp; /* r48 */ + rpm2 = rp/rsq; /* r46 */ + } + else + { + rp = pow(r, sc_r_power); /* not currently supported as input, but can handle it */ + rpm2 = rp/rsq; + } + + Fscal = 0; + + qq[STATE_A] = iqA*chargeA[jnr]; + qq[STATE_B] = iqB*chargeB[jnr]; + + tj[STATE_A] = ntiA+2*typeA[jnr]; + tj[STATE_B] = ntiB+2*typeB[jnr]; + - if (ivdw == GMX_NBKERNEL_VDW_LJEWALD) - { - c6grid[STATE_A] = nbfp_grid[tj[STATE_A]]; - c6grid[STATE_B] = nbfp_grid[tj[STATE_B]]; - } - + if (nlist->excl_fep == NULL || nlist->excl_fep[k]) + { + c6[STATE_A] = nbfp[tj[STATE_A]]; + c6[STATE_B] = nbfp[tj[STATE_B]]; + + for (i = 0; i < NSTATES; i++) + { + c12[i] = nbfp[tj[i]+1]; + if ((c6[i] > 0) && (c12[i] > 0)) + { + /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */ + sigma6[i] = 0.5*c12[i]/c6[i]; + sigma2[i] = pow(sigma6[i], 1.0/3.0); + /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on + what data to store externally. 
Can't be fixed without larger scale changes, so not 4.6 */ + if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */ + { + sigma6[i] = sigma6_min; + sigma2[i] = sigma2_min; + } + } + else + { + sigma6[i] = sigma6_def; + sigma2[i] = sigma2_def; + } + if (sc_r_power == 6.0) + { + sigma_pow[i] = sigma6[i]; + sigma_powm2[i] = sigma6[i]/sigma2[i]; + } + else if (sc_r_power == 48.0) + { + sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */ + sigma_powm2[i] = sigma_pow[i]/sigma2[i]; + } + else + { /* not really supported as input, but in here for testing the general case*/ + sigma_pow[i] = pow(sigma2[i], sc_r_power/2); + sigma_powm2[i] = sigma_pow[i]/(sigma2[i]); + } + } + + /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/ + if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0)) + { + alpha_vdw_eff = 0; + alpha_coul_eff = 0; + } + else + { + alpha_vdw_eff = alpha_vdw; + alpha_coul_eff = alpha_coul; + } + + for (i = 0; i < NSTATES; i++) + { + FscalC[i] = 0; + FscalV[i] = 0; + Vcoul[i] = 0; + Vvdw[i] = 0; + + /* Only spend time on A or B state if it is non-zero */ + if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) ) + { + /* this section has to be inside the loop because of the dependence on sigma_pow */ + rpinvC = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp); + rinvC = pow(rpinvC, 1.0/sc_r_power); + rC = 1.0/rinvC; + + rpinvV = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp); + rinvV = pow(rpinvV, 1.0/sc_r_power); + rV = 1.0/rinvV; + + if (do_tab) + { + rtC = rC*tabscale; + n0 = rtC; + epsC = rtC-n0; + eps2C = epsC*epsC; + n1C = tab_elemsize*n0; + + rtV = rV*tabscale; + n0 = rtV; + epsV = rtV-n0; + eps2V = epsV*epsV; + n1V = tab_elemsize*n0; + } + - /* With Ewald and soft-core we should put the cut-off on r, - * not on the soft-cored rC, as the real-space and - * reciprocal space contributions should (almost) cancel. ++ /* Only process the coulomb interactions if we have charges, ++ * and if we either include all entries in the list (no cutoff ++ * used in the kernel), or if we are within the cutoff. 
+ */ - if (qq[i] != 0 && - !(bExactElecCutoff && - ((!bEwald && rC >= rcoulomb) || - (bEwald && r >= rcoulomb)))) ++ bComputeElecInteraction = !bExactElecCutoff || ++ ( bConvertEwaldToCoulomb && r < rcoulomb) || ++ (!bConvertEwaldToCoulomb && rC < rcoulomb); ++ ++ if ( (qq[i] != 0) && bComputeElecInteraction) + { + switch (icoul) + { + case GMX_NBKERNEL_ELEC_COULOMB: + /* simple cutoff */ + Vcoul[i] = qq[i]*rinvC; + FscalC[i] = Vcoul[i]; - break; - - case GMX_NBKERNEL_ELEC_EWALD: - /* Ewald FEP is done only on the 1/r part */ - Vcoul[i] = qq[i]*(rinvC - sh_ewald); - FscalC[i] = Vcoul[i]; ++ /* The shift for the Coulomb potential is stored in ++ * the RF parameter c_rf, which is 0 without shift ++ */ ++ Vcoul[i] -= qq[i]*fr->ic->c_rf; + break; + + case GMX_NBKERNEL_ELEC_REACTIONFIELD: + /* reaction-field */ + Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf); + FscalC[i] = qq[i]*(rinvC - 2.0*krf*rC*rC); + break; + + case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE: + /* non-Ewald tabulated coulomb */ + nnn = n1C; + Y = VFtab[nnn]; + F = VFtab[nnn+1]; + Geps = epsC*VFtab[nnn+2]; + Heps2 = eps2C*VFtab[nnn+3]; + Fp = F+Geps+Heps2; + VV = Y+epsC*Fp; + FF = Fp+Geps+2.0*Heps2; + Vcoul[i] = qq[i]*VV; + FscalC[i] = -qq[i]*tabscale*FF*rC; + break; + + case GMX_NBKERNEL_ELEC_GENERALIZEDBORN: + gmx_fatal(FARGS, "Free energy and GB not implemented.\n"); + break; + ++ case GMX_NBKERNEL_ELEC_EWALD: ++ if (bConvertEwaldToCoulomb) ++ { ++ /* Ewald FEP is done only on the 1/r part */ ++ Vcoul[i] = qq[i]*(rinvC-sh_ewald); ++ FscalC[i] = qq[i]*rinvC; ++ } ++ else ++ { ++ ewrt = rC*ewtabscale; ++ ewitab = (int) ewrt; ++ eweps = ewrt-ewitab; ++ ewitab = 4*ewitab; ++ FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1]; ++ rinvcorr = rinvC-sh_ewald; ++ Vcoul[i] = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i]))); ++ FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]); ++ } ++ break; ++ + case GMX_NBKERNEL_ELEC_NONE: + FscalC[i] = 0.0; + Vcoul[i] = 0.0; + break; + + default: + gmx_incons("Invalid icoul in free energy kernel"); + break; + } + + if (fr->coulomb_modifier == eintmodPOTSWITCH) + { - d = rC-rswitch; ++ d = rC-fr->rcoulomb_switch; + d = (d > 0.0) ? d : 0.0; + d2 = d*d; - sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5)); - dsw = d2*(swF2+d*(swF3+d*swF4)); ++ sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5)); ++ dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4)); ++ ++ FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw; ++ Vcoul[i] *= sw; + - Vcoul[i] *= sw; - FscalC[i] = FscalC[i]*sw + Vcoul[i]*dsw; ++ FscalC[i] = (rC < rcoulomb) ? FscalC[i] : 0.0; ++ Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : 0.0; + } + } + - if ((c6[i] != 0 || c12[i] != 0) && - !(bExactVdwCutoff && - ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) || - (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw)))) ++ /* Only process the VDW interactions if we have ++ * some non-zero parameters, and if we either ++ * include all entries in the list (no cutoff used ++ * in the kernel), or if we are within the cutoff. 
++ */ ++ bComputeVdwInteraction = !bExactVdwCutoff || ++ ( bConvertLJEwaldToLJ6 && r < rvdw) || ++ (!bConvertLJEwaldToLJ6 && rV < rvdw); ++ if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction) + { + switch (ivdw) + { + case GMX_NBKERNEL_VDW_LENNARDJONES: + case GMX_NBKERNEL_VDW_LJEWALD: + /* cutoff LJ */ + if (sc_r_power == 6.0) + { + rinv6 = rpinvV; + } + else + { + rinv6 = pow(rinvV, 6.0); + } + Vvdw6 = c6[i]*rinv6; + Vvdw12 = c12[i]*rinv6*rinv6; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { + Vvdw[i] = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0) + -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0)); + } + else + { + Vvdw[i] = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0); + } + FscalV[i] = Vvdw12 - Vvdw6; + break; + + case GMX_NBKERNEL_VDW_BUCKINGHAM: + gmx_fatal(FARGS, "Buckingham free energy not supported."); + break; + + case GMX_NBKERNEL_VDW_CUBICSPLINETABLE: + /* Table LJ */ + nnn = n1V+4; + /* dispersion */ + Y = VFtab[nnn]; + F = VFtab[nnn+1]; + Geps = epsV*VFtab[nnn+2]; + Heps2 = eps2V*VFtab[nnn+3]; + Fp = F+Geps+Heps2; + VV = Y+epsV*Fp; + FF = Fp+Geps+2.0*Heps2; + Vvdw[i] += c6[i]*VV; + FscalV[i] -= c6[i]*tabscale*FF*rV; + + /* repulsion */ + Y = VFtab[nnn+4]; + F = VFtab[nnn+5]; + Geps = epsV*VFtab[nnn+6]; + Heps2 = eps2V*VFtab[nnn+7]; + Fp = F+Geps+Heps2; + VV = Y+epsV*Fp; + FF = Fp+Geps+2.0*Heps2; + Vvdw[i] += c12[i]*VV; + FscalV[i] -= c12[i]*tabscale*FF*rV; + break; + + case GMX_NBKERNEL_VDW_NONE: + Vvdw[i] = 0.0; + FscalV[i] = 0.0; + break; + + default: + gmx_incons("Invalid ivdw in free energy kernel"); + break; + } + + if (fr->vdw_modifier == eintmodPOTSWITCH) + { - d = rV-rswitch; - d = (d > 0.0) ? d : 0.0; - d2 = d*d; - sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5)); - dsw = d2*(swF2+d*(swF3+d*swF4)); ++ d = rV-fr->rvdw_switch; ++ d = (d > 0.0) ? d : 0.0; ++ d2 = d*d; ++ sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5)); ++ dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4)); + - Vvdw[i] *= sw; - FscalV[i] = FscalV[i]*sw + Vvdw[i]*dsw; ++ FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw; ++ Vvdw[i] *= sw; + + FscalV[i] = (rV < rvdw) ? FscalV[i] : 0.0; + Vvdw[i] = (rV < rvdw) ? Vvdw[i] : 0.0; + } + } + + /* FscalC (and FscalV) now contain: dV/drC * rC + * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p + * Further down we first multiply by r^p-2 and then by + * the vector r, which in total gives: dV/drC * (r/rC)^1-p + */ + FscalC[i] *= rpinvC; + FscalV[i] *= rpinvV; + } + } + + /* Assemble A and B states */ + for (i = 0; i < NSTATES; i++) + { + vctot += LFC[i]*Vcoul[i]; + vvtot += LFV[i]*Vvdw[i]; + + Fscal += LFC[i]*FscalC[i]*rpm2; + Fscal += LFV[i]*FscalV[i]*rpm2; + + dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i]; + dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i]; + } + } + else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD) + { + /* For excluded pairs, which are only in this pair list when + * using the Verlet scheme, we don't use soft-core. + * The group scheme also doesn't soft-core for these. + * As there is no singularity, there is no need for soft-core. 
+ */ + VV = krf*rsq - crf; + FF = -2.0*krf; + + if (ii == jnr) + { + VV *= 0.5; + } + + for (i = 0; i < NSTATES; i++) + { + vctot += LFC[i]*qq[i]*VV; + Fscal += LFC[i]*qq[i]*FF; + dvdl_coul += DLF[i]*qq[i]*VV; + } + } + - if (icoul == GMX_NBKERNEL_ELEC_EWALD && - !(bExactElecCutoff && r >= rcoulomb)) ++ if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) ) + { - /* Because we compute the soft-core normally, - * we have to remove the Ewald short range portion. - * Done outside of the states loop because this part - * doesn't depend on the scaled R. ++ /* See comment in the preamble. When using Ewald interactions ++ * (unless we use a switch modifier) we subtract the reciprocal-space ++ * Ewald component here which made it possible to apply the free ++ * energy interaction to 1/r (vanilla coulomb short-range part) ++ * above. This gets us closer to the ideal case of applying ++ * the softcore to the entire electrostatic interaction, ++ * including the reciprocal-space component. + */ - real rs, frac, f_lr; - int ri; ++ real v_lr, f_lr; + - rs = rsq*rinv*tab_ewald_scale; - ri = (int)rs; - frac = rs - ri; - f_lr = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1]; - FF = f_lr*rinv; - VV = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr); ++ ewrt = r*ewtabscale; ++ ewitab = (int) ewrt; ++ eweps = ewrt-ewitab; ++ ewitab = 4*ewitab; ++ f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1]; ++ v_lr = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr)); ++ f_lr *= rinv; + + if (ii == jnr) + { - VV *= 0.5; ++ /* If we get here, the i particle (ii) has itself (jnr) ++ * in its neighborlist. This can only happen with the Verlet ++ * scheme, and corresponds to a self-interaction that will ++ * occur twice. Scale it down by 50% to only include it once. ++ */ ++ v_lr *= 0.5; + } + + for (i = 0; i < NSTATES; i++) + { - vctot -= LFC[i]*qq[i]*VV; - Fscal -= LFC[i]*qq[i]*FF; - dvdl_coul -= (DLF[i]*qq[i])*VV; ++ vctot -= LFC[i]*qq[i]*v_lr; ++ Fscal -= LFC[i]*qq[i]*f_lr; ++ dvdl_coul -= (DLF[i]*qq[i])*v_lr; + } + } + - if (ivdw == GMX_NBKERNEL_VDW_LJEWALD && - !(bExactVdwCutoff && r >= rvdw)) ++ if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw)) + { ++ /* See comment in the preamble. When using LJ-Ewald interactions ++ * (unless we use a switch modifier) we subtract the reciprocal-space ++ * Ewald component here which made it possible to apply the free ++ * energy interaction to r^-6 (vanilla LJ6 short-range part) ++ * above. This gets us closer to the ideal case of applying ++ * the softcore to the entire VdW interaction, ++ * including the reciprocal-space component. ++ */ + real rs, frac, f_lr; + int ri; + - rs = rsq*rinv*tab_ewald_scale; ++ rs = rsq*rinv*ewtabscale; + ri = (int)rs; + frac = rs - ri; + f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1]; + FF = f_lr*rinv; - VV = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr); ++ VV = tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr); ++ ++ if (ii == jnr) ++ { ++ /* If we get here, the i particle (ii) has itself (jnr) ++ * in its neighborlist. This can only happen with the Verlet ++ * scheme, and corresponds to a self-interaction that will ++ * occur twice. Scale it down by 50% to only include it once. 
++ */ ++ VV *= 0.5; ++ } ++ + for (i = 0; i < NSTATES; i++) + { - vvtot += LFV[i]*c6grid[i]*VV*(1.0/6.0); - Fscal += LFV[i]*c6grid[i]*FF*(1.0/6.0); - dvdl_vdw += (DLF[i]*c6grid[i])*VV*(1.0/6.0); ++ c6grid = nbfp_grid[tj[i]]; ++ vvtot += LFV[i]*c6grid*VV*(1.0/6.0); ++ Fscal += LFV[i]*c6grid*FF*(1.0/6.0); ++ dvdl_vdw += (DLF[i]*c6grid)*VV*(1.0/6.0); + } + + } + + if (bDoForces) + { + tx = Fscal*dx; + ty = Fscal*dy; + tz = Fscal*dz; + fix = fix + tx; + fiy = fiy + ty; + fiz = fiz + tz; + /* OpenMP atomics are expensive, but this kernels is also + * expensive, so we can take this hit, instead of using + * thread-local output buffers and extra reduction. + */ +#pragma omp atomic + f[j3] -= tx; +#pragma omp atomic + f[j3+1] -= ty; +#pragma omp atomic + f[j3+2] -= tz; + } + } + + /* The atomics below are expensive with many OpenMP threads. + * Here unperturbed i-particles will usually only have a few + * (perturbed) j-particles in the list. Thus with a buffered list + * we can skip a significant number of i-reductions with a check. + */ + if (npair_within_cutoff > 0) + { + if (bDoForces) + { +#pragma omp atomic + f[ii3] += fix; +#pragma omp atomic + f[ii3+1] += fiy; +#pragma omp atomic + f[ii3+2] += fiz; + } + if (bDoShiftForces) + { +#pragma omp atomic + fshift[is3] += fix; +#pragma omp atomic + fshift[is3+1] += fiy; +#pragma omp atomic + fshift[is3+2] += fiz; + } + if (bDoPotential) + { + ggid = gid[n]; +#pragma omp atomic + Vc[ggid] += vctot; +#pragma omp atomic + Vv[ggid] += vvtot; + } + } + } + +#pragma omp atomic + dvdl[efptCOUL] += dvdl_coul; + #pragma omp atomic + dvdl[efptVDW] += dvdl_vdw; + + /* Estimate flops, average for free energy stuff: + * 12 flops per outer iteration + * 150 flops per inner iteration + */ +#pragma omp atomic + inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150); +} + +real +nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw, + real tabscale, real *vftab, + real qqA, real c6A, real c12A, real qqB, real c6B, real c12B, + real LFC[2], real LFV[2], real DLF[2], + real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2], + real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min, + real *velectot, real *vvdwtot, real *dvdl) +{ + real r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal; + real qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2]; + real alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw; + real rpinv, r_coul, r_vdw, velecsum, vvdwsum; + real fscal_vdw[2], fscal_elec[2]; + real velec[2], vvdw[2]; + int i, ntab; + + qq[0] = qqA; + qq[1] = qqB; + c6[0] = c6A; + c6[1] = c6B; + c12[0] = c12A; + c12[1] = c12B; + + if (sc_r_power == 6.0) + { + rpm2 = r2*r2; /* r4 */ + rp = rpm2*r2; /* r6 */ + } + else if (sc_r_power == 48.0) + { + rp = r2*r2*r2; /* r6 */ + rp = rp*rp; /* r12 */ + rp = rp*rp; /* r24 */ + rp = rp*rp; /* r48 */ + rpm2 = rp/r2; /* r46 */ + } + else + { + rp = pow(r2, 0.5*sc_r_power); /* not currently supported as input, but can handle it */ + rpm2 = rp/r2; + } + + /* Loop over state A(0) and B(1) */ + for (i = 0; i < 2; i++) + { + if ((c6[i] > 0) && (c12[i] > 0)) + { + /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively. + * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5. + */ + sigma6[i] = 0.5*c12[i]/c6[i]; + sigma2[i] = pow(0.5*c12[i]/c6[i], 1.0/3.0); + /* should be able to get rid of this ^^^ internal pow call eventually. 
Will require agreement on + what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */ + if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */ + { + sigma6[i] = sigma6_min; + sigma2[i] = sigma2_min; + } + } + else + { + sigma6[i] = sigma6_def; + sigma2[i] = sigma2_def; + } + if (sc_r_power == 6.0) + { + sigma_pow[i] = sigma6[i]; + sigma_powm2[i] = sigma6[i]/sigma2[i]; + } + else if (sc_r_power == 48.0) + { + sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */ + sigma_powm2[i] = sigma_pow[i]/sigma2[i]; + } + else + { /* not really supported as input, but in here for testing the general case*/ + sigma_pow[i] = pow(sigma2[i], sc_r_power/2); + sigma_powm2[i] = sigma_pow[i]/(sigma2[i]); + } + } + + /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/ + if ((c12[0] > 0) && (c12[1] > 0)) + { + alpha_vdw_eff = 0; + alpha_coul_eff = 0; + } + else + { + alpha_vdw_eff = alpha_vdw; + alpha_coul_eff = alpha_coul; + } + + /* Loop over A and B states again */ + for (i = 0; i < 2; i++) + { + fscal_elec[i] = 0; + fscal_vdw[i] = 0; + velec[i] = 0; + vvdw[i] = 0; + + /* Only spend time on A or B state if it is non-zero */ + if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) ) + { + /* Coulomb */ + rpinv = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp); + r_coul = pow(rpinv, -1.0/sc_r_power); + + /* Electrostatics table lookup data */ + rtab = r_coul*tabscale; + ntab = rtab; + eps = rtab-ntab; + eps2 = eps*eps; + ntab = 12*ntab; + /* Electrostatics */ + Y = vftab[ntab]; + F = vftab[ntab+1]; + Geps = eps*vftab[ntab+2]; + Heps2 = eps2*vftab[ntab+3]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + velec[i] = qq[i]*VV; + fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale; + + /* Vdw */ + rpinv = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp); + r_vdw = pow(rpinv, -1.0/sc_r_power); + /* Vdw table lookup data */ + rtab = r_vdw*tabscale; + ntab = rtab; + eps = rtab-ntab; + eps2 = eps*eps; + ntab = 12*ntab; + /* Dispersion */ + Y = vftab[ntab+4]; + F = vftab[ntab+5]; + Geps = eps*vftab[ntab+6]; + Heps2 = eps2*vftab[ntab+7]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw[i] = c6[i]*VV; + fscal_vdw[i] = -c6[i]*FF; + + /* Repulsion */ + Y = vftab[ntab+8]; + F = vftab[ntab+9]; + Geps = eps*vftab[ntab+10]; + Heps2 = eps2*vftab[ntab+11]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw[i] += c12[i]*VV; + fscal_vdw[i] -= c12[i]*FF; + fscal_vdw[i] *= r_vdw*rpinv*tabscale; + } + } + /* Now we have velec[i], vvdw[i], and fscal[i] for both states */ + /* Assemble A and B states */ + velecsum = 0; + vvdwsum = 0; + dvdl_coul = 0; + dvdl_vdw = 0; + fscal = 0; + for (i = 0; i < 2; i++) + { + velecsum += LFC[i]*velec[i]; + vvdwsum += LFV[i]*vvdw[i]; + + fscal += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2; + + dvdl_coul += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i]; + dvdl_vdw += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i]; + } + + dvdl[efptCOUL] += dvdl_coul; + dvdl[efptVDW] += dvdl_vdw; + + *velectot = velecsum; + *vvdwtot = vvdwsum; + + return fscal; +} diff --cc src/gromacs/gmxlib/nonbonded/nb_generic.c index 4f29311dea,0000000000..019ba8b341 mode 100644,000000..100644 --- a/src/gromacs/gmxlib/nonbonded/nb_generic.c +++ 
b/src/gromacs/gmxlib/nonbonded/nb_generic.c @@@ -1,480 -1,0 +1,480 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2012,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> + +#include "types/simple.h" +#include "vec.h" +#include "typedefs.h" +#include "nb_generic.h" +#include "nrnb.h" + +#include "gmx_fatal.h" + +#include "nonbonded.h" +#include "nb_kernel.h" + +void +gmx_nb_generic_kernel(t_nblist * nlist, + rvec * xx, + rvec * ff, + t_forcerec * fr, + t_mdatoms * mdatoms, + nb_kernel_data_t * kernel_data, + t_nrnb * nrnb) +{ + int nri, ntype, table_nelements, ielec, ivdw; + real facel, gbtabscale; + int n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid, nnn, n0; + real shX, shY, shZ; + real fscal, felec, fvdw, velec, vvdw, tx, ty, tz; + real rinvsq; + real iq; + real qq, vctot; + int nti, nvdwparam; + int tj; + real rt, r, eps, eps2, Y, F, Geps, Heps2, VV, FF, Fp, fijD, fijR; + real rinvsix; + real vvdwtot; + real vvdw_rep, vvdw_disp; + real ix, iy, iz, fix, fiy, fiz; + real jx, jy, jz; + real dx, dy, dz, rsq, rinv; + real c6, c12, c6grid, cexp1, cexp2, br; + real * charge; + real * shiftvec; + real * vdwparam, *vdwgridparam; + int * shift; + int * type; + real * fshift; + real * velecgrp; + real * vvdwgrp; + real tabscale; + real * VFtab; + real * x; + real * f; + int ewitab; + real ewtabscale, eweps, sh_ewald, ewrt, ewtabhalfspace; + real * ewtab; + real rcoulomb2, rvdw, rvdw2, sh_dispersion, sh_repulsion; + real rcutoff, rcutoff2; + real rswitch_elec, rswitch_vdw, d, d2, sw, dsw, rinvcorr; + real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4; + real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4; + real ewclj, ewclj2, ewclj6, ewcljrsq, poly, exponent, sh_lj_ewald; + gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoff; + + x = xx[0]; + f = ff[0]; + ielec = nlist->ielec; + ivdw = nlist->ivdw; + + fshift = fr->fshift[0]; + velecgrp = kernel_data->energygrp_elec; + vvdwgrp = kernel_data->energygrp_vdw; + tabscale = kernel_data->table_elec_vdw->scale; + VFtab = kernel_data->table_elec_vdw->data; + + sh_ewald = fr->ic->sh_ewald; + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = fr->ic->tabq_scale; + ewtabhalfspace = 0.5/ewtabscale; + + rcoulomb2 = fr->rcoulomb*fr->rcoulomb; + rvdw = fr->rvdw; + rvdw2 = rvdw*rvdw; + sh_dispersion = fr->ic->dispersion_shift.cpot; + sh_repulsion = fr->ic->repulsion_shift.cpot; + sh_lj_ewald = fr->ic->sh_lj_ewald; + + ewclj = fr->ewaldcoeff_lj; + ewclj2 = ewclj*ewclj; + ewclj6 = ewclj2*ewclj2*ewclj2; + + if (fr->coulomb_modifier == eintmodPOTSWITCH) + { + d = fr->rcoulomb-fr->rcoulomb_switch; + elec_swV3 = -10.0/(d*d*d); + elec_swV4 = 15.0/(d*d*d*d); + elec_swV5 = -6.0/(d*d*d*d*d); + elec_swF2 = -30.0/(d*d*d); + elec_swF3 = 60.0/(d*d*d*d); + elec_swF4 = -30.0/(d*d*d*d*d); + } + else + { + /* Avoid warnings from stupid compilers (looking at you, Clang!) */ + elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0; + } + if (fr->vdw_modifier == eintmodPOTSWITCH) + { + d = fr->rvdw-fr->rvdw_switch; + vdw_swV3 = -10.0/(d*d*d); + vdw_swV4 = 15.0/(d*d*d*d); + vdw_swV5 = -6.0/(d*d*d*d*d); + vdw_swF2 = -30.0/(d*d*d); + vdw_swF3 = 60.0/(d*d*d*d); + vdw_swF4 = -30.0/(d*d*d*d*d); + } + else + { + /* Avoid warnings from stupid compilers (looking at you, Clang!) */ + vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0; + } + + bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO; + bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE); + bExactCutoff = bExactElecCutoff && bExactVdwCutoff; + + if (bExactCutoff) + { + rcutoff = ( fr->rcoulomb > fr->rvdw ) ? 
fr->rcoulomb : fr->rvdw; + rcutoff2 = rcutoff*rcutoff; + } + else + { + /* Fix warnings for stupid compilers */ + rcutoff = rcutoff2 = 1e30; + } + + /* avoid compiler warnings for cases that cannot happen */ + nnn = 0; + eps = 0.0; + eps2 = 0.0; + + /* 3 VdW parameters for Buckingham, otherwise 2 */ + nvdwparam = (ivdw == GMX_NBKERNEL_VDW_BUCKINGHAM) ? 3 : 2; + table_nelements = 12; + + charge = mdatoms->chargeA; + type = mdatoms->typeA; + facel = fr->epsfac; + shiftvec = fr->shift_vec[0]; + vdwparam = fr->nbfp; + ntype = fr->ntype; + vdwgridparam = fr->ljpme_c6grid; + + for (n = 0; (n < nlist->nri); n++) + { + is3 = 3*nlist->shift[n]; + shX = shiftvec[is3]; + shY = shiftvec[is3+1]; + shZ = shiftvec[is3+2]; + nj0 = nlist->jindex[n]; + nj1 = nlist->jindex[n+1]; + ii = nlist->iinr[n]; + ii3 = 3*ii; + ix = shX + x[ii3+0]; + iy = shY + x[ii3+1]; + iz = shZ + x[ii3+2]; + iq = facel*charge[ii]; + nti = nvdwparam*ntype*type[ii]; + vctot = 0; + vvdwtot = 0; + fix = 0; + fiy = 0; + fiz = 0; + + for (k = nj0; (k < nj1); k++) + { + jnr = nlist->jjnr[k]; + j3 = 3*jnr; + jx = x[j3+0]; + jy = x[j3+1]; + jz = x[j3+2]; + dx = ix - jx; + dy = iy - jy; + dz = iz - jz; + rsq = dx*dx+dy*dy+dz*dz; + rinv = gmx_invsqrt(rsq); + rinvsq = rinv*rinv; + felec = 0; + fvdw = 0; + velec = 0; + vvdw = 0; + + if (bExactCutoff && rsq >= rcutoff2) + { + continue; + } + + if (ielec == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE) + { + r = rsq*rinv; + rt = r*tabscale; + n0 = rt; + eps = rt-n0; + eps2 = eps*eps; + nnn = table_nelements*n0; + } + + /* Coulomb interaction. ielec==0 means no interaction */ + if (ielec != GMX_NBKERNEL_ELEC_NONE) + { + qq = iq*charge[jnr]; + + switch (ielec) + { + case GMX_NBKERNEL_ELEC_NONE: + break; + + case GMX_NBKERNEL_ELEC_COULOMB: + /* Vanilla cutoff coulomb */ + velec = qq*rinv; + felec = velec*rinvsq; + /* The shift for the Coulomb potential is stored in + * the RF parameter c_rf, which is 0 without shift + */ + velec -= qq*fr->ic->c_rf; + break; + + case GMX_NBKERNEL_ELEC_REACTIONFIELD: + /* Reaction-field */ + velec = qq*(rinv+fr->k_rf*rsq-fr->c_rf); + felec = qq*(rinv*rinvsq-2.0*fr->k_rf); + break; + + case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE: + /* Tabulated coulomb */ + Y = VFtab[nnn]; + F = VFtab[nnn+1]; + Geps = eps*VFtab[nnn+2]; + Heps2 = eps2*VFtab[nnn+3]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + velec = qq*VV; + felec = -qq*FF*tabscale*rinv; + break; + + case GMX_NBKERNEL_ELEC_GENERALIZEDBORN: + /* GB */ + gmx_fatal(FARGS, "Death & horror! GB generic interaction not implemented.\n"); + break; + + case GMX_NBKERNEL_ELEC_EWALD: + ewrt = rsq*rinv*ewtabscale; + ewitab = ewrt; + eweps = ewrt-ewitab; + ewitab = 4*ewitab; + felec = ewtab[ewitab]+eweps*ewtab[ewitab+1]; + rinvcorr = (fr->coulomb_modifier == eintmodPOTSHIFT) ? rinv-fr->ic->sh_ewald : rinv; + velec = qq*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec))); + felec = qq*rinv*(rinvsq-felec); + break; + + default: + gmx_fatal(FARGS, "Death & horror! No generic coulomb interaction for ielec=%d.\n", ielec); + break; + } + if (fr->coulomb_modifier == eintmodPOTSWITCH) + { + d = rsq*rinv-fr->rcoulomb_switch; + d = (d > 0.0) ? d : 0.0; + d2 = d*d; + sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5)); + dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4)); + /* Apply switch function. Note that felec=f/r since it will be multiplied + * by the i-j displacement vector. 
This means felec'=f'/r=-(v*sw)'/r= + * -(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=felec*sw-v*dsw/r + */ + felec = felec*sw - rinv*velec*dsw; + /* Once we have used velec to update felec we can modify velec too */ + velec *= sw; + } + if (bExactElecCutoff) + { + felec = (rsq < rcoulomb2) ? felec : 0.0; + velec = (rsq < rcoulomb2) ? velec : 0.0; + } + vctot += velec; + } /* End of coulomb interactions */ + + + /* VdW interaction. ivdw==0 means no interaction */ + if (ivdw != GMX_NBKERNEL_VDW_NONE) + { + tj = nti+nvdwparam*type[jnr]; + + switch (ivdw) + { + case GMX_NBKERNEL_VDW_NONE: + break; + + case GMX_NBKERNEL_VDW_LENNARDJONES: + /* Vanilla Lennard-Jones cutoff */ + c6 = vdwparam[tj]; + c12 = vdwparam[tj+1]; + rinvsix = rinvsq*rinvsq*rinvsq; + vvdw_disp = c6*rinvsix; + vvdw_rep = c12*rinvsix*rinvsix; + fvdw = (vvdw_rep-vvdw_disp)*rinvsq; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { + vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion)/6.0; + } + else + { + vvdw = vvdw_rep/12.0-vvdw_disp/6.0; + } + break; + + case GMX_NBKERNEL_VDW_BUCKINGHAM: + /* Buckingham */ + c6 = vdwparam[tj]; + cexp1 = vdwparam[tj+1]; + cexp2 = vdwparam[tj+2]; + + rinvsix = rinvsq*rinvsq*rinvsq; + vvdw_disp = c6*rinvsix; + br = cexp2*rsq*rinv; + vvdw_rep = cexp1*exp(-br); + fvdw = (br*vvdw_rep-vvdw_disp)*rinvsq; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { + vvdw = (vvdw_rep-cexp1*exp(-cexp2*rvdw))-(vvdw_disp + c6*sh_dispersion)/6.0; + } + else + { + vvdw = vvdw_rep-vvdw_disp/6.0; + } + break; + + case GMX_NBKERNEL_VDW_CUBICSPLINETABLE: + /* Tabulated VdW */ + c6 = vdwparam[tj]; + c12 = vdwparam[tj+1]; + Y = VFtab[nnn+4]; + F = VFtab[nnn+5]; + Geps = eps*VFtab[nnn+6]; + Heps2 = eps2*VFtab[nnn+7]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw_disp = c6*VV; + fijD = c6*FF; + Y = VFtab[nnn+8]; + F = VFtab[nnn+9]; + Geps = eps*VFtab[nnn+10]; + Heps2 = eps2*VFtab[nnn+11]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw_rep = c12*VV; + fijR = c12*FF; + fvdw = -(fijD+fijR)*tabscale*rinv; + vvdw = vvdw_disp + vvdw_rep; + break; + + + case GMX_NBKERNEL_VDW_LJEWALD: + /* LJ-PME */ + rinvsix = rinvsq*rinvsq*rinvsq; + ewcljrsq = ewclj2*rsq; + exponent = exp(-ewcljrsq); + poly = exponent*(1.0 + ewcljrsq + ewcljrsq*ewcljrsq*0.5); + c6 = vdwparam[tj]; + c12 = vdwparam[tj+1]; + c6grid = vdwgridparam[tj]; + vvdw_disp = (c6-c6grid*(1.0-poly))*rinvsix; + vvdw_rep = c12*rinvsix*rinvsix; + fvdw = (vvdw_rep - vvdw_disp - c6grid*(1.0/6.0)*exponent*ewclj6)*rinvsq; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { - vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion + c6grid*sh_lj_ewald)/6.0; ++ vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion - c6grid*sh_lj_ewald)/6.0; + } + else + { + vvdw = vvdw_rep/12.0-vvdw_disp/6.0; + } + break; + + default: + gmx_fatal(FARGS, "Death & horror! No generic VdW interaction for ivdw=%d.\n", ivdw); + break; + } + if (fr->vdw_modifier == eintmodPOTSWITCH) + { + d = rsq*rinv-fr->rvdw_switch; + d = (d > 0.0) ? d : 0.0; + d2 = d*d; + sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5)); + dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4)); + /* See coulomb interaction for the force-switch formula */ + fvdw = fvdw*sw - rinv*vvdw*dsw; + vvdw *= sw; + } + if (bExactVdwCutoff) + { + fvdw = (rsq < rvdw2) ? fvdw : 0.0; + vvdw = (rsq < rvdw2) ? 
vvdw : 0.0; + } + vvdwtot += vvdw; + } /* end VdW interactions */ + + fscal = felec+fvdw; + + tx = fscal*dx; + ty = fscal*dy; + tz = fscal*dz; + fix = fix + tx; + fiy = fiy + ty; + fiz = fiz + tz; + f[j3+0] = f[j3+0] - tx; + f[j3+1] = f[j3+1] - ty; + f[j3+2] = f[j3+2] - tz; + } + + f[ii3+0] = f[ii3+0] + fix; + f[ii3+1] = f[ii3+1] + fiy; + f[ii3+2] = f[ii3+2] + fiz; + fshift[is3] = fshift[is3]+fix; + fshift[is3+1] = fshift[is3+1]+fiy; + fshift[is3+2] = fshift[is3+2]+fiz; + ggid = nlist->gid[n]; + velecgrp[ggid] += vctot; + vvdwgrp[ggid] += vvdwtot; + } + /* Estimate flops, average for generic kernel: + * 12 flops per outer iteration + * 50 flops per inner iteration + */ + inc_nrnb(nrnb, eNR_NBKERNEL_GENERIC, nlist->nri*12 + nlist->jindex[n]*50); +} diff --cc src/gromacs/gmxlib/nonbonded/nonbonded.c index 95cc2d5720,0000000000..ab68c47db0 mode 100644,000000..100644 --- a/src/gromacs/gmxlib/nonbonded/nonbonded.c +++ b/src/gromacs/gmxlib/nonbonded/nonbonded.c @@@ -1,686 -1,0 +1,707 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> + +#include "thread_mpi/threads.h" + +#include "typedefs.h" +#include "txtdump.h" +#include "gromacs/utility/smalloc.h" +#include "ns.h" +#include "vec.h" +#include "gromacs/math/utilities.h" +#include "macros.h" +#include "gromacs/utility/cstringutil.h" +#include "force.h" +#include "names.h" +#include "main.h" +#include "xvgr.h" +#include "gmx_fatal.h" +#include "physics.h" +#include "force.h" +#include "bondf.h" +#include "nrnb.h" +#include "nonbonded.h" +#include "gromacs/simd/simd.h" + +#include "nb_kernel.h" +#include "nb_free_energy.h" +#include "nb_generic.h" +#include "nb_generic_cg.h" +#include "nb_generic_adress.h" + +/* Different default (c) and SIMD instructions interaction-specific kernels */ +#include "nb_kernel_c/nb_kernel_c.h" + +#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) +# include "nb_kernel_sse2_single/nb_kernel_sse2_single.h" +#endif +#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) +# include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h" +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) +# include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h" +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) +# include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h" +#endif +#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) +# include "nb_kernel_sse2_double/nb_kernel_sse2_double.h" +#endif +#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) +# include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h" +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) +# include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h" +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) +# include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h" +#endif +#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) +# include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h" +#endif + + +static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER; +static gmx_bool nonbonded_setup_done = FALSE; + + +void +gmx_nonbonded_setup(t_forcerec * fr, + gmx_bool bGenericKernelOnly) +{ + tMPI_Thread_mutex_lock(&nonbonded_setup_mutex); + /* Here we are guaranteed only one thread made it. 
*/ + if (nonbonded_setup_done == FALSE) + { + if (bGenericKernelOnly == FALSE) + { + /* Add the generic kernels to the structure stored statically in nb_kernel.c */ + nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size); + + if (!(fr != NULL && fr->use_simd_kernels == FALSE)) + { + /* Add interaction-specific kernels for different architectures */ + /* Single precision */ +#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size); +#endif +#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size); +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size); +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size); +#endif + /* Double precision */ +#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size); +#endif +#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size); +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size); +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size); +#endif +#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size); +#endif + ; /* empty statement to avoid a completely empty block */ + } + } + /* Create a hash for faster lookups */ + nb_kernel_list_hash_init(); + + nonbonded_setup_done = TRUE; + } + tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex); +} + + + +void - gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl) ++gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers) +{ + const char * elec; + const char * elec_mod; + const char * vdw; + const char * vdw_mod; + const char * geom; + const char * other; + const char * vf; + + struct + { + const char * arch; + int simd_padding_width; + } + arch_and_padding[] = + { + /* Single precision */ +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) + { "avx_256_single", 8 }, +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) + { "avx_128_fma_single", 4 }, +#endif +#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) + { "sse4_1_single", 4 }, +#endif +#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) + { "sse2_single", 4 }, +#endif + /* Double precision */ +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) + { "avx_256_double", 4 }, +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) + /* Sic. Double precision 2-way SIMD does not require neighbor list padding, + * since the kernels execute a loop unrolled a factor 2, followed by + * a possible single odd-element epilogue. 
+ */ + { "avx_128_fma_double", 1 }, +#endif +#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sse2_double", 1 }, +#endif +#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sse4_1_double", 1 }, +#endif +#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sparc64_hpc_ace_double", 1 }, +#endif + { "c", 1 }, + }; + int narch = asize(arch_and_padding); + int i; + + if (nonbonded_setup_done == FALSE) + { + /* We typically call this setup routine before starting timers, + * but if that has not been done for whatever reason we do it now. + */ + gmx_nonbonded_setup(NULL, FALSE); + } + + /* Not used yet */ + other = ""; + + nl->kernelptr_vf = NULL; + nl->kernelptr_v = NULL; + nl->kernelptr_f = NULL; + + elec = gmx_nbkernel_elec_names[nl->ielec]; + elec_mod = eintmod_names[nl->ielecmod]; + vdw = gmx_nbkernel_vdw_names[nl->ivdw]; + vdw_mod = eintmod_names[nl->ivdwmod]; + geom = gmx_nblist_geometry_names[nl->igeometry]; + + if (nl->type == GMX_NBLIST_INTERACTION_ADRESS) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel; + nl->simd_padding_width = 1; + return; + } + + if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY) + { + nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel; + nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel; + nl->simd_padding_width = 1; + } + else if (!gmx_strcasecmp_min(geom, "CG-CG")) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel; + nl->simd_padding_width = 1; + } + else + { + /* Try to find a specific kernel first */ + + for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++) + { + nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + } + for (i = 0; i < narch && nl->kernelptr_f == NULL; i++) + { + nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + + /* If there is not force-only optimized kernel, is there a potential & force one? */ + if (nl->kernelptr_f == NULL) + { + nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + } + } + - /* Give up. If this was a water kernel, leave the pointer as NULL, which - * will disable water optimization in NS. If it is a particle kernel, set - * the pointer to the generic NB kernel. ++ /* For now, the accelerated kernels cannot handle the combination of switch functions for both ++ * electrostatics and VdW that use different switch radius or switch cutoff distances ++ * (both of them enter in the switch function calculation). This would require ++ * us to evaluate two completely separate switch functions for every interaction. ++ * Instead, we disable such kernels by setting the pointer to NULL. ++ * This will cause the generic kernel (which can handle it) to be called instead. 
++ * ++ * Note that we typically already enable tabulated coulomb interactions for this case, ++ * so this is mostly a safe-guard to make sure we call the generic kernel if the ++ * tables are disabled. ++ */ ++ if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) && ++ (nl->ivdw != GMX_NBKERNEL_VDW_NONE) && (nl->ivdwmod == eintmodPOTSWITCH) && ++ bElecAndVdwSwitchDiffers) ++ { ++ nl->kernelptr_vf = NULL; ++ nl->kernelptr_f = NULL; ++ } ++ ++ /* Give up, pick a generic one instead. ++ * We only do this for particle-particle kernels; by leaving the water-optimized kernel ++ * pointers to NULL, the water optimization will automatically be disabled for this interaction. + */ + if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle")) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_kernel; + nl->simd_padding_width = 1; + if (debug) + { + fprintf(debug, + "WARNING - Slow generic NB kernel used for neighborlist with\n" + " Elec: '%s', Modifier: '%s'\n" - " Vdw: '%s', Modifier: '%s'\n" - " Geom: '%s', Other: '%s'\n\n", - elec, elec_mod, vdw, vdw_mod, geom, other); ++ " Vdw: '%s', Modifier: '%s'\n", ++ elec, elec_mod, vdw, vdw_mod); + } + } + } - + return; +} + +void do_nonbonded(t_forcerec *fr, + rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl, + gmx_grppairener_t *grppener, + t_nrnb *nrnb, real *lambda, real *dvdl, + int nls, int eNL, int flags) +{ + t_nblist * nlist; + int n, n0, n1, i, i0, i1, sz, range; + t_nblists * nblists; + nb_kernel_data_t kernel_data; + nb_kernel_t * kernelptr = NULL; + rvec * f; + + kernel_data.flags = flags; + kernel_data.exclusions = excl; + kernel_data.lambda = lambda; + kernel_data.dvdl = dvdl; + + if (fr->bAllvsAll) + { + gmx_incons("All-vs-all kernels have not been implemented in version 4.6"); + return; + } + + if (eNL >= 0) + { + i0 = eNL; + i1 = i0+1; + } + else + { + i0 = 0; + i1 = eNL_NR; + } + + if (nls >= 0) + { + n0 = nls; + n1 = nls+1; + } + else + { + n0 = 0; + n1 = fr->nnblists; + } + + for (n = n0; (n < n1); n++) + { + nblists = &fr->nblists[n]; + + kernel_data.table_elec = &nblists->table_elec; + kernel_data.table_vdw = &nblists->table_vdw; + kernel_data.table_elec_vdw = &nblists->table_elec_vdw; + + for (range = 0; range < 2; range++) + { + /* Are we doing short/long-range? */ + if (range == 0) + { + /* Short-range */ + if (!(flags & GMX_NONBONDED_DO_SR)) + { + continue; + } + kernel_data.energygrp_elec = grppener->ener[egCOULSR]; + kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR]; + kernel_data.energygrp_polarization = grppener->ener[egGB]; + nlist = nblists->nlist_sr; + f = f_shortrange; + } + else if (range == 1) + { + /* Long-range */ + if (!(flags & GMX_NONBONDED_DO_LR)) + { + continue; + } + kernel_data.energygrp_elec = grppener->ener[egCOULLR]; + kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? 
egBHAMLR : egLJLR]; + kernel_data.energygrp_polarization = grppener->ener[egGB]; + nlist = nblists->nlist_lr; + f = f_longrange; + } + + for (i = i0; (i < i1); i++) + { + if (nlist[i].nri > 0) + { + if (flags & GMX_NONBONDED_DO_POTENTIAL) + { + /* Potential and force */ + kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf; + } + else + { + /* Force only, no potential */ + kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f; + } + + if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA)) + { + /* We don't need the non-perturbed interactions */ + continue; + } + /* Neighborlists whose kernelptr==NULL will always be empty */ + if (kernelptr != NULL) + { + (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb); + } ++ else ++ { ++ gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned."); ++ } + } + } + } + } +} + +static void +nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit) +{ + gmx_warning("Listed nonbonded interaction between particles %d and %d\n" + "at distance %.3f which is larger than the table limit %.3f nm.\n\n" + "This is likely either a 1,4 interaction, or a listed interaction inside\n" + "a smaller molecule you are decoupling during a free energy calculation.\n" + "Since interactions at distances beyond the table cannot be computed,\n" + "they are skipped until they are inside the table limit again. You will\n" + "only see this message once, even if it occurs for several interactions.\n\n" + "IMPORTANT: This should not happen in a stable simulation, so there is\n" + "probably something wrong with your system. Only change the table-extension\n" + "distance in the mdp file if you are really sure that is the reason.\n", + glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit); + + if (debug) + { + fprintf(debug, + "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n", + x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ], + glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r); + } +} + + + +/* This might logically belong better in the nb_generic.c module, but it is only + * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an + * extra functional call for every single pair listed in the topology. 
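+ * The function below assumes the standard cubic-spline table layout of
+ * 12 reals per point: four coefficients (Y, F, G, H) for each of
+ * electrostatics, dispersion and repulsion. With eps the fractional
+ * position within a table interval, each term is evaluated as
+ *   Fp = F + G*eps + H*eps^2
+ *   VV = Y + eps*Fp
+ *   FF = Fp + G*eps + 2*H*eps^2   (i.e. FF = dVV/deps)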
+ */ +static real +nb_evaluate_single(real r2, real tabscale, real *vftab, + real qq, real c6, real c12, real *velec, real *vvdw) +{ + real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal; + int ntab; + + /* Do the tabulated interactions - first table lookup */ + rinv = gmx_invsqrt(r2); + r = r2*rinv; + rtab = r*tabscale; + ntab = rtab; + eps = rtab-ntab; + eps2 = eps*eps; + ntab = 12*ntab; + /* Electrostatics */ + Y = vftab[ntab]; + F = vftab[ntab+1]; + Geps = eps*vftab[ntab+2]; + Heps2 = eps2*vftab[ntab+3]; + Fp = F+Geps+Heps2; + VVe = Y+eps*Fp; + FFe = Fp+Geps+2.0*Heps2; + /* Dispersion */ + Y = vftab[ntab+4]; + F = vftab[ntab+5]; + Geps = eps*vftab[ntab+6]; + Heps2 = eps2*vftab[ntab+7]; + Fp = F+Geps+Heps2; + VVd = Y+eps*Fp; + FFd = Fp+Geps+2.0*Heps2; + /* Repulsion */ + Y = vftab[ntab+8]; + F = vftab[ntab+9]; + Geps = eps*vftab[ntab+10]; + Heps2 = eps2*vftab[ntab+11]; + Fp = F+Geps+Heps2; + VVr = Y+eps*Fp; + FFr = Fp+Geps+2.0*Heps2; + + *velec = qq*VVe; + *vvdw = c6*VVd+c12*VVr; + + fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv; + + return fscal; +} + + +real +do_nonbonded_listed(int ftype, int nbonds, + const t_iatom iatoms[], const t_iparams iparams[], + const rvec x[], rvec f[], rvec fshift[], + const t_pbc *pbc, const t_graph *g, + real *lambda, real *dvdl, + const t_mdatoms *md, + const t_forcerec *fr, gmx_grppairener_t *grppener, + int *global_atom_index) +{ + int ielec, ivdw; + real qq, c6, c12; + rvec dx; + ivec dt; + int i, j, itype, ai, aj, gid; + int fshift_index; + real r2, rinv; + real fscal, velec, vvdw; + real * energygrp_elec; + real * energygrp_vdw; + static gmx_bool warned_rlimit = FALSE; + /* Free energy stuff */ + gmx_bool bFreeEnergy; + real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2]; + real qqB, c6B, c12B, sigma2_def, sigma2_min; + + + switch (ftype) + { + case F_LJ14: + case F_LJC14_Q: + energygrp_elec = grppener->ener[egCOUL14]; + energygrp_vdw = grppener->ener[egLJ14]; + break; + case F_LJC_PAIRS_NB: + energygrp_elec = grppener->ener[egCOULSR]; + energygrp_vdw = grppener->ener[egLJSR]; + break; + default: + energygrp_elec = NULL; /* Keep compiler happy */ + energygrp_vdw = NULL; /* Keep compiler happy */ + gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype); + break; + } + + if (fr->efep != efepNO) + { + /* Lambda factor for state A=1-lambda and B=lambda */ + LFC[0] = 1.0 - lambda[efptCOUL]; + LFV[0] = 1.0 - lambda[efptVDW]; + LFC[1] = lambda[efptCOUL]; + LFV[1] = lambda[efptVDW]; + + /*derivative of the lambda factor for state A and B */ + DLF[0] = -1; + DLF[1] = 1; + + /* precalculate */ + sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0); + sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0); + + for (i = 0; i < 2; i++) + { + lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i])); + dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1); + lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i])); + dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? 
(1-LFV[i]) : 1); + } + } + else + { + sigma2_min = sigma2_def = 0; + } + + bFreeEnergy = FALSE; + for (i = 0; (i < nbonds); ) + { + itype = iatoms[i++]; + ai = iatoms[i++]; + aj = iatoms[i++]; + gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp); + + /* Get parameters */ + switch (ftype) + { + case F_LJ14: + bFreeEnergy = + (fr->efep != efepNO && + ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) || + iparams[itype].lj14.c6A != iparams[itype].lj14.c6B || + iparams[itype].lj14.c12A != iparams[itype].lj14.c12B)); + qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ; + c6 = iparams[itype].lj14.c6A; + c12 = iparams[itype].lj14.c12A; + break; + case F_LJC14_Q: + qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq; + c6 = iparams[itype].ljc14.c6; + c12 = iparams[itype].ljc14.c12; + break; + case F_LJC_PAIRS_NB: + qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac; + c6 = iparams[itype].ljcnb.c6; + c12 = iparams[itype].ljcnb.c12; + break; + default: + /* Cannot happen since we called gmx_fatal() above in this case */ + qq = c6 = c12 = 0; /* Keep compiler happy */ + break; + } + + /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors + * included in the general nfbp array now. This means the tables are scaled down by the + * same factor, so when we use the original c6/c12 parameters from iparams[] they must + * be scaled up. + */ + c6 *= 6.0; + c12 *= 12.0; + + /* Do we need to apply full periodic boundary conditions? */ + if (fr->bMolPBC == TRUE) + { + fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx); + } + else + { + fshift_index = CENTRAL; + rvec_sub(x[ai], x[aj], dx); + } + r2 = norm2(dx); + + if (r2 >= fr->tab14.r*fr->tab14.r) + { + if (warned_rlimit == FALSE) + { + nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r); + warned_rlimit = TRUE; + } + continue; + } + + if (bFreeEnergy) + { + /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */ + qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ; + c6B = iparams[itype].lj14.c6B*6.0; + c12B = iparams[itype].lj14.c12B*12.0; + + fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw, + fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B, + LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw, + fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl); + } + else + { + /* Evaluate tabulated interaction without free energy */ + fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw); + } + + energygrp_elec[gid] += velec; + energygrp_vdw[gid] += vvdw; + svmul(fscal, dx, dx); + + /* Add the forces */ + rvec_inc(f[ai], dx); + rvec_dec(f[aj], dx); + + if (g) + { + /* Correct the shift forces using the graph */ + ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt); + fshift_index = IVEC2IS(dt); + } + if (fshift_index != CENTRAL) + { + rvec_inc(fshift[fshift_index], dx); + rvec_dec(fshift[CENTRAL], dx); + } + } + return 0.0; +} diff --cc src/gromacs/gmxpreprocess/readir.c index 4dbdf75e3e,0000000000..5fbede033e mode 100644,000000..100644 --- a/src/gromacs/gmxpreprocess/readir.c +++ b/src/gromacs/gmxpreprocess/readir.c @@@ -1,4398 -1,0 +1,4411 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. 
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <ctype.h> +#include <stdlib.h> +#include <limits.h> +#include "sysstuff.h" +#include "gromacs/utility/smalloc.h" +#include "typedefs.h" +#include "physics.h" +#include "names.h" +#include "gmx_fatal.h" +#include "macros.h" +#include "index.h" +#include "symtab.h" +#include "gromacs/utility/cstringutil.h" +#include "readinp.h" +#include "warninp.h" +#include "readir.h" +#include "toputil.h" +#include "index.h" +#include "network.h" +#include "vec.h" +#include "pbc.h" +#include "mtop_util.h" +#include "chargegroup.h" +#include "inputrec.h" +#include "calc_verletbuf.h" + +#define MAXPTR 254 +#define NOGID 255 + +/* Resource parameters + * Do not change any of these until you read the instruction + * in readinp.h. Some cpp's do not take spaces after the backslash + * (like the c-shell), which will give you a very weird compiler + * message.
+ */ + +typedef struct t_inputrec_strings +{ + char tcgrps[STRLEN], tau_t[STRLEN], ref_t[STRLEN], + acc[STRLEN], accgrps[STRLEN], freeze[STRLEN], frdim[STRLEN], + energy[STRLEN], user1[STRLEN], user2[STRLEN], vcm[STRLEN], x_compressed_groups[STRLEN], + couple_moltype[STRLEN], orirefitgrp[STRLEN], egptable[STRLEN], egpexcl[STRLEN], + wall_atomtype[STRLEN], wall_density[STRLEN], deform[STRLEN], QMMM[STRLEN], + imd_grp[STRLEN]; + char fep_lambda[efptNR][STRLEN]; + char lambda_weights[STRLEN]; + char **pull_grp; + char **rot_grp; + char anneal[STRLEN], anneal_npoints[STRLEN], + anneal_time[STRLEN], anneal_temp[STRLEN]; + char QMmethod[STRLEN], QMbasis[STRLEN], QMcharge[STRLEN], QMmult[STRLEN], + bSH[STRLEN], CASorbitals[STRLEN], CASelectrons[STRLEN], SAon[STRLEN], + SAoff[STRLEN], SAsteps[STRLEN], bTS[STRLEN], bOPT[STRLEN]; + char efield_x[STRLEN], efield_xt[STRLEN], efield_y[STRLEN], + efield_yt[STRLEN], efield_z[STRLEN], efield_zt[STRLEN]; + +} gmx_inputrec_strings; + +static gmx_inputrec_strings *is = NULL; + +void init_inputrec_strings() +{ + if (is) + { + gmx_incons("Attempted to call init_inputrec_strings before calling done_inputrec_strings. Only one inputrec (i.e. .mdp file) can be parsed at a time."); + } + snew(is, 1); +} + +void done_inputrec_strings() +{ + sfree(is); + is = NULL; +} + +static char swapgrp[STRLEN], splitgrp0[STRLEN], splitgrp1[STRLEN], solgrp[STRLEN]; + +enum { + egrptpALL, /* All particles have to be a member of a group. */ + egrptpALL_GENREST, /* A rest group with name is generated for particles * + * that are not part of any group. */ + egrptpPART, /* As egrptpALL_GENREST, but no name is generated * + * for the rest group. */ + egrptpONE /* Merge all selected groups into one group, * + * make a rest group for the remaining particles. */ +}; + +static const char *constraints[eshNR+1] = { + "none", "h-bonds", "all-bonds", "h-angles", "all-angles", NULL +}; + +static const char *couple_lam[ecouplamNR+1] = { + "vdw-q", "vdw", "q", "none", NULL +}; + +void init_ir(t_inputrec *ir, t_gromppopts *opts) +{ + snew(opts->include, STRLEN); + snew(opts->define, STRLEN); + snew(ir->fepvals, 1); + snew(ir->expandedvals, 1); + snew(ir->simtempvals, 1); +} + +static void GetSimTemps(int ntemps, t_simtemp *simtemp, double *temperature_lambdas) +{ + + int i; + + for (i = 0; i < ntemps; i++) + { + /* simple linear scaling -- allows more control */ + if (simtemp->eSimTempScale == esimtempLINEAR) + { + simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*temperature_lambdas[i]; + } + else if (simtemp->eSimTempScale == esimtempGEOMETRIC) /* should give roughly equal acceptance for constant heat capacity . . . 
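since successive temperatures then have a constant ratio
T_{i+1}/T_i = (T_high/T_low)^(1/(ntemps-1)), which for roughly constant
heat capacity gives similar energy-distribution overlap, and hence
similar acceptance, between neighboring temperatures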
*/ + { + simtemp->temperatures[i] = simtemp->simtemp_low * pow(simtemp->simtemp_high/simtemp->simtemp_low, (1.0*i)/(ntemps-1)); + } + else if (simtemp->eSimTempScale == esimtempEXPONENTIAL) + { + simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1)); + } + else + { + char errorstr[128]; + sprintf(errorstr, "eSimTempScale=%d not defined", simtemp->eSimTempScale); + gmx_fatal(FARGS, errorstr); + } + } +} + + + +static void _low_check(gmx_bool b, char *s, warninp_t wi) +{ + if (b) + { + warning_error(wi, s); + } +} + +static void check_nst(const char *desc_nst, int nst, + const char *desc_p, int *p, + warninp_t wi) +{ + char buf[STRLEN]; + + if (*p > 0 && *p % nst != 0) + { + /* Round up to the next multiple of nst */ + *p = ((*p)/nst + 1)*nst; + sprintf(buf, "%s should be a multiple of %s, changing %s to %d\n", + desc_p, desc_nst, desc_p, *p); + warning(wi, buf); + } +} + +static gmx_bool ir_NVE(const t_inputrec *ir) +{ + return ((ir->eI == eiMD || EI_VV(ir->eI)) && ir->etc == etcNO); +} + +static int lcd(int n1, int n2) +{ + int d, i; + + d = 1; + for (i = 2; (i <= n1 && i <= n2); i++) + { + if (n1 % i == 0 && n2 % i == 0) + { + d = i; + } + } + + return d; +} + +static void process_interaction_modifier(const t_inputrec *ir, int *eintmod) +{ + if (*eintmod == eintmodPOTSHIFT_VERLET) + { + if (ir->cutoff_scheme == ecutsVERLET) + { + *eintmod = eintmodPOTSHIFT; + } + else + { + *eintmod = eintmodNONE; + } + } +} + +void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, + warninp_t wi) +/* Check internal consistency. + * NOTE: index groups are not set here yet, don't check things + * like temperature coupling group options here, but in triple_check + */ +{ + /* Strange macro: first one fills the err_buf, and then one can check + * the condition, which will print the message and increase the error + * counter. + */ +#define CHECK(b) _low_check(b, err_buf, wi) + char err_buf[256], warn_buf[STRLEN]; + int i, j; + int ns_type = 0; + real dt_coupl = 0; + real dt_pcoupl; + int nstcmin; + t_lambda *fep = ir->fepvals; + t_expanded *expand = ir->expandedvals; + + set_warning_line(wi, mdparin, -1); + + /* BASIC CUT-OFF STUFF */ + if (ir->rcoulomb < 0) + { + warning_error(wi, "rcoulomb should be >= 0"); + } + if (ir->rvdw < 0) + { + warning_error(wi, "rvdw should be >= 0"); + } + if (ir->rlist < 0 && + !(ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_tol > 0)) + { + warning_error(wi, "rlist should be >= 0"); + } + + process_interaction_modifier(ir, &ir->coulomb_modifier); + process_interaction_modifier(ir, &ir->vdw_modifier); + + if (ir->cutoff_scheme == ecutsGROUP) + { + warning_note(wi, + "The group cutoff scheme is deprecated in Gromacs 5.0 and will be removed in a future " + "release when all interaction forms are supported for the verlet scheme. The verlet " + "scheme already scales better, and it is compatible with GPUs and other accelerators."); + + /* BASIC CUT-OFF STUFF */ + if (ir->rlist == 0 || + !((ir_coulomb_might_be_zero_at_cutoff(ir) && ir->rcoulomb > ir->rlist) || + (ir_vdw_might_be_zero_at_cutoff(ir) && ir->rvdw > ir->rlist))) + { + /* No switched potential and/or no twin-range: + * we can set the long-range cut-off to the maximum of the other cut-offs. 
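+ * For example, rlist=1.0 with rvdw=1.0 and rcoulomb=1.2 then simply
+ * gives rlistlong=1.2 below.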
+ */ + ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)); + } + else if (ir->rlistlong < 0) + { + ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)); + sprintf(warn_buf, "rlistlong was not set, setting it to %g (no buffer)", + ir->rlistlong); + warning(wi, warn_buf); + } + if (ir->rlistlong == 0 && ir->ePBC != epbcNONE) + { + warning_error(wi, "Can not have an infinite cut-off with PBC"); + } + if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist)) + { + warning_error(wi, "rlistlong can not be shorter than rlist"); + } + if (IR_TWINRANGE(*ir) && ir->nstlist <= 0) + { + warning_error(wi, "Can not have nstlist<=0 with twin-range interactions"); + } + } + + if (ir->rlistlong == ir->rlist) + { + ir->nstcalclr = 0; + } + else if (ir->rlistlong > ir->rlist && ir->nstcalclr == 0) + { + warning_error(wi, "With different cutoffs for electrostatics and VdW, nstcalclr must be -1 or a positive number"); + } + + if (ir->cutoff_scheme == ecutsVERLET) + { + real rc_max; + + /* Normal Verlet type neighbor-list, currently only limited feature support */ + if (inputrec2nboundeddim(ir) < 3) + { + warning_error(wi, "With Verlet lists only full pbc or pbc=xy with walls is supported"); + } + if (ir->rcoulomb != ir->rvdw) + { + warning_error(wi, "With Verlet lists rcoulomb!=rvdw is not supported"); + } + if (ir->vdwtype == evdwSHIFT || ir->vdwtype == evdwSWITCH) + { + if (ir->vdw_modifier == eintmodNONE || + ir->vdw_modifier == eintmodPOTSHIFT) + { + ir->vdw_modifier = (ir->vdwtype == evdwSHIFT ? eintmodFORCESWITCH : eintmodPOTSWITCH); + + sprintf(warn_buf, "Replacing vdwtype=%s by the equivalent combination of vdwtype=%s and vdw_modifier=%s", evdw_names[ir->vdwtype], evdw_names[evdwCUT], eintmod_names[ir->vdw_modifier]); + warning_note(wi, warn_buf); + + ir->vdwtype = evdwCUT; + } + else + { + sprintf(warn_buf, "Unsupported combination of vdwtype=%s and vdw_modifier=%s", evdw_names[ir->vdwtype], eintmod_names[ir->vdw_modifier]); + warning_error(wi, warn_buf); + } + } + + if (!(ir->vdwtype == evdwCUT || ir->vdwtype == evdwPME)) + { + warning_error(wi, "With Verlet lists only cut-off and PME LJ interactions are supported"); + } + if (!(ir->coulombtype == eelCUT || + (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC) || + EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD)) + { + warning_error(wi, "With Verlet lists only cut-off, reaction-field, PME and Ewald electrostatics are supported"); + } + if (!(ir->coulomb_modifier == eintmodNONE || + ir->coulomb_modifier == eintmodPOTSHIFT)) + { + sprintf(warn_buf, "coulomb_modifier=%s is not supported with the Verlet cut-off scheme", eintmod_names[ir->coulomb_modifier]); + warning_error(wi, warn_buf); + } + + if (ir->nstlist <= 0) + { + warning_error(wi, "With Verlet lists nstlist should be larger than 0"); + } + + if (ir->nstlist < 10) + { + warning_note(wi, "With Verlet lists the optimal nstlist is >= 10, with GPUs >= 20. Note that with the Verlet scheme, nstlist has no effect on the accuracy of your simulation."); + } + + rc_max = max(ir->rvdw, ir->rcoulomb); + + if (ir->verletbuf_tol <= 0) + { + if (ir->verletbuf_tol == 0) + { + warning_error(wi, "Can not have Verlet buffer tolerance of exactly 0"); + } + + if (ir->rlist < rc_max) + { + warning_error(wi, "With verlet lists rlist can not be smaller than rvdw or rcoulomb"); + } + + if (ir->rlist == rc_max && ir->nstlist > 1) + { + warning_note(wi, "rlist is equal to rvdw and/or rcoulomb: there is no explicit Verlet buffer. 
The cluster pair list does have a buffering effect, but choosing a larger rlist might be necessary for good energy conservation."); + } + } + else + { + if (ir->rlist > rc_max) + { + warning_note(wi, "You have set rlist larger than the interaction cut-off, but you also have verlet-buffer-tolerance > 0. Will set rlist using verlet-buffer-tolerance."); + } + + if (ir->nstlist == 1) + { + /* No buffer required */ + ir->rlist = rc_max; + } + else + { + if (EI_DYNAMICS(ir->eI)) + { + if (inputrec2nboundeddim(ir) < 3) + { + warning_error(wi, "The box volume is required for calculating rlist from the energy drift with verlet-buffer-tolerance > 0. You are using at least one unbounded dimension, so no volume can be computed. Either use a finite box, or set rlist yourself together with verlet-buffer-tolerance = -1."); + } + /* Set rlist temporarily so we can continue processing */ + ir->rlist = rc_max; + } + else + { + /* Set the buffer to 5% of the cut-off */ + ir->rlist = (1.0 + verlet_buffer_ratio_nodynamics)*rc_max; + } + } + } + + /* No twin-range calculations with Verlet lists */ + ir->rlistlong = ir->rlist; + } + + if (ir->nstcalclr == -1) + { + /* if rlist=rlistlong, this will later be changed to nstcalclr=0 */ + ir->nstcalclr = ir->nstlist; + } + else if (ir->nstcalclr > 0) + { + if (ir->nstlist > 0 && (ir->nstlist % ir->nstcalclr != 0)) + { + warning_error(wi, "nstlist must be evenly divisible by nstcalclr. Use nstcalclr = -1 to automatically follow nstlist"); + } + } + else if (ir->nstcalclr < -1) + { + warning_error(wi, "nstcalclr must be a positive number (a divisor of nstlist), or -1 to follow nstlist."); + } + + if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr > 1) + { + warning_error(wi, "When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)"); + } + + /* GENERAL INTEGRATOR STUFF */ + if (!(ir->eI == eiMD || EI_VV(ir->eI))) + { + ir->etc = etcNO; + } + if (ir->eI == eiVVAK) + { + sprintf(warn_buf, "Integrator method %s is implemented primarily for validation purposes; for molecular dynamics, you should probably be using %s or %s", ei_names[eiVVAK], ei_names[eiMD], ei_names[eiVV]); + warning_note(wi, warn_buf); + } + if (!EI_DYNAMICS(ir->eI)) + { + ir->epc = epcNO; + } + if (EI_DYNAMICS(ir->eI)) + { + if (ir->nstcalcenergy < 0) + { + ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir); + if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy) + { + /* nstcalcenergy larger than nstenergy does not make sense. + * We ideally want nstcalcenergy=nstenergy.
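+ * For example, nstenergy=100 with nstlist=40 gives
+ * nstcalcenergy = lcd(100, 40) = 20 below (despite its name, lcd()
+ * returns the largest common divisor of its two arguments).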
+ */ + if (ir->nstlist > 0) + { + ir->nstcalcenergy = lcd(ir->nstenergy, ir->nstlist); + } + else + { + ir->nstcalcenergy = ir->nstenergy; + } + } + } + else if ( (ir->nstenergy > 0 && ir->nstcalcenergy > ir->nstenergy) || + (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 && + (ir->nstcalcenergy > ir->fepvals->nstdhdl) ) ) + + { + const char *nsten = "nstenergy"; + const char *nstdh = "nstdhdl"; + const char *min_name = nsten; + int min_nst = ir->nstenergy; + + /* find the smallest of ( nstenergy, nstdhdl ) */ + if (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 && + (ir->nstenergy == 0 || ir->fepvals->nstdhdl < ir->nstenergy)) + { + min_nst = ir->fepvals->nstdhdl; + min_name = nstdh; + } + /* If the user sets nstenergy small, we should respect that */ + sprintf(warn_buf, + "Setting nstcalcenergy (%d) equal to %s (%d)", + ir->nstcalcenergy, min_name, min_nst); + warning_note(wi, warn_buf); + ir->nstcalcenergy = min_nst; + } + + if (ir->epc != epcNO) + { + if (ir->nstpcouple < 0) + { + ir->nstpcouple = ir_optimal_nstpcouple(ir); + } + } + if (IR_TWINRANGE(*ir)) + { + check_nst("nstlist", ir->nstlist, + "nstcalcenergy", &ir->nstcalcenergy, wi); + if (ir->epc != epcNO) + { + check_nst("nstlist", ir->nstlist, + "nstpcouple", &ir->nstpcouple, wi); + } + } + + if (ir->nstcalcenergy > 0) + { + if (ir->efep != efepNO) + { + /* nstdhdl should be a multiple of nstcalcenergy */ + check_nst("nstcalcenergy", ir->nstcalcenergy, + "nstdhdl", &ir->fepvals->nstdhdl, wi); + /* nstexpanded should be a multiple of nstcalcenergy */ + check_nst("nstcalcenergy", ir->nstcalcenergy, + "nstexpanded", &ir->expandedvals->nstexpanded, wi); + } + /* for storing exact averages nstenergy should be + * a multiple of nstcalcenergy + */ + check_nst("nstcalcenergy", ir->nstcalcenergy, + "nstenergy", &ir->nstenergy, wi); + } + } + + if (ir->nsteps == 0 && !ir->bContinuation) + { + warning_note(wi, "For a correct single-point energy evaluation with nsteps = 0, use continuation = yes to avoid constraining the input coordinates."); + } + + /* LD STUFF */ + if ((EI_SD(ir->eI) || ir->eI == eiBD) && + ir->bContinuation && ir->ld_seed != -1) + { + warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)"); + } + + /* TPI STUFF */ + if (EI_TPI(ir->eI)) + { + sprintf(err_buf, "TPI only works with pbc = %s", epbc_names[epbcXYZ]); + CHECK(ir->ePBC != epbcXYZ); + sprintf(err_buf, "TPI only works with ns = %s", ens_names[ensGRID]); + CHECK(ir->ns_type != ensGRID); + sprintf(err_buf, "with TPI nstlist should be larger than zero"); + CHECK(ir->nstlist <= 0); + sprintf(err_buf, "TPI does not work with full electrostatics other than PME"); + CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype)); + } + + /* SHAKE / LINCS */ + if ( (opts->nshake > 0) && (opts->bMorse) ) + { + sprintf(warn_buf, + "Using morse bond-potentials while constraining bonds is useless"); + warning(wi, warn_buf); + } + + if ((EI_SD(ir->eI) || ir->eI == eiBD) && + ir->bContinuation && ir->ld_seed != -1) + { + warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)"); + } + /* verify simulated tempering options */ + + if (ir->bSimTemp) + { + gmx_bool bAllTempZero = TRUE; + for (i = 0; i < fep->n_lambda; i++) + { + sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[efptTEMPERATURE], fep->all_lambda[efptTEMPERATURE][i]); + 
CHECK((fep->all_lambda[efptTEMPERATURE][i] < 0) || (fep->all_lambda[efptTEMPERATURE][i] > 1)); + if (fep->all_lambda[efptTEMPERATURE][i] > 0) + { + bAllTempZero = FALSE; + } + } + sprintf(err_buf, "if simulated tempering is on, temperature-lambdas may not be all zero"); + CHECK(bAllTempZero == TRUE); + + sprintf(err_buf, "Simulated tempering is currently only compatible with md-vv"); + CHECK(ir->eI != eiVV); + + /* check compatibility of the temperature coupling with simulated tempering */ + + if (ir->etc == etcNOSEHOOVER) + { + sprintf(warn_buf, "Nose-Hoover based temperature control such as [%s] may not be entirely consistent with simulated tempering", etcoupl_names[ir->etc]); + warning_note(wi, warn_buf); + } + + /* check that the temperatures make sense */ + + sprintf(err_buf, "Higher simulated tempering temperature (%g) must be >= the simulated tempering lower temperature (%g)", ir->simtempvals->simtemp_high, ir->simtempvals->simtemp_low); + CHECK(ir->simtempvals->simtemp_high <= ir->simtempvals->simtemp_low); + + sprintf(err_buf, "Higher simulated tempering temperature (%g) must be > zero", ir->simtempvals->simtemp_high); + CHECK(ir->simtempvals->simtemp_high <= 0); + + sprintf(err_buf, "Lower simulated tempering temperature (%g) must be > zero", ir->simtempvals->simtemp_low); + CHECK(ir->simtempvals->simtemp_low <= 0); + } + + /* verify free energy options */ + + if (ir->efep != efepNO) + { + fep = ir->fepvals; + sprintf(err_buf, "The soft-core power is %d and can only be 1 or 2", + fep->sc_power); + CHECK(fep->sc_alpha != 0 && fep->sc_power != 1 && fep->sc_power != 2); + + sprintf(err_buf, "The soft-core sc-r-power is %d and can only be 6 or 48", + (int)fep->sc_r_power); + CHECK(fep->sc_alpha != 0 && fep->sc_r_power != 6.0 && fep->sc_r_power != 48.0); + + sprintf(err_buf, "Can't use positive delta-lambda (%g) if initial state/lambda does not start at zero", fep->delta_lambda); + CHECK(fep->delta_lambda > 0 && ((fep->init_fep_state > 0) || (fep->init_lambda > 0))); + + sprintf(err_buf, "Can't use positive delta-lambda (%g) with expanded ensemble simulations", fep->delta_lambda); + CHECK(fep->delta_lambda > 0 && (ir->efep == efepEXPANDED)); + + sprintf(err_buf, "Can only use expanded ensemble with md-vv for now; should be supported for other integrators in 5.0"); + CHECK(!(EI_VV(ir->eI)) && (ir->efep == efepEXPANDED)); + + sprintf(err_buf, "Free-energy not implemented for Ewald"); + CHECK(ir->coulombtype == eelEWALD); + + /* check validity of lambda inputs */ + if (fep->n_lambda == 0) + { + /* Clear output in case of no states:*/ + sprintf(err_buf, "init-lambda-state set to %d: no lambda states are defined.", fep->init_fep_state); + CHECK((fep->init_fep_state >= 0) && (fep->n_lambda == 0)); + } + else + { + sprintf(err_buf, "initial thermodynamic state %d does not exist, only goes to %d", fep->init_fep_state, fep->n_lambda-1); + CHECK((fep->init_fep_state >= fep->n_lambda)); + } + + sprintf(err_buf, "Lambda state must be set, either with init-lambda-state or with init-lambda"); + CHECK((fep->init_fep_state < 0) && (fep->init_lambda < 0)); + + sprintf(err_buf, "init-lambda=%g while init-lambda-state=%d.
Lambda state must be set either with init-lambda-state or with init-lambda, but not both", + fep->init_lambda, fep->init_fep_state); + CHECK((fep->init_fep_state >= 0) && (fep->init_lambda >= 0)); + + + + if ((fep->init_lambda >= 0) && (fep->delta_lambda == 0)) + { + int n_lambda_terms; + n_lambda_terms = 0; + for (i = 0; i < efptNR; i++) + { + if (fep->separate_dvdl[i]) + { + n_lambda_terms++; + } + } + if (n_lambda_terms > 1) + { + sprintf(warn_buf, "If lambda vector states (fep-lambdas, coul-lambdas etc.) are set, don't use init-lambda to set lambda state (except for slow growth). Use init-lambda-state instead."); + warning(wi, warn_buf); + } + + if (n_lambda_terms < 2 && fep->n_lambda > 0) + { + warning_note(wi, + "init-lambda is deprecated for setting lambda state (except for slow growth). Use init-lambda-state instead."); + } + } + + for (j = 0; j < efptNR; j++) + { + for (i = 0; i < fep->n_lambda; i++) + { + sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[j], fep->all_lambda[j][i]); + CHECK((fep->all_lambda[j][i] < 0) || (fep->all_lambda[j][i] > 1)); + } + } + + if ((fep->sc_alpha > 0) && (!fep->bScCoul)) + { + for (i = 0; i < fep->n_lambda; i++) + { + sprintf(err_buf, "For state %d, vdw-lambdas (%f) is changing with vdw softcore, while coul-lambdas (%f) is nonzero without coulomb softcore: this will lead to crashes, and is not supported.", i, fep->all_lambda[efptVDW][i], + fep->all_lambda[efptCOUL][i]); + CHECK((fep->sc_alpha > 0) && + (((fep->all_lambda[efptCOUL][i] > 0.0) && + (fep->all_lambda[efptCOUL][i] < 1.0)) && + ((fep->all_lambda[efptVDW][i] > 0.0) && + (fep->all_lambda[efptVDW][i] < 1.0)))); + } + } + + if ((fep->bScCoul) && (EEL_PME(ir->coulombtype))) + { + real sigma, lambda, r_sc; + + sigma = 0.34; + /* Maximum estimate for A and B charges equal with lambda power 1 */ + lambda = 0.5; + r_sc = pow(lambda*fep->sc_alpha*pow(sigma/ir->rcoulomb, fep->sc_r_power) + 1.0, 1.0/fep->sc_r_power); + sprintf(warn_buf, "With PME there is a minor soft core effect present at the cut-off, proportional to (LJsigma/rcoulomb)^%g. This could have a minor effect on energy conservation, but usually other effects dominate. 
With a common sigma value of %g nm the fraction of the particle-particle potential at the cut-off at lambda=%g is around %.1e, while ewald-rtol is %.1e.", + fep->sc_r_power, + sigma, lambda, r_sc - 1.0, ir->ewald_rtol); + warning_note(wi, warn_buf); + } + + /* Free Energy Checks -- In an ideal world, slow growth and FEP would + be treated differently, but that's the next step */ + + for (i = 0; i < efptNR; i++) + { + for (j = 0; j < fep->n_lambda; j++) + { + sprintf(err_buf, "%s[%d] must be between 0 and 1", efpt_names[i], j); + CHECK((fep->all_lambda[i][j] < 0) || (fep->all_lambda[i][j] > 1)); + } + } + } + + if ((ir->bSimTemp) || (ir->efep == efepEXPANDED)) + { + fep = ir->fepvals; + expand = ir->expandedvals; + + /* checking equilibration of weights inputs for validity */ + + sprintf(err_buf, "weight-equil-number-all-lambda (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]); + CHECK((expand->equil_n_at_lam > 0) && (expand->elmceq != elmceqNUMATLAM)); + + sprintf(err_buf, "weight-equil-number-samples (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_samples, elmceq_names[elmceqSAMPLES]); + CHECK((expand->equil_samples > 0) && (expand->elmceq != elmceqSAMPLES)); + + sprintf(err_buf, "weight-equil-number-steps (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_steps, elmceq_names[elmceqSTEPS]); + CHECK((expand->equil_steps > 0) && (expand->elmceq != elmceqSTEPS)); + + sprintf(err_buf, "weight-equil-wl-delta (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_samples, elmceq_names[elmceqWLDELTA]); + CHECK((expand->equil_wl_delta > 0) && (expand->elmceq != elmceqWLDELTA)); + + sprintf(err_buf, "weight-equil-count-ratio (%f) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_ratio, elmceq_names[elmceqRATIO]); + CHECK((expand->equil_ratio > 0) && (expand->elmceq != elmceqRATIO)); + + sprintf(err_buf, "weight-equil-number-all-lambda (%d) must be a positive integer if lmc-weights-equil=%s", + expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]); + CHECK((expand->equil_n_at_lam <= 0) && (expand->elmceq == elmceqNUMATLAM)); + + sprintf(err_buf, "weight-equil-number-samples (%d) must be a positive integer if lmc-weights-equil=%s", + expand->equil_samples, elmceq_names[elmceqSAMPLES]); + CHECK((expand->equil_samples <= 0) && (expand->elmceq == elmceqSAMPLES)); + + sprintf(err_buf, "weight-equil-number-steps (%d) must be a positive integer if lmc-weights-equil=%s", + expand->equil_steps, elmceq_names[elmceqSTEPS]); + CHECK((expand->equil_steps <= 0) && (expand->elmceq == elmceqSTEPS)); + + sprintf(err_buf, "weight-equil-wl-delta (%f) must be > 0 if lmc-weights-equil=%s", + expand->equil_wl_delta, elmceq_names[elmceqWLDELTA]); + CHECK((expand->equil_wl_delta <= 0) && (expand->elmceq == elmceqWLDELTA)); + + sprintf(err_buf, "weight-equil-count-ratio (%f) must be > 0 if lmc-weights-equil=%s", + expand->equil_ratio, elmceq_names[elmceqRATIO]); + CHECK((expand->equil_ratio <= 0) && (expand->elmceq == elmceqRATIO)); + + sprintf(err_buf, "lmc-weights-equil=%s only possible when lmc-stats = %s or lmc-stats %s", + elmceq_names[elmceqWLDELTA], elamstats_names[elamstatsWL], elamstats_names[elamstatsWWL]); + CHECK((expand->elmceq == elmceqWLDELTA) && (!EWL(expand->elamstats))); + + sprintf(err_buf, "lmc-repeats (%d) must be greater than 0", expand->lmc_repeats); + CHECK((expand->lmc_repeats <= 0)); + sprintf(err_buf, "minimum-var-min (%d) must be greater than 0", 
expand->minvarmin); + CHECK((expand->minvarmin <= 0)); + sprintf(err_buf, "weight-c-range (%d) must be greater or equal to 0", expand->c_range); + CHECK((expand->c_range < 0)); + sprintf(err_buf, "init-lambda-state (%d) must be zero if lmc-forced-nstart (%d)> 0 and lmc-move != 'no'", + fep->init_fep_state, expand->lmc_forced_nstart); + CHECK((fep->init_fep_state != 0) && (expand->lmc_forced_nstart > 0) && (expand->elmcmove != elmcmoveNO)); + sprintf(err_buf, "lmc-forced-nstart (%d) must not be negative", expand->lmc_forced_nstart); + CHECK((expand->lmc_forced_nstart < 0)); + sprintf(err_buf, "init-lambda-state (%d) must be in the interval [0,number of lambdas)", fep->init_fep_state); + CHECK((fep->init_fep_state < 0) || (fep->init_fep_state >= fep->n_lambda)); + + sprintf(err_buf, "init-wl-delta (%f) must be greater than or equal to 0", expand->init_wl_delta); + CHECK((expand->init_wl_delta < 0)); + sprintf(err_buf, "wl-ratio (%f) must be between 0 and 1", expand->wl_ratio); + CHECK((expand->wl_ratio <= 0) || (expand->wl_ratio >= 1)); + sprintf(err_buf, "wl-scale (%f) must be between 0 and 1", expand->wl_scale); + CHECK((expand->wl_scale <= 0) || (expand->wl_scale >= 1)); + + /* if there is no temperature control, we need to specify an MC temperature */ + sprintf(err_buf, "If there is no temperature control, and lmc-mcmove!= 'no',mc_temperature must be set to a positive number"); + if (expand->nstTij > 0) + { + sprintf(err_buf, "nst-transition-matrix (%d) must be an integer multiple of nstlog (%d)", + expand->nstTij, ir->nstlog); + CHECK((mod(expand->nstTij, ir->nstlog) != 0)); + } + } + + /* PBC/WALLS */ + sprintf(err_buf, "walls only work with pbc=%s", epbc_names[epbcXY]); + CHECK(ir->nwall && ir->ePBC != epbcXY); + + /* VACUUM STUFF */ + if (ir->ePBC != epbcXYZ && ir->nwall != 2) + { + if (ir->ePBC == epbcNONE) + { + if (ir->epc != epcNO) + { + warning(wi, "Turning off pressure coupling for vacuum system"); + ir->epc = epcNO; + } + } + else + { + sprintf(err_buf, "Can not have pressure coupling with pbc=%s", + epbc_names[ir->ePBC]); + CHECK(ir->epc != epcNO); + } + sprintf(err_buf, "Can not have Ewald with pbc=%s", epbc_names[ir->ePBC]); + CHECK(EEL_FULL(ir->coulombtype)); + + sprintf(err_buf, "Can not have dispersion correction with pbc=%s", + epbc_names[ir->ePBC]); + CHECK(ir->eDispCorr != edispcNO); + } + + if (ir->rlist == 0.0) + { + sprintf(err_buf, "can only have neighborlist cut-off zero (=infinite)\n" + "with coulombtype = %s or coulombtype = %s\n" + "without periodic boundary conditions (pbc = %s) and\n" + "rcoulomb and rvdw set to zero", + eel_names[eelCUT], eel_names[eelUSER], epbc_names[epbcNONE]); + CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) || + (ir->ePBC != epbcNONE) || + (ir->rcoulomb != 0.0) || (ir->rvdw != 0.0)); + + if (ir->nstlist < 0) + { + warning_error(wi, "Can not have heuristic neighborlist updates without cut-off"); + } + if (ir->nstlist > 0) + { + warning_note(wi, "Simulating without cut-offs can be (slightly) faster with nstlist=0, nstype=simple and only one MPI rank"); + } + } + + /* COMM STUFF */ + if (ir->nstcomm == 0) + { + ir->comm_mode = ecmNO; + } + if (ir->comm_mode != ecmNO) + { + if (ir->nstcomm < 0) + { + warning(wi, "If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. 
nstcomm is modified to its absolute value"); + ir->nstcomm = abs(ir->nstcomm); + } + + if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy) + { + warning_note(wi, "nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy"); + ir->nstcomm = ir->nstcalcenergy; + } + + if (ir->comm_mode == ecmANGULAR) + { + sprintf(err_buf, "Can not remove the rotation around the center of mass with periodic molecules"); + CHECK(ir->bPeriodicMols); + if (ir->ePBC != epbcNONE) + { + warning(wi, "Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule)."); + } + } + } + + if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR) + { + warning_note(wi, "Tumbling and or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR."); + } + + sprintf(err_buf, "Twin-range neighbour searching (NS) with simple NS" + " algorithm not implemented"); + CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist)) + && (ir->ns_type == ensSIMPLE)); + + /* TEMPERATURE COUPLING */ + if (ir->etc == etcYES) + { + ir->etc = etcBERENDSEN; + warning_note(wi, "Old option for temperature coupling given: " + "changing \"yes\" to \"Berendsen\"\n"); + } + + if ((ir->etc == etcNOSEHOOVER) || (ir->epc == epcMTTK)) + { + if (ir->opts.nhchainlength < 1) + { + sprintf(warn_buf, "number of Nose-Hoover chains (currently %d) cannot be less than 1,reset to 1\n", ir->opts.nhchainlength); + ir->opts.nhchainlength = 1; + warning(wi, warn_buf); + } + + if (ir->etc == etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1) + { + warning_note(wi, "leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1"); + ir->opts.nhchainlength = 1; + } + } + else + { + ir->opts.nhchainlength = 0; + } + + if (ir->eI == eiVVAK) + { + sprintf(err_buf, "%s implemented primarily for validation, and requires nsttcouple = 1 and nstpcouple = 1.", + ei_names[eiVVAK]); + CHECK((ir->nsttcouple != 1) || (ir->nstpcouple != 1)); + } + + if (ETC_ANDERSEN(ir->etc)) + { + sprintf(err_buf, "%s temperature control not supported for integrator %s.", etcoupl_names[ir->etc], ei_names[ir->eI]); + CHECK(!(EI_VV(ir->eI))); + + if (ir->nstcomm > 0 && (ir->etc == etcANDERSEN)) + { + sprintf(warn_buf, "Center of mass removal not necessary for %s. All velocities of coupled groups are rerandomized periodically, so flying ice cube errors will not occur.", etcoupl_names[ir->etc]); + warning_note(wi, warn_buf); + } + + sprintf(err_buf, "nstcomm must be 1, not %d for %s, as velocities of atoms in coupled groups are randomized every time step", ir->nstcomm, etcoupl_names[ir->etc]); + CHECK(ir->nstcomm > 1 && (ir->etc == etcANDERSEN)); + } + + if (ir->etc == etcBERENDSEN) + { + sprintf(warn_buf, "The %s thermostat does not generate the correct kinetic energy distribution. 
You might want to consider using the %s thermostat.", + ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); + warning_note(wi, warn_buf); + } + + if ((ir->etc == etcNOSEHOOVER || ETC_ANDERSEN(ir->etc)) + && ir->epc == epcBERENDSEN) + { + sprintf(warn_buf, "Using Berendsen pressure coupling invalidates the " + "true ensemble for the thermostat"); + warning(wi, warn_buf); + } + + /* PRESSURE COUPLING */ + if (ir->epc == epcISOTROPIC) + { + ir->epc = epcBERENDSEN; + warning_note(wi, "Old option for pressure coupling given: " + "changing \"Isotropic\" to \"Berendsen\"\n"); + } + + if (ir->epc != epcNO) + { + dt_pcoupl = ir->nstpcouple*ir->delta_t; + + sprintf(err_buf, "tau-p must be > 0 instead of %g\n", ir->tau_p); + CHECK(ir->tau_p <= 0); + + if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc)) + { + sprintf(warn_buf, "For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)", + EPCOUPLTYPE(ir->epc), ir->tau_p, pcouple_min_integration_steps(ir->epc), dt_pcoupl); + warning(wi, warn_buf); + } + + sprintf(err_buf, "compressibility must be > 0 when using pressure" + " coupling %s\n", EPCOUPLTYPE(ir->epc)); + CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 || + ir->compress[ZZ][ZZ] < 0 || + (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 && + ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0)); + + if (epcPARRINELLORAHMAN == ir->epc && opts->bGenVel) + { + sprintf(warn_buf, + "You are generating velocities so I am assuming you " + "are equilibrating a system. You are using " + "%s pressure coupling, but this can be " + "unstable for equilibration. If your system crashes, try " + "equilibrating first with Berendsen pressure coupling. If " + "you are not equilibrating the system, you can probably " + "ignore this warning.", + epcoupl_names[ir->epc]); + warning(wi, warn_buf); + } + } + + if (EI_VV(ir->eI)) + { + if (ir->epc > epcNO) + { + if ((ir->epc != epcBERENDSEN) && (ir->epc != epcMTTK)) + { + warning_error(wi, "for md-vv and md-vv-avek, can only use Berendsen and Martyna-Tuckerman-Tobias-Klein (MTTK) equations for pressure control; MTTK is equivalent to Parrinello-Rahman."); + } + } + } + else + { + if (ir->epc == epcMTTK) + { + warning_error(wi, "MTTK pressure coupling requires a Velocity-verlet integrator"); + } + } + + /* ELECTROSTATICS */ + /* More checks are in triple check (grompp.c) */ + + if (ir->coulombtype == eelSWITCH) + { + sprintf(warn_buf, "coulombtype = %s is only for testing purposes and can lead to serious " + "artifacts, advice: use coulombtype = %s", + eel_names[ir->coulombtype], + eel_names[eelRF_ZERO]); + warning(wi, warn_buf); + } + + if (ir->epsilon_r != 1 && ir->implicit_solvent == eisGBSA) + { + sprintf(warn_buf, "epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric", ir->epsilon_r); + warning_note(wi, warn_buf); + } + + if (EEL_RF(ir->coulombtype) && ir->epsilon_rf == 1 && ir->epsilon_r != 1) + { + sprintf(warn_buf, "epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf", ir->epsilon_r); + warning(wi, warn_buf); + ir->epsilon_rf = ir->epsilon_r; + ir->epsilon_r = 1.0; + } + + if (getenv("GMX_DO_GALACTIC_DYNAMICS") == NULL) + { + sprintf(err_buf, "epsilon-r must be >= 0 instead of %g\n", ir->epsilon_r); + CHECK(ir->epsilon_r < 0); + } + + if (EEL_RF(ir->coulombtype)) + { + /* reaction field (at the cut-off) */ + + if (ir->coulombtype == eelRF_ZERO) + { + sprintf(warn_buf, "With 
coulombtype = %s, epsilon-rf must be 0, assuming you meant epsilon_rf=0", + eel_names[ir->coulombtype]); + CHECK(ir->epsilon_rf != 0); + ir->epsilon_rf = 0.0; + } + + sprintf(err_buf, "epsilon-rf must be >= epsilon-r"); + CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) || + (ir->epsilon_r == 0)); + if (ir->epsilon_rf == ir->epsilon_r) + { + sprintf(warn_buf, "Using epsilon-rf = epsilon-r with %s does not make sense", + eel_names[ir->coulombtype]); + warning(wi, warn_buf); + } + } + /* Allow rlist>rcoulomb for tabulated long range stuff. This just + * means the interaction is zero outside rcoulomb, but it helps to + * provide accurate energy conservation. + */ + if (ir_coulomb_might_be_zero_at_cutoff(ir)) + { + if (ir_coulomb_switched(ir)) + { + sprintf(err_buf, + "With coulombtype = %s rcoulomb_switch must be < rcoulomb. Or, better: Use the potential modifier options!", + eel_names[ir->coulombtype]); + CHECK(ir->rcoulomb_switch >= ir->rcoulomb); + } + } + else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) + { + if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE) + { + sprintf(err_buf, "With coulombtype = %s, rcoulomb should be >= rlist unless you use a potential modifier", + eel_names[ir->coulombtype]); + CHECK(ir->rlist > ir->rcoulomb); + } + } + ++ if (ir->coulombtype == eelSWITCH || ir->coulombtype == eelSHIFT) ++ { ++ sprintf(err_buf, ++ "Explicit switch/shift coulomb interactions cannot be used in combination with a secondary coulomb-modifier."); ++ CHECK( ir->coulomb_modifier != eintmodNONE); ++ } ++ if (ir->vdwtype == evdwSWITCH || ir->vdwtype == evdwSHIFT) ++ { ++ sprintf(err_buf, ++ "Explicit switch/shift vdw interactions cannot be used in combination with a secondary vdw-modifier."); ++ CHECK( ir->vdw_modifier != eintmodNONE); ++ } ++ + if (ir->coulombtype == eelSWITCH || ir->coulombtype == eelSHIFT || + ir->vdwtype == evdwSWITCH || ir->vdwtype == evdwSHIFT) + { + sprintf(warn_buf, + "The switch/shift interaction settings are just for compatibility; you will get better " + "performance from applying potential modifiers to your interactions!\n"); + warning_note(wi, warn_buf); + } + + if (ir->coulombtype == eelPMESWITCH || ir->coulomb_modifier == eintmodPOTSWITCH) + { + if (ir->rcoulomb_switch/ir->rcoulomb < 0.9499) + { + real percentage = 100*(ir->rcoulomb-ir->rcoulomb_switch)/ir->rcoulomb; - sprintf(warn_buf, "The switching range for should be 5%% or less (currently %.2f%% using a switching range of %4f-%4f) for accurate electrostatic energies, energy conservation will be good regardless, since ewald_rtol = %g.", ++ sprintf(warn_buf, "The switching range should be 5%% or less (currently %.2f%% using a switching range of %4f-%4f) for accurate electrostatic energies, energy conservation will be good regardless, since ewald_rtol = %g.", + percentage, ir->rcoulomb_switch, ir->rcoulomb, ir->ewald_rtol); + warning(wi, warn_buf); + } + } + + if (ir->vdwtype == evdwSWITCH || ir->vdw_modifier == eintmodPOTSWITCH) + { + if (ir->rvdw_switch == 0) + { + sprintf(warn_buf, "rvdw-switch is equal 0 even though you are using a switched Lennard-Jones potential. This suggests it was not set in the mdp, which can lead to large energy errors. 
In GROMACS, 0.05 to 0.1 nm is often a reasonable vdw switching range."); + warning(wi, warn_buf); + } + } + + if (EEL_FULL(ir->coulombtype)) + { + if (ir->coulombtype == eelPMESWITCH || ir->coulombtype == eelPMEUSER || + ir->coulombtype == eelPMEUSERSWITCH) + { + sprintf(err_buf, "With coulombtype = %s, rcoulomb must be <= rlist", + eel_names[ir->coulombtype]); + CHECK(ir->rcoulomb > ir->rlist); + } + else if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE) + { + if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD) + { + sprintf(err_buf, + "With coulombtype = %s (without modifier), rcoulomb must be equal to rlist,\n" + "or rlistlong if nstcalclr=1. For optimal energy conservation,consider using\n" + "a potential modifier.", eel_names[ir->coulombtype]); + if (ir->nstcalclr == 1) + { + CHECK(ir->rcoulomb != ir->rlist && ir->rcoulomb != ir->rlistlong); + } + else + { + CHECK(ir->rcoulomb != ir->rlist); + } + } + } + } + + if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype)) + { + if (ir->pme_order < 3) + { + warning_error(wi, "pme-order can not be smaller than 3"); + } + } + + if (ir->nwall == 2 && EEL_FULL(ir->coulombtype)) + { + if (ir->ewald_geometry == eewg3D) + { + sprintf(warn_buf, "With pbc=%s you should use ewald-geometry=%s", + epbc_names[ir->ePBC], eewg_names[eewg3DC]); + warning(wi, warn_buf); + } + /* This check avoids extra pbc coding for exclusion corrections */ + sprintf(err_buf, "wall-ewald-zfac should be >= 2"); + CHECK(ir->wall_ewald_zfac < 2); + } + + if (ir_vdw_switched(ir)) + { + sprintf(err_buf, "With switched vdw forces or potentials, rvdw-switch must be < rvdw"); + CHECK(ir->rvdw_switch >= ir->rvdw); + + if (ir->rvdw_switch < 0.5*ir->rvdw) + { + sprintf(warn_buf, "You are applying a switch function to vdw forces or potentials from %g to %g nm, which is more than half the interaction range, whereas switch functions are intended to act only close to the cut-off.", + ir->rvdw_switch, ir->rvdw); + warning_note(wi, warn_buf); + } + } + else if (ir->vdwtype == evdwCUT || ir->vdwtype == evdwPME) + { + if (ir->cutoff_scheme == ecutsGROUP && ir->vdw_modifier == eintmodNONE) + { + sprintf(err_buf, "With vdwtype = %s, rvdw must be >= rlist unless you use a potential modifier", evdw_names[ir->vdwtype]); + CHECK(ir->rlist > ir->rvdw); + } + } + + if (ir->vdwtype == evdwPME) + { + if (!(ir->vdw_modifier == eintmodNONE || ir->vdw_modifier == eintmodPOTSHIFT)) + { + sprintf(err_buf, "With vdwtype = %s, the only supported modifiers are %s a\ +nd %s", + evdw_names[ir->vdwtype], + eintmod_names[eintmodPOTSHIFT], + eintmod_names[eintmodNONE]); + } + } + + if (ir->cutoff_scheme == ecutsGROUP) + { + if (((ir->coulomb_modifier != eintmodNONE && ir->rcoulomb == ir->rlist) || + (ir->vdw_modifier != eintmodNONE && ir->rvdw == ir->rlist)) && + ir->nstlist != 1) + { + warning_note(wi, "With exact cut-offs, rlist should be " + "larger than rcoulomb and rvdw, so that there " + "is a buffer region for particle motion " + "between neighborsearch steps"); + } + + if (ir_coulomb_is_zero_at_cutoff(ir) && ir->rlistlong <= ir->rcoulomb) + { + sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.", + IR_TWINRANGE(*ir) ? "rlistlong" : "rlist"); + warning_note(wi, warn_buf); + } + if (ir_vdw_switched(ir) && (ir->rlistlong <= ir->rvdw)) + { + sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.", + IR_TWINRANGE(*ir) ? 
"rlistlong" : "rlist"); + warning_note(wi, warn_buf); + } + } + + if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO) + { + warning_note(wi, "You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6."); + } + + if (ir->nstlist == -1) + { + sprintf(err_buf, "With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii"); + CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist); + } + sprintf(err_buf, "nstlist can not be smaller than -1"); + CHECK(ir->nstlist < -1); + + if (ir->eI == eiLBFGS && (ir->coulombtype == eelCUT || ir->vdwtype == evdwCUT) + && ir->rvdw != 0) + { + warning(wi, "For efficient BFGS minimization, use switch/shift/pme instead of cut-off."); + } + + if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0) + { + warning(wi, "Using L-BFGS with nbfgscorr<=0 just gets you steepest descent."); + } + + /* ENERGY CONSERVATION */ + if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP) + { + if (!ir_vdw_might_be_zero_at_cutoff(ir) && ir->rvdw > 0 && ir->vdw_modifier == eintmodNONE) + { + sprintf(warn_buf, "You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)", + evdw_names[evdwSHIFT]); + warning_note(wi, warn_buf); + } + if (!ir_coulomb_might_be_zero_at_cutoff(ir) && ir->rcoulomb > 0) + { + sprintf(warn_buf, "You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s", + eel_names[eelPMESWITCH], eel_names[eelRF_ZERO]); + warning_note(wi, warn_buf); + } + } + + if (EI_VV(ir->eI) && IR_TWINRANGE(*ir) && ir->nstlist > 1) + { + sprintf(warn_buf, "Twin-range multiple time stepping does not work with integrator %s.", ei_names[ir->eI]); + warning_error(wi, warn_buf); + } + + /* IMPLICIT SOLVENT */ + if (ir->coulombtype == eelGB_NOTUSED) + { + ir->coulombtype = eelCUT; + ir->implicit_solvent = eisGBSA; + fprintf(stderr, "Note: Old option for generalized born electrostatics given:\n" + "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n" + "setting implicit-solvent value to \"GBSA\" in input section.\n"); + } + + if (ir->sa_algorithm == esaSTILL) + { + sprintf(err_buf, "Still SA algorithm not available yet, use %s or %s instead\n", esa_names[esaAPPROX], esa_names[esaNO]); + CHECK(ir->sa_algorithm == esaSTILL); + } + + if (ir->implicit_solvent == eisGBSA) + { + sprintf(err_buf, "With GBSA implicit solvent, rgbradii must be equal to rlist."); + CHECK(ir->rgbradii != ir->rlist); + + if (ir->coulombtype != eelCUT) + { + sprintf(err_buf, "With GBSA, coulombtype must be equal to %s\n", eel_names[eelCUT]); + CHECK(ir->coulombtype != eelCUT); + } + if (ir->vdwtype != evdwCUT) + { + sprintf(err_buf, "With GBSA, vdw-type must be equal to %s\n", evdw_names[evdwCUT]); + CHECK(ir->vdwtype != evdwCUT); + } + if (ir->nstgbradii < 1) + { + sprintf(warn_buf, "Using GBSA with nstgbradii<1, setting nstgbradii=1"); + warning_note(wi, warn_buf); + ir->nstgbradii = 1; + } + if (ir->sa_algorithm == esaNO) + { + sprintf(warn_buf, "No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n"); + warning_note(wi, warn_buf); + } + if (ir->sa_surface_tension < 0 && ir->sa_algorithm != esaNO) + { + sprintf(warn_buf, "Value of sa_surface_tension is < 0. 
Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
+ warning_note(wi, warn_buf);
+
+ if (ir->gb_algorithm == egbSTILL)
+ {
+ ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
+ }
+ else
+ {
+ ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
+ }
+ }
+ if (ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO)
+ {
+ sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
+ CHECK(ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO);
+ }
+
+ }
+
+ if (ir->bAdress)
+ {
+ if (ir->cutoff_scheme != ecutsGROUP)
+ {
+ warning_error(wi, "AdresS simulation supports only cutoff-scheme=group");
+ }
+ if (!EI_SD(ir->eI))
+ {
+ warning_error(wi, "AdresS simulation supports only stochastic dynamics");
+ }
+ if (ir->epc != epcNO)
+ {
+ warning_error(wi, "AdresS simulation does not support pressure coupling");
+ }
+ if (EEL_FULL(ir->coulombtype))
+ {
+ warning_error(wi, "AdresS simulation does not support long-range electrostatics");
+ }
+ }
+}
+
+/* count the number of text elements separated by whitespace in a string.
+ str = the input string
+ maxptr = the maximum number of allowed elements
+ ptr = the output array of pointers to the first character of each element
+ returns: the number of elements. */
+int str_nelem(const char *str, int maxptr, char *ptr[])
+{
+ int np = 0;
+ char *copy0, *copy;
+
+ copy0 = strdup(str);
+ copy = copy0;
+ ltrim(copy);
+ while (*copy != '\0')
+ {
+ if (np >= maxptr)
+ {
+ gmx_fatal(FARGS, "Too many groups on line: '%s' (max is %d)",
+ str, maxptr);
+ }
+ if (ptr)
+ {
+ ptr[np] = copy;
+ }
+ np++;
+ while ((*copy != '\0') && !isspace(*copy))
+ {
+ copy++;
+ }
+ if (*copy != '\0')
+ {
+ *copy = '\0';
+ copy++;
+ }
+ ltrim(copy);
+ }
+ if (ptr == NULL)
+ {
+ sfree(copy0);
+ }
+
+ return np;
+}
+
+/* interpret a number of doubles from a string and put them in an array,
+ after allocating space for them.
+ str = the input string
+ n = receives the number of doubles read
+ r = the output array of doubles. */
+static void parse_n_real(char *str, int *n, real **r)
+{
+ char *ptr[MAXPTR];
+ int i;
+
+ *n = str_nelem(str, MAXPTR, ptr);
+
+ snew(*r, *n);
+ for (i = 0; i < *n; i++)
+ {
+ (*r)[i] = strtod(ptr[i], NULL);
+ }
+}
+
+static void do_fep_params(t_inputrec *ir, char fep_lambda[][STRLEN], char weights[STRLEN])
+{
+
+ int i, j, max_n_lambda, nweights, nfep[efptNR];
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+ real **count_fep_lambdas;
+ gmx_bool bOneLambda = TRUE;
+
+ snew(count_fep_lambdas, efptNR);
+
+ /* FEP input processing */
+ /* first, identify the number of lambda values for each type.
+ All that are nonzero must have the same number */
+
+ for (i = 0; i < efptNR; i++)
+ {
+ parse_n_real(fep_lambda[i], &(nfep[i]), &(count_fep_lambdas[i]));
+ }
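+ /* Editorial note, a hypothetical example of the rule enforced below: every
+ * non-empty lambda vector in the .mdp file must have the same length, e.g.
+ * fep-lambdas = 0.0 0.5 1.0
+ * coul-lambdas = 0.0 0.2 1.0
+ * gives nfep[efptFEP] = nfep[efptCOUL] = 3, while vectors left empty
+ * (vdw-lambdas, ...) get nfep[i] = 0 and are filled from fep-lambdas below. */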
+ /* now, determine the number of components. All must be either zero, or equal. */
+
+ max_n_lambda = 0;
+ for (i = 0; i < efptNR; i++)
+ {
+ if (nfep[i] > max_n_lambda)
+ {
+ max_n_lambda = nfep[i]; /* here's a nonzero one. All of them
+ must have the same number if it's not zero. */
+ break;
+ }
+ }
+
+ for (i = 0; i < efptNR; i++)
+ {
+ if (nfep[i] == 0)
+ {
+ ir->fepvals->separate_dvdl[i] = FALSE;
+ }
+ else if (nfep[i] == max_n_lambda)
+ {
+ if (i != efptTEMPERATURE) /* we treat this differently -- not really a reason to compute the derivative with
+ respect to the temperature currently */
+ {
+ ir->fepvals->separate_dvdl[i] = TRUE;
+ }
+ }
+ else
+ {
+ gmx_fatal(FARGS, "Number of lambdas (%d) for FEP type %s not equal to number of other types (%d)",
+ nfep[i], efpt_names[i], max_n_lambda);
+ }
+ }
+ /* we don't print out dhdl if the temperature is changing, since we can't correctly define dhdl in this case */
+ ir->fepvals->separate_dvdl[efptTEMPERATURE] = FALSE;
+
+ /* the number of lambdas is the number we've read in, which is either zero
+ or the same for all */
+ fep->n_lambda = max_n_lambda;
+
+ /* allocate space for the array of lambda values */
+ snew(fep->all_lambda, efptNR);
+ /* if init_lambda is defined, we need to set lambda */
+ if ((fep->init_lambda > 0) && (fep->n_lambda == 0))
+ {
+ ir->fepvals->separate_dvdl[efptFEP] = TRUE;
+ }
+ /* otherwise allocate the space for all of the lambdas, and transfer the data */
+ for (i = 0; i < efptNR; i++)
+ {
+ snew(fep->all_lambda[i], fep->n_lambda);
+ if (nfep[i] > 0) /* if it's zero, then the count_fep_lambdas arrays
+ are zero */
+ {
+ for (j = 0; j < fep->n_lambda; j++)
+ {
+ fep->all_lambda[i][j] = (double)count_fep_lambdas[i][j];
+ }
+ sfree(count_fep_lambdas[i]);
+ }
+ }
+ sfree(count_fep_lambdas);
+
+ /* "fep-lambdas" is either zero or the full number. If zero, we'll need to define fep-lambdas for internal
+ bookkeeping -- for now, init_lambda */
+
+ if ((nfep[efptFEP] == 0) && (fep->init_lambda >= 0))
+ {
+ for (i = 0; i < fep->n_lambda; i++)
+ {
+ fep->all_lambda[efptFEP][i] = fep->init_lambda;
+ }
+ }
+
+ /* check to see if only a single component lambda is defined, and soft core is defined.
+ In this case, turn on coulomb soft core */
+
+ if (max_n_lambda == 0)
+ {
+ bOneLambda = TRUE;
+ }
+ else
+ {
+ for (i = 0; i < efptNR; i++)
+ {
+ if ((nfep[i] != 0) && (i != efptFEP))
+ {
+ bOneLambda = FALSE;
+ }
+ }
+ }
+ if ((bOneLambda) && (fep->sc_alpha > 0))
+ {
+ fep->bScCoul = TRUE;
+ }
+
+ /* Fill in the others with the efptFEP if they are not explicitly
+ specified (i.e. nfep[i] == 0). This means if fep is not defined,
+ they are all zero. */
+
+ for (i = 0; i < efptNR; i++)
+ {
+ if ((nfep[i] == 0) && (i != efptFEP))
+ {
+ for (j = 0; j < fep->n_lambda; j++)
+ {
+ fep->all_lambda[i][j] = fep->all_lambda[efptFEP][j];
+ }
+ }
+ }
+
+ /* make it easier if sc_r_power = 48 by increasing it to the 4th power, to be in the right scale. */
+ if (fep->sc_r_power == 48)
+ {
+ if (fep->sc_alpha > 0.1)
+ {
+ gmx_fatal(FARGS, "sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004", fep->sc_alpha);
+ }
+ }
+
+ expand = ir->expandedvals;
+ /* now read in the weights */
+ parse_n_real(weights, &nweights, &(expand->init_lambda_weights));
+ if (nweights == 0)
+ {
+ snew(expand->init_lambda_weights, fep->n_lambda); /* initialize to zero */
+ }
+ else if (nweights != fep->n_lambda)
+ {
+ gmx_fatal(FARGS, "Number of weights (%d) is not equal to number of lambda values (%d)",
+ nweights, fep->n_lambda);
+ }
+ if ((expand->nstexpanded < 0) && (ir->efep != efepNO))
+ {
+ expand->nstexpanded = fep->nstdhdl;
+ /* if you don't specify nstexpanded when doing expanded ensemble free energy calcs, it is set to nstdhdl */
+ }
+ if ((expand->nstexpanded < 0) && ir->bSimTemp)
+ {
+ expand->nstexpanded = 2*(int)(ir->opts.tau_t[0]/ir->delta_t);
+ /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to
+ 2*tau_t just to be careful so it's not too frequent */
+ }
+}
+
+
+static void do_simtemp_params(t_inputrec *ir)
+{
+
+ snew(ir->simtempvals->temperatures, ir->fepvals->n_lambda);
+ GetSimTemps(ir->fepvals->n_lambda, ir->simtempvals, ir->fepvals->all_lambda[efptTEMPERATURE]);
+
+ return;
+}
+
+static void do_wall_params(t_inputrec *ir,
+ char *wall_atomtype, char *wall_density,
+ t_gromppopts *opts)
+{
+ int nstr, i;
+ char *names[MAXPTR];
+ double dbl;
+
+ opts->wall_atomtype[0] = NULL;
+ opts->wall_atomtype[1] = NULL;
+
+ ir->wall_atomtype[0] = -1;
+ ir->wall_atomtype[1] = -1;
+ ir->wall_density[0] = 0;
+ ir->wall_density[1] = 0;
+
+ if (ir->nwall > 0)
+ {
+ nstr = str_nelem(wall_atomtype, MAXPTR, names);
+ if (nstr != ir->nwall)
+ {
+ gmx_fatal(FARGS, "Expected %d elements for wall_atomtype, found %d",
+ ir->nwall, nstr);
+ }
+ for (i = 0; i < ir->nwall; i++)
+ {
+ opts->wall_atomtype[i] = strdup(names[i]);
+ }
+
+ if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
+ {
+ nstr = str_nelem(wall_density, MAXPTR, names);
+ if (nstr != ir->nwall)
+ {
+ gmx_fatal(FARGS, "Expected %d elements for wall-density, found %d", ir->nwall, nstr);
+ }
+ for (i = 0; i < ir->nwall; i++)
+ {
+ sscanf(names[i], "%lf", &dbl);
+ if (dbl <= 0)
+ {
+ gmx_fatal(FARGS, "wall-density[%d] = %f, should be > 0\n", i, dbl);
+ }
+ ir->wall_density[i] = dbl;
+ }
+ }
+ }
+}
+
+static void add_wall_energrps(gmx_groups_t *groups, int nwall, t_symtab *symtab)
+{
+ int i;
+ t_grps *grps;
+ char str[STRLEN];
+
+ if (nwall > 0)
+ {
+ srenew(groups->grpname, groups->ngrpname+nwall);
+ grps = &(groups->grps[egcENER]);
+ srenew(grps->nm_ind, grps->nr+nwall);
+ for (i = 0; i < nwall; i++)
+ {
+ sprintf(str, "wall%d", i);
+ groups->grpname[groups->ngrpname] = put_symtab(symtab, str);
+ grps->nm_ind[grps->nr++] = groups->ngrpname++;
+ }
+ }
+}
+
+void read_expandedparams(int *ninp_p, t_inpfile **inp_p,
+ t_expanded *expand, warninp_t wi)
+{
+ int ninp, nerror = 0;
+ t_inpfile *inp;
+
+ ninp = *ninp_p;
+ inp = *inp_p;
+
+ /* read expanded ensemble parameters */
+ CCTYPE ("expanded ensemble variables");
+ ITYPE ("nstexpanded", expand->nstexpanded, -1);
+ EETYPE("lmc-stats", expand->elamstats, elamstats_names);
+ EETYPE("lmc-move", expand->elmcmove, elmcmove_names);
+ EETYPE("lmc-weights-equil", expand->elmceq, elmceq_names);
+ ITYPE ("weight-equil-number-all-lambda", expand->equil_n_at_lam, -1);
+ ITYPE ("weight-equil-number-samples", expand->equil_samples, -1);
+ ITYPE ("weight-equil-number-steps", expand->equil_steps, -1);
+ RTYPE 
("weight-equil-wl-delta", expand->equil_wl_delta, -1); + RTYPE ("weight-equil-count-ratio", expand->equil_ratio, -1); + CCTYPE("Seed for Monte Carlo in lambda space"); + ITYPE ("lmc-seed", expand->lmc_seed, -1); + RTYPE ("mc-temperature", expand->mc_temp, -1); + ITYPE ("lmc-repeats", expand->lmc_repeats, 1); + ITYPE ("lmc-gibbsdelta", expand->gibbsdeltalam, -1); + ITYPE ("lmc-forced-nstart", expand->lmc_forced_nstart, 0); + EETYPE("symmetrized-transition-matrix", expand->bSymmetrizedTMatrix, yesno_names); + ITYPE("nst-transition-matrix", expand->nstTij, -1); + ITYPE ("mininum-var-min", expand->minvarmin, 100); /*default is reasonable */ + ITYPE ("weight-c-range", expand->c_range, 0); /* default is just C=0 */ + RTYPE ("wl-scale", expand->wl_scale, 0.8); + RTYPE ("wl-ratio", expand->wl_ratio, 0.8); + RTYPE ("init-wl-delta", expand->init_wl_delta, 1.0); + EETYPE("wl-oneovert", expand->bWLoneovert, yesno_names); + + *ninp_p = ninp; + *inp_p = inp; + + return; +} + +void get_ir(const char *mdparin, const char *mdparout, + t_inputrec *ir, t_gromppopts *opts, + warninp_t wi) +{ + char *dumstr[2]; + double dumdub[2][6]; + t_inpfile *inp; + const char *tmp; + int i, j, m, ninp; + char warn_buf[STRLEN]; + t_lambda *fep = ir->fepvals; + t_expanded *expand = ir->expandedvals; + + init_inputrec_strings(); + inp = read_inpfile(mdparin, &ninp, wi); + + snew(dumstr[0], STRLEN); + snew(dumstr[1], STRLEN); + + if (-1 == search_einp(ninp, inp, "cutoff-scheme")) + { + sprintf(warn_buf, + "%s did not specify a value for the .mdp option " + "\"cutoff-scheme\". Probably it was first intended for use " + "with GROMACS before 4.6. In 4.6, the Verlet scheme was " + "introduced, but the group scheme was still the default. " + "The default is now the Verlet scheme, so you will observe " + "different behaviour.", mdparin); + warning_note(wi, warn_buf); + } + + /* ignore the following deprecated commands */ + REM_TYPE("title"); + REM_TYPE("cpp"); + REM_TYPE("domain-decomposition"); + REM_TYPE("andersen-seed"); + REM_TYPE("dihre"); + REM_TYPE("dihre-fc"); + REM_TYPE("dihre-tau"); + REM_TYPE("nstdihreout"); + REM_TYPE("nstcheckpoint"); + REM_TYPE("optimize-fft"); + + /* replace the following commands with the clearer new versions*/ + REPL_TYPE("unconstrained-start", "continuation"); + REPL_TYPE("foreign-lambda", "fep-lambdas"); + REPL_TYPE("verlet-buffer-drift", "verlet-buffer-tolerance"); + REPL_TYPE("nstxtcout", "nstxout-compressed"); + REPL_TYPE("xtc-grps", "compressed-x-grps"); + REPL_TYPE("xtc-precision", "compressed-x-precision"); + + CCTYPE ("VARIOUS PREPROCESSING OPTIONS"); + CTYPE ("Preprocessor information: use cpp syntax."); + CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe"); + STYPE ("include", opts->include, NULL); + CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)"); + STYPE ("define", opts->define, NULL); + + CCTYPE ("RUN CONTROL PARAMETERS"); + EETYPE("integrator", ir->eI, ei_names); + CTYPE ("Start time and timestep in ps"); + RTYPE ("tinit", ir->init_t, 0.0); + RTYPE ("dt", ir->delta_t, 0.001); + STEPTYPE ("nsteps", ir->nsteps, 0); + CTYPE ("For exact run continuation or redoing part of a run"); + STEPTYPE ("init-step", ir->init_step, 0); + CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)"); + ITYPE ("simulation-part", ir->simulation_part, 1); + CTYPE ("mode for center of mass motion removal"); + EETYPE("comm-mode", ir->comm_mode, ecm_names); + CTYPE ("number of steps for center of mass motion removal"); + ITYPE ("nstcomm", ir->nstcomm, 
100); + CTYPE ("group(s) for center of mass motion removal"); + STYPE ("comm-grps", is->vcm, NULL); + + CCTYPE ("LANGEVIN DYNAMICS OPTIONS"); + CTYPE ("Friction coefficient (amu/ps) and random seed"); + RTYPE ("bd-fric", ir->bd_fric, 0.0); + STEPTYPE ("ld-seed", ir->ld_seed, -1); + + /* Em stuff */ + CCTYPE ("ENERGY MINIMIZATION OPTIONS"); + CTYPE ("Force tolerance and initial step-size"); + RTYPE ("emtol", ir->em_tol, 10.0); + RTYPE ("emstep", ir->em_stepsize, 0.01); + CTYPE ("Max number of iterations in relax-shells"); + ITYPE ("niter", ir->niter, 20); + CTYPE ("Step size (ps^2) for minimization of flexible constraints"); + RTYPE ("fcstep", ir->fc_stepsize, 0); + CTYPE ("Frequency of steepest descents steps when doing CG"); + ITYPE ("nstcgsteep", ir->nstcgsteep, 1000); + ITYPE ("nbfgscorr", ir->nbfgscorr, 10); + + CCTYPE ("TEST PARTICLE INSERTION OPTIONS"); + RTYPE ("rtpi", ir->rtpi, 0.05); + + /* Output options */ + CCTYPE ("OUTPUT CONTROL OPTIONS"); + CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)"); + ITYPE ("nstxout", ir->nstxout, 0); + ITYPE ("nstvout", ir->nstvout, 0); + ITYPE ("nstfout", ir->nstfout, 0); + CTYPE ("Output frequency for energies to log file and energy file"); + ITYPE ("nstlog", ir->nstlog, 1000); + ITYPE ("nstcalcenergy", ir->nstcalcenergy, 100); + ITYPE ("nstenergy", ir->nstenergy, 1000); + CTYPE ("Output frequency and precision for .xtc file"); + ITYPE ("nstxout-compressed", ir->nstxout_compressed, 0); + RTYPE ("compressed-x-precision", ir->x_compression_precision, 1000.0); + CTYPE ("This selects the subset of atoms for the compressed"); + CTYPE ("trajectory file. You can select multiple groups. By"); + CTYPE ("default, all atoms will be written."); + STYPE ("compressed-x-grps", is->x_compressed_groups, NULL); + CTYPE ("Selection of energy groups"); + STYPE ("energygrps", is->energy, NULL); + + /* Neighbor searching */ + CCTYPE ("NEIGHBORSEARCHING PARAMETERS"); + CTYPE ("cut-off scheme (Verlet: particle based cut-offs, group: using charge groups)"); + EETYPE("cutoff-scheme", ir->cutoff_scheme, ecutscheme_names); + CTYPE ("nblist update frequency"); + ITYPE ("nstlist", ir->nstlist, 10); + CTYPE ("ns algorithm (simple or grid)"); + EETYPE("ns-type", ir->ns_type, ens_names); + CTYPE ("Periodic boundary conditions: xyz, no, xy"); + EETYPE("pbc", ir->ePBC, epbc_names); + EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names); + CTYPE ("Allowed energy error due to the Verlet buffer in kJ/mol/ps per atom,"); + CTYPE ("a value of -1 means: use rlist"); + RTYPE("verlet-buffer-tolerance", ir->verletbuf_tol, 0.005); + CTYPE ("nblist cut-off"); + RTYPE ("rlist", ir->rlist, 1.0); + CTYPE ("long-range cut-off for switched potentials"); + RTYPE ("rlistlong", ir->rlistlong, -1); + ITYPE ("nstcalclr", ir->nstcalclr, -1); + + /* Electrostatics */ + CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW"); + CTYPE ("Method for doing electrostatics"); + EETYPE("coulombtype", ir->coulombtype, eel_names); + EETYPE("coulomb-modifier", ir->coulomb_modifier, eintmod_names); + CTYPE ("cut-off lengths"); + RTYPE ("rcoulomb-switch", ir->rcoulomb_switch, 0.0); + RTYPE ("rcoulomb", ir->rcoulomb, 1.0); + CTYPE ("Relative dielectric constant for the medium and the reaction field"); + RTYPE ("epsilon-r", ir->epsilon_r, 1.0); + RTYPE ("epsilon-rf", ir->epsilon_rf, 0.0); + CTYPE ("Method for doing Van der Waals"); + EETYPE("vdw-type", ir->vdwtype, evdw_names); + EETYPE("vdw-modifier", ir->vdw_modifier, eintmod_names); + CTYPE ("cut-off lengths"); + RTYPE ("rvdw-switch", 
ir->rvdw_switch, 0.0); + RTYPE ("rvdw", ir->rvdw, 1.0); + CTYPE ("Apply long range dispersion corrections for Energy and Pressure"); + EETYPE("DispCorr", ir->eDispCorr, edispc_names); + CTYPE ("Extension of the potential lookup tables beyond the cut-off"); + RTYPE ("table-extension", ir->tabext, 1.0); + CTYPE ("Separate tables between energy group pairs"); + STYPE ("energygrp-table", is->egptable, NULL); + CTYPE ("Spacing for the PME/PPPM FFT grid"); + RTYPE ("fourierspacing", ir->fourier_spacing, 0.12); + CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used"); + ITYPE ("fourier-nx", ir->nkx, 0); + ITYPE ("fourier-ny", ir->nky, 0); + ITYPE ("fourier-nz", ir->nkz, 0); + CTYPE ("EWALD/PME/PPPM parameters"); + ITYPE ("pme-order", ir->pme_order, 4); + RTYPE ("ewald-rtol", ir->ewald_rtol, 0.00001); + RTYPE ("ewald-rtol-lj", ir->ewald_rtol_lj, 0.001); + EETYPE("lj-pme-comb-rule", ir->ljpme_combination_rule, eljpme_names); + EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names); + RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0); + + CCTYPE("IMPLICIT SOLVENT ALGORITHM"); + EETYPE("implicit-solvent", ir->implicit_solvent, eis_names); + + CCTYPE ("GENERALIZED BORN ELECTROSTATICS"); + CTYPE ("Algorithm for calculating Born radii"); + EETYPE("gb-algorithm", ir->gb_algorithm, egb_names); + CTYPE ("Frequency of calculating the Born radii inside rlist"); + ITYPE ("nstgbradii", ir->nstgbradii, 1); + CTYPE ("Cutoff for Born radii calculation; the contribution from atoms"); + CTYPE ("between rlist and rgbradii is updated every nstlist steps"); + RTYPE ("rgbradii", ir->rgbradii, 1.0); + CTYPE ("Dielectric coefficient of the implicit solvent"); + RTYPE ("gb-epsilon-solvent", ir->gb_epsilon_solvent, 80.0); + CTYPE ("Salt concentration in M for Generalized Born models"); + RTYPE ("gb-saltconc", ir->gb_saltconc, 0.0); + CTYPE ("Scaling factors used in the OBC GB model. 
Default values are OBC(II)"); + RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0); + RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8); + RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85); + RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009); + EETYPE("sa-algorithm", ir->sa_algorithm, esa_names); + CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA"); + CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models."); + RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1); + + /* Coupling stuff */ + CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS"); + CTYPE ("Temperature coupling"); + EETYPE("tcoupl", ir->etc, etcoupl_names); + ITYPE ("nsttcouple", ir->nsttcouple, -1); + ITYPE("nh-chain-length", ir->opts.nhchainlength, 10); + EETYPE("print-nose-hoover-chain-variables", ir->bPrintNHChains, yesno_names); + CTYPE ("Groups to couple separately"); + STYPE ("tc-grps", is->tcgrps, NULL); + CTYPE ("Time constant (ps) and reference temperature (K)"); + STYPE ("tau-t", is->tau_t, NULL); + STYPE ("ref-t", is->ref_t, NULL); + CTYPE ("pressure coupling"); + EETYPE("pcoupl", ir->epc, epcoupl_names); + EETYPE("pcoupltype", ir->epct, epcoupltype_names); + ITYPE ("nstpcouple", ir->nstpcouple, -1); + CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)"); + RTYPE ("tau-p", ir->tau_p, 1.0); + STYPE ("compressibility", dumstr[0], NULL); + STYPE ("ref-p", dumstr[1], NULL); + CTYPE ("Scaling of reference coordinates, No, All or COM"); + EETYPE ("refcoord-scaling", ir->refcoord_scaling, erefscaling_names); + + /* QMMM */ + CCTYPE ("OPTIONS FOR QMMM calculations"); + EETYPE("QMMM", ir->bQMMM, yesno_names); + CTYPE ("Groups treated Quantum Mechanically"); + STYPE ("QMMM-grps", is->QMMM, NULL); + CTYPE ("QM method"); + STYPE("QMmethod", is->QMmethod, NULL); + CTYPE ("QMMM scheme"); + EETYPE("QMMMscheme", ir->QMMMscheme, eQMMMscheme_names); + CTYPE ("QM basisset"); + STYPE("QMbasis", is->QMbasis, NULL); + CTYPE ("QM charge"); + STYPE ("QMcharge", is->QMcharge, NULL); + CTYPE ("QM multiplicity"); + STYPE ("QMmult", is->QMmult, NULL); + CTYPE ("Surface Hopping"); + STYPE ("SH", is->bSH, NULL); + CTYPE ("CAS space options"); + STYPE ("CASorbitals", is->CASorbitals, NULL); + STYPE ("CASelectrons", is->CASelectrons, NULL); + STYPE ("SAon", is->SAon, NULL); + STYPE ("SAoff", is->SAoff, NULL); + STYPE ("SAsteps", is->SAsteps, NULL); + CTYPE ("Scale factor for MM charges"); + RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0); + CTYPE ("Optimization of QM subsystem"); + STYPE ("bOPT", is->bOPT, NULL); + STYPE ("bTS", is->bTS, NULL); + + /* Simulated annealing */ + CCTYPE("SIMULATED ANNEALING"); + CTYPE ("Type of annealing for each temperature group (no/single/periodic)"); + STYPE ("annealing", is->anneal, NULL); + CTYPE ("Number of time points to use for specifying annealing in each group"); + STYPE ("annealing-npoints", is->anneal_npoints, NULL); + CTYPE ("List of times at the annealing points for each group"); + STYPE ("annealing-time", is->anneal_time, NULL); + CTYPE ("Temp. 
at each annealing point, for each group."); + STYPE ("annealing-temp", is->anneal_temp, NULL); + + /* Startup run */ + CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN"); + EETYPE("gen-vel", opts->bGenVel, yesno_names); + RTYPE ("gen-temp", opts->tempi, 300.0); + ITYPE ("gen-seed", opts->seed, -1); + + /* Shake stuff */ + CCTYPE ("OPTIONS FOR BONDS"); + EETYPE("constraints", opts->nshake, constraints); + CTYPE ("Type of constraint algorithm"); + EETYPE("constraint-algorithm", ir->eConstrAlg, econstr_names); + CTYPE ("Do not constrain the start configuration"); + EETYPE("continuation", ir->bContinuation, yesno_names); + CTYPE ("Use successive overrelaxation to reduce the number of shake iterations"); + EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names); + CTYPE ("Relative tolerance of shake"); + RTYPE ("shake-tol", ir->shake_tol, 0.0001); + CTYPE ("Highest order in the expansion of the constraint coupling matrix"); + ITYPE ("lincs-order", ir->nProjOrder, 4); + CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for"); + CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs."); + CTYPE ("For energy minimization with constraints it should be 4 to 8."); + ITYPE ("lincs-iter", ir->nLincsIter, 1); + CTYPE ("Lincs will write a warning to the stderr if in one step a bond"); + CTYPE ("rotates over more degrees than"); + RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0); + CTYPE ("Convert harmonic bonds to morse potentials"); + EETYPE("morse", opts->bMorse, yesno_names); + + /* Energy group exclusions */ + CCTYPE ("ENERGY GROUP EXCLUSIONS"); + CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded"); + STYPE ("energygrp-excl", is->egpexcl, NULL); + + /* Walls */ + CCTYPE ("WALLS"); + CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald"); + ITYPE ("nwall", ir->nwall, 0); + EETYPE("wall-type", ir->wall_type, ewt_names); + RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1); + STYPE ("wall-atomtype", is->wall_atomtype, NULL); + STYPE ("wall-density", is->wall_density, NULL); + RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3); + + /* COM pulling */ + CCTYPE("COM PULLING"); + CTYPE("Pull type: no, umbrella, constraint or constant-force"); + EETYPE("pull", ir->ePull, epull_names); + if (ir->ePull != epullNO) + { + snew(ir->pull, 1); + is->pull_grp = read_pullparams(&ninp, &inp, ir->pull, &opts->pull_start, wi); + } + + /* Enforced rotation */ + CCTYPE("ENFORCED ROTATION"); + CTYPE("Enforced rotation: No or Yes"); + EETYPE("rotation", ir->bRot, yesno_names); + if (ir->bRot) + { + snew(ir->rot, 1); + is->rot_grp = read_rotparams(&ninp, &inp, ir->rot, wi); + } + + /* Interactive MD */ + ir->bIMD = FALSE; + CCTYPE("Group to display and/or manipulate in interactive MD session"); + STYPE ("IMD-group", is->imd_grp, NULL); + if (is->imd_grp[0] != '\0') + { + snew(ir->imd, 1); + ir->bIMD = TRUE; + } + + /* Refinement */ + CCTYPE("NMR refinement stuff"); + CTYPE ("Distance restraints type: No, Simple or Ensemble"); + EETYPE("disre", ir->eDisre, edisre_names); + CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal"); + EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names); + CTYPE ("Use sqrt of the time averaged times the instantaneous violation"); + EETYPE("disre-mixed", ir->bDisreMixed, yesno_names); + RTYPE ("disre-fc", ir->dr_fc, 1000.0); + RTYPE ("disre-tau", ir->dr_tau, 0.0); + CTYPE ("Output frequency for pair distances to energy file"); + ITYPE ("nstdisreout", ir->nstdisreout, 100); 
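+ /* Editorial example (hypothetical .mdp fragment) for the distance-restraint
+ * options read just above:
+ * disre = Simple
+ * disre-weighting = Conservative
+ * disre-fc = 1000
+ * nstdisreout = 100
+ * Each *TYPE macro consumes one key from the inpfile list and falls back to
+ * the listed default when the key is absent from the .mdp file. */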
+ CTYPE ("Orientation restraints: No or Yes"); + EETYPE("orire", opts->bOrire, yesno_names); + CTYPE ("Orientation restraints force constant and tau for time averaging"); + RTYPE ("orire-fc", ir->orires_fc, 0.0); + RTYPE ("orire-tau", ir->orires_tau, 0.0); + STYPE ("orire-fitgrp", is->orirefitgrp, NULL); + CTYPE ("Output frequency for trace(SD) and S to energy file"); + ITYPE ("nstorireout", ir->nstorireout, 100); + + /* free energy variables */ + CCTYPE ("Free energy variables"); + EETYPE("free-energy", ir->efep, efep_names); + STYPE ("couple-moltype", is->couple_moltype, NULL); + EETYPE("couple-lambda0", opts->couple_lam0, couple_lam); + EETYPE("couple-lambda1", opts->couple_lam1, couple_lam); + EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names); + + RTYPE ("init-lambda", fep->init_lambda, -1); /* start with -1 so + we can recognize if + it was not entered */ + ITYPE ("init-lambda-state", fep->init_fep_state, -1); + RTYPE ("delta-lambda", fep->delta_lambda, 0.0); + ITYPE ("nstdhdl", fep->nstdhdl, 50); + STYPE ("fep-lambdas", is->fep_lambda[efptFEP], NULL); + STYPE ("mass-lambdas", is->fep_lambda[efptMASS], NULL); + STYPE ("coul-lambdas", is->fep_lambda[efptCOUL], NULL); + STYPE ("vdw-lambdas", is->fep_lambda[efptVDW], NULL); + STYPE ("bonded-lambdas", is->fep_lambda[efptBONDED], NULL); + STYPE ("restraint-lambdas", is->fep_lambda[efptRESTRAINT], NULL); + STYPE ("temperature-lambdas", is->fep_lambda[efptTEMPERATURE], NULL); + ITYPE ("calc-lambda-neighbors", fep->lambda_neighbors, 1); + STYPE ("init-lambda-weights", is->lambda_weights, NULL); + EETYPE("dhdl-print-energy", fep->bPrintEnergy, yesno_names); + RTYPE ("sc-alpha", fep->sc_alpha, 0.0); + ITYPE ("sc-power", fep->sc_power, 1); + RTYPE ("sc-r-power", fep->sc_r_power, 6.0); + RTYPE ("sc-sigma", fep->sc_sigma, 0.3); + EETYPE("sc-coul", fep->bScCoul, yesno_names); + ITYPE ("dh_hist_size", fep->dh_hist_size, 0); + RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1); + EETYPE("separate-dhdl-file", fep->separate_dhdl_file, + separate_dhdl_file_names); + EETYPE("dhdl-derivatives", fep->dhdl_derivatives, dhdl_derivatives_names); + ITYPE ("dh_hist_size", fep->dh_hist_size, 0); + RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1); + + /* Non-equilibrium MD stuff */ + CCTYPE("Non-equilibrium MD stuff"); + STYPE ("acc-grps", is->accgrps, NULL); + STYPE ("accelerate", is->acc, NULL); + STYPE ("freezegrps", is->freeze, NULL); + STYPE ("freezedim", is->frdim, NULL); + RTYPE ("cos-acceleration", ir->cos_accel, 0); + STYPE ("deform", is->deform, NULL); + + /* simulated tempering variables */ + CCTYPE("simulated tempering variables"); + EETYPE("simulated-tempering", ir->bSimTemp, yesno_names); + EETYPE("simulated-tempering-scaling", ir->simtempvals->eSimTempScale, esimtemp_names); + RTYPE("sim-temp-low", ir->simtempvals->simtemp_low, 300.0); + RTYPE("sim-temp-high", ir->simtempvals->simtemp_high, 300.0); + + /* expanded ensemble variables */ + if (ir->efep == efepEXPANDED || ir->bSimTemp) + { + read_expandedparams(&ninp, &inp, expand, wi); + } + + /* Electric fields */ + CCTYPE("Electric fields"); + CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)"); + CTYPE ("and a phase angle (real)"); + STYPE ("E-x", is->efield_x, NULL); + STYPE ("E-xt", is->efield_xt, NULL); + STYPE ("E-y", is->efield_y, NULL); + STYPE ("E-yt", is->efield_yt, NULL); + STYPE ("E-z", is->efield_z, NULL); + STYPE ("E-zt", is->efield_zt, NULL); + + CCTYPE("Ion/water position swapping for computational electrophysiology setups"); + CTYPE("Swap 
+ CCTYPE("Ion/water position swapping for computational electrophysiology setups");
+ CTYPE("Swap positions along direction: no, X, Y, Z");
+ EETYPE("swapcoords", ir->eSwapCoords, eSwapTypes_names);
+ if (ir->eSwapCoords != eswapNO)
+ {
+ snew(ir->swap, 1);
+ CTYPE("Swap attempt frequency");
+ ITYPE("swap-frequency", ir->swap->nstswap, 1);
+ CTYPE("Two index groups that contain the compartment-partitioning atoms");
+ STYPE("split-group0", splitgrp0, NULL);
+ STYPE("split-group1", splitgrp1, NULL);
+ CTYPE("Use center of mass of split groups (yes/no), otherwise center of geometry is used");
+ EETYPE("massw-split0", ir->swap->massw_split[0], yesno_names);
+ EETYPE("massw-split1", ir->swap->massw_split[1], yesno_names);
+
+ CTYPE("Group name of ions that can be exchanged with solvent molecules");
+ STYPE("swap-group", swapgrp, NULL);
+ CTYPE("Group name of solvent molecules");
+ STYPE("solvent-group", solgrp, NULL);
+
+ CTYPE("Split cylinder: radius, upper and lower extension (nm) (this will define the channels)");
+ CTYPE("Note that the split cylinder settings do not have an influence on the swapping protocol,");
+ CTYPE("however, if correctly defined, the ion permeation events are counted per channel");
+ RTYPE("cyl0-r", ir->swap->cyl0r, 2.0);
+ RTYPE("cyl0-up", ir->swap->cyl0u, 1.0);
+ RTYPE("cyl0-down", ir->swap->cyl0l, 1.0);
+ RTYPE("cyl1-r", ir->swap->cyl1r, 2.0);
+ RTYPE("cyl1-up", ir->swap->cyl1u, 1.0);
+ RTYPE("cyl1-down", ir->swap->cyl1l, 1.0);
+
+ CTYPE("Average the number of ions per compartment over this many swap attempt steps");
+ ITYPE("coupl-steps", ir->swap->nAverage, 10);
+ CTYPE("Requested number of anions and cations for each of the two compartments");
+ CTYPE("-1 means fix the numbers as found in time step 0");
+ ITYPE("anionsA", ir->swap->nanions[0], -1);
+ ITYPE("cationsA", ir->swap->ncations[0], -1);
+ ITYPE("anionsB", ir->swap->nanions[1], -1);
+ ITYPE("cationsB", ir->swap->ncations[1], -1);
+ CTYPE("Start to swap ions if threshold difference to requested count is reached");
+ RTYPE("threshold", ir->swap->threshold, 1.0);
+ }
+
+ /* AdResS defined thingies */
+ CCTYPE ("AdResS parameters");
+ EETYPE("adress", ir->bAdress, yesno_names);
+ if (ir->bAdress)
+ {
+ snew(ir->adress, 1);
+ read_adressparams(&ninp, &inp, ir->adress, wi);
+ }
+
+ /* User defined thingies */
+ CCTYPE ("User defined thingies");
+ STYPE ("user1-grps", is->user1, NULL);
+ STYPE ("user2-grps", is->user2, NULL);
+ ITYPE ("userint1", ir->userint1, 0);
+ ITYPE ("userint2", ir->userint2, 0);
+ ITYPE ("userint3", ir->userint3, 0);
+ ITYPE ("userint4", ir->userint4, 0);
+ RTYPE ("userreal1", ir->userreal1, 0);
+ RTYPE ("userreal2", ir->userreal2, 0);
+ RTYPE ("userreal3", ir->userreal3, 0);
+ RTYPE ("userreal4", ir->userreal4, 0);
+#undef CTYPE
+
+ write_inpfile(mdparout, ninp, inp, FALSE, wi);
+ for (i = 0; (i < ninp); i++)
+ {
+ sfree(inp[i].name);
+ sfree(inp[i].value);
+ }
+ sfree(inp);
+
+ /* Process options if necessary */
+ for (m = 0; m < 2; m++)
+ {
+ for (i = 0; i < 2*DIM; i++)
+ {
+ dumdub[m][i] = 0.0;
+ }
+ if (ir->epc)
+ {
+ switch (ir->epct)
+ {
+ case epctISOTROPIC:
+ if (sscanf(dumstr[m], "%lf", &(dumdub[m][XX])) != 1)
+ {
+ warning_error(wi, "Pressure coupling not enough values (I need 1)");
+ }
+ dumdub[m][YY] = dumdub[m][ZZ] = dumdub[m][XX];
+ break;
+ case epctSEMIISOTROPIC:
+ case epctSURFACETENSION:
+ if (sscanf(dumstr[m], "%lf%lf",
+ &(dumdub[m][XX]), &(dumdub[m][ZZ])) != 2)
+ {
+ warning_error(wi, "Pressure coupling not enough values (I need 2)");
+ }
+ dumdub[m][YY] = dumdub[m][XX];
+ break;
+ case epctANISOTROPIC:
+ if (sscanf(dumstr[m], "%lf%lf%lf%lf%lf%lf",
&(dumdub[m][XX]), &(dumdub[m][YY]), &(dumdub[m][ZZ]), + &(dumdub[m][3]), &(dumdub[m][4]), &(dumdub[m][5])) != 6) + { + warning_error(wi, "Pressure coupling not enough values (I need 6)"); + } + break; + default: + gmx_fatal(FARGS, "Pressure coupling type %s not implemented yet", + epcoupltype_names[ir->epct]); + } + } + } + clear_mat(ir->ref_p); + clear_mat(ir->compress); + for (i = 0; i < DIM; i++) + { + ir->ref_p[i][i] = dumdub[1][i]; + ir->compress[i][i] = dumdub[0][i]; + } + if (ir->epct == epctANISOTROPIC) + { + ir->ref_p[XX][YY] = dumdub[1][3]; + ir->ref_p[XX][ZZ] = dumdub[1][4]; + ir->ref_p[YY][ZZ] = dumdub[1][5]; + if (ir->ref_p[XX][YY] != 0 && ir->ref_p[XX][ZZ] != 0 && ir->ref_p[YY][ZZ] != 0) + { + warning(wi, "All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n"); + } + ir->compress[XX][YY] = dumdub[0][3]; + ir->compress[XX][ZZ] = dumdub[0][4]; + ir->compress[YY][ZZ] = dumdub[0][5]; + for (i = 0; i < DIM; i++) + { + for (m = 0; m < i; m++) + { + ir->ref_p[i][m] = ir->ref_p[m][i]; + ir->compress[i][m] = ir->compress[m][i]; + } + } + } + + if (ir->comm_mode == ecmNO) + { + ir->nstcomm = 0; + } + + opts->couple_moltype = NULL; + if (strlen(is->couple_moltype) > 0) + { + if (ir->efep != efepNO) + { + opts->couple_moltype = strdup(is->couple_moltype); + if (opts->couple_lam0 == opts->couple_lam1) + { + warning(wi, "The lambda=0 and lambda=1 states for coupling are identical"); + } + if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE || + opts->couple_lam1 == ecouplamNONE)) + { + warning(wi, "For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used"); + } + } + else + { + warning(wi, "Can not couple a molecule with free_energy = no"); + } + } + /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */ + if (ir->efep != efepNO) + { + if (fep->delta_lambda > 0) + { + ir->efep = efepSLOWGROWTH; + } + } + + if (ir->bSimTemp) + { + fep->bPrintEnergy = TRUE; + /* always print out the energy to dhdl if we are doing expanded ensemble, since we need the total energy + if the temperature is changing. 
*/
+ }
+
+ if ((ir->efep != efepNO) || ir->bSimTemp)
+ {
+ ir->bExpanded = FALSE;
+ if ((ir->efep == efepEXPANDED) || ir->bSimTemp)
+ {
+ ir->bExpanded = TRUE;
+ }
+ do_fep_params(ir, is->fep_lambda, is->lambda_weights);
+ if (ir->bSimTemp) /* done after fep params */
+ {
+ do_simtemp_params(ir);
+ }
+ }
+ else
+ {
+ ir->fepvals->n_lambda = 0;
+ }
+
+ /* WALL PARAMETERS */
+
+ do_wall_params(ir, is->wall_atomtype, is->wall_density, opts);
+
+ /* ORIENTATION RESTRAINT PARAMETERS */
+
+ if (opts->bOrire && str_nelem(is->orirefitgrp, MAXPTR, NULL) != 1)
+ {
+ warning_error(wi, "ERROR: Need one orientation restraint fit group\n");
+ }
+
+ /* DEFORMATION PARAMETERS */
+
+ clear_mat(ir->deform);
+ for (i = 0; i < 6; i++)
+ {
+ dumdub[0][i] = 0;
+ }
+ m = sscanf(is->deform, "%lf %lf %lf %lf %lf %lf",
+ &(dumdub[0][0]), &(dumdub[0][1]), &(dumdub[0][2]),
+ &(dumdub[0][3]), &(dumdub[0][4]), &(dumdub[0][5]));
+ for (i = 0; i < 3; i++)
+ {
+ ir->deform[i][i] = dumdub[0][i];
+ }
+ ir->deform[YY][XX] = dumdub[0][3];
+ ir->deform[ZZ][XX] = dumdub[0][4];
+ ir->deform[ZZ][YY] = dumdub[0][5];
+ if (ir->epc != epcNO)
+ {
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j <= i; j++)
+ {
+ if (ir->deform[i][j] != 0 && ir->compress[i][j] != 0)
+ {
+ warning_error(wi, "A box element has deform set and compressibility > 0");
+ }
+ }
+ }
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < i; j++)
+ {
+ if (ir->deform[i][j] != 0)
+ {
+ for (m = j; m < DIM; m++)
+ {
+ if (ir->compress[m][j] != 0)
+ {
+ sprintf(warn_buf, "An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
+ warning(wi, warn_buf);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* Ion/water position swapping checks */
+ if (ir->eSwapCoords != eswapNO)
+ {
+ if (ir->swap->nstswap < 1)
+ {
+ warning_error(wi, "swap-frequency must be 1 or larger when ion swapping is requested");
+ }
+ if (ir->swap->nAverage < 1)
+ {
+ warning_error(wi, "coupl-steps must be 1 or larger.\n");
+ }
+ if (ir->swap->threshold < 1.0)
+ {
+ warning_error(wi, "Ion count threshold must be at least 1.\n");
+ }
+ }
+
+ sfree(dumstr[0]);
+ sfree(dumstr[1]);
+}
+
+static int search_QMstring(const char *s, int ng, const char *gn[])
+{
+ /* same as normal search_string, but this one searches QM strings */
+ int i;
+
+ for (i = 0; (i < ng); i++)
+ {
+ if (gmx_strcasecmp(s, gn[i]) == 0)
+ {
+ return i;
+ }
+ }
+
+ gmx_fatal(FARGS, "this QM method or basisset (%s) is not implemented!\n", s);
+
+ return -1;
+
+} /* search_QMstring */
+
+/* We would like gn to be const as well, but C doesn't allow this */
+int search_string(const char *s, int ng, char *gn[])
+{
+ int i;
+
+ for (i = 0; (i < ng); i++)
+ {
+ if (gmx_strcasecmp(s, gn[i]) == 0)
+ {
+ return i;
+ }
+ }
+
+ gmx_fatal(FARGS,
+ "Group %s referenced in the .mdp file was not found in the index file.\n"
+ "Group names must match either [moleculetype] names or custom index group\n"
+ "names, in which case you must supply an index file to the '-n' option\n"
+ "of grompp.",
+ s);
+
+ return -1;
+}
+
+static gmx_bool do_numbering(int natoms, gmx_groups_t *groups, int ng, char *ptrs[],
+ t_blocka *block, char *gnames[],
+ int gtype, int restnm,
+ int grptp, gmx_bool bVerbose,
+ warninp_t wi)
+{
+ unsigned short *cbuf;
+ t_grps *grps = &(groups->grps[gtype]);
+ int i, j, gid, aj, ognr, ntot = 0;
+ const char *title;
+ gmx_bool bRest;
+ char warn_buf[STRLEN];
+
+ if (debug)
+ {
+ fprintf(debug, "Starting numbering %d groups of type %d\n", ng, gtype);
+ }
+
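+ /* Editorial sketch (hypothetical values) of the numbering scheme used below:
+ * cbuf[] holds one group id per atom, initialised to NOGID; each atom may be
+ * claimed by at most one group of this type. For natoms = 5 and index groups
+ * A = {0,1}, B = {3}, after the loop cbuf = {0, 0, NOGID, 1, NOGID}, and the
+ * rest group (id 2) then absorbs atoms 2 and 4. */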
+ title = gtypes[gtype];
+
+ snew(cbuf, natoms);
+ /* Mark all id's as not set */
+ for (i = 0; (i < natoms); i++)
+ {
+ cbuf[i] = NOGID;
+ }
+
+ snew(grps->nm_ind, ng+1); /* +1 for possible rest group */
+ for (i = 0; (i < ng); i++)
+ {
+ /* Look up the group name in the block structure */
+ gid = search_string(ptrs[i], block->nr, gnames);
+ if ((grptp != egrptpONE) || (i == 0))
+ {
+ grps->nm_ind[grps->nr++] = gid;
+ }
+ if (debug)
+ {
+ fprintf(debug, "Found gid %d for group %s\n", gid, ptrs[i]);
+ }
+
+ /* Now go over the atoms in the group */
+ for (j = block->index[gid]; (j < block->index[gid+1]); j++)
+ {
+
+ aj = block->a[j];
+
+ /* Range checking */
+ if ((aj < 0) || (aj >= natoms))
+ {
+ gmx_fatal(FARGS, "Invalid atom number %d in indexfile", aj);
+ }
+ /* Look up the old group number */
+ ognr = cbuf[aj];
+ if (ognr != NOGID)
+ {
+ gmx_fatal(FARGS, "Atom %d in multiple %s groups (%d and %d)",
+ aj+1, title, ognr+1, i+1);
+ }
+ else
+ {
+ /* Store the group number in buffer */
+ if (grptp == egrptpONE)
+ {
+ cbuf[aj] = 0;
+ }
+ else
+ {
+ cbuf[aj] = i;
+ }
+ ntot++;
+ }
+ }
+ }
+
+ /* Now check whether we have done all atoms */
+ bRest = FALSE;
+ if (ntot != natoms)
+ {
+ if (grptp == egrptpALL)
+ {
+ gmx_fatal(FARGS, "%d atoms are not part of any of the %s groups",
+ natoms-ntot, title);
+ }
+ else if (grptp == egrptpPART)
+ {
+ sprintf(warn_buf, "%d atoms are not part of any of the %s groups",
+ natoms-ntot, title);
+ warning_note(wi, warn_buf);
+ }
+ /* Assign all atoms currently unassigned to a rest group */
+ for (j = 0; (j < natoms); j++)
+ {
+ if (cbuf[j] == NOGID)
+ {
+ cbuf[j] = grps->nr;
+ bRest = TRUE;
+ }
+ }
+ if (grptp != egrptpPART)
+ {
+ if (bVerbose)
+ {
+ fprintf(stderr,
+ "Making dummy/rest group for %s containing %d elements\n",
+ title, natoms-ntot);
+ }
+ /* Add group name "rest" */
+ grps->nm_ind[grps->nr] = restnm;
+
+ /* Assign the rest name to all atoms not currently assigned to a group */
+ for (j = 0; (j < natoms); j++)
+ {
+ if (cbuf[j] == NOGID)
+ {
+ cbuf[j] = grps->nr;
+ }
+ }
+ grps->nr++;
+ }
+ }
+
+ if (grps->nr == 1 && (ntot == 0 || ntot == natoms))
+ {
+ /* All atoms are part of one (or no) group, no index required */
+ groups->ngrpnr[gtype] = 0;
+ groups->grpnr[gtype] = NULL;
+ }
+ else
+ {
+ groups->ngrpnr[gtype] = natoms;
+ snew(groups->grpnr[gtype], natoms);
+ for (j = 0; (j < natoms); j++)
+ {
+ groups->grpnr[gtype][j] = cbuf[j];
+ }
+ }
+
+ sfree(cbuf);
+
+ return (bRest && grptp == egrptpPART);
+}
+
+static void calc_nrdf(gmx_mtop_t *mtop, t_inputrec *ir, char **gnames)
+{
+ t_grpopts *opts;
+ gmx_groups_t *groups;
+ t_pull *pull;
+ int natoms, ai, aj, i, j, d, g, imin, jmin;
+ t_iatom *ia;
+ int *nrdf2, *na_vcm, na_tot;
+ double *nrdf_tc, *nrdf_vcm, nrdf_uc, n_sub = 0;
+ gmx_mtop_atomloop_all_t aloop;
+ t_atom *atom;
+ int mb, mol, ftype, as;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+
+ /* Calculate nrdf.
+ * First calc 3xnr-atoms for each group
+ * then subtract half a degree of freedom for each constraint
+ *
+ * Only atoms and nuclei contribute to the degrees of freedom...
+ */
+
+ opts = &ir->opts;
+
+ groups = &mtop->groups;
+ natoms = mtop->natoms;
+
+ /* Allocate one more for a possible rest group */
+ /* We need to sum degrees of freedom into doubles,
+ * since floats give too low nrdf's above 3 million atoms.
+ */ + snew(nrdf_tc, groups->grps[egcTC].nr+1); + snew(nrdf_vcm, groups->grps[egcVCM].nr+1); + snew(na_vcm, groups->grps[egcVCM].nr+1); + + for (i = 0; i < groups->grps[egcTC].nr; i++) + { + nrdf_tc[i] = 0; + } + for (i = 0; i < groups->grps[egcVCM].nr+1; i++) + { + nrdf_vcm[i] = 0; + } + + snew(nrdf2, natoms); + aloop = gmx_mtop_atomloop_all_init(mtop); + while (gmx_mtop_atomloop_all_next(aloop, &i, &atom)) + { + nrdf2[i] = 0; + if (atom->ptype == eptAtom || atom->ptype == eptNucleus) + { + g = ggrpnr(groups, egcFREEZE, i); + /* Double count nrdf for particle i */ + for (d = 0; d < DIM; d++) + { + if (opts->nFreeze[g][d] == 0) + { + nrdf2[i] += 2; + } + } + nrdf_tc [ggrpnr(groups, egcTC, i)] += 0.5*nrdf2[i]; + nrdf_vcm[ggrpnr(groups, egcVCM, i)] += 0.5*nrdf2[i]; + } + } + + as = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molb = &mtop->molblock[mb]; + molt = &mtop->moltype[molb->type]; + atom = molt->atoms.atom; + for (mol = 0; mol < molb->nmol; mol++) + { + for (ftype = F_CONSTR; ftype <= F_CONSTRNC; ftype++) + { + ia = molt->ilist[ftype].iatoms; + for (i = 0; i < molt->ilist[ftype].nr; ) + { + /* Subtract degrees of freedom for the constraints, + * if the particles still have degrees of freedom left. + * If one of the particles is a vsite or a shell, then all + * constraint motion will go there, but since they do not + * contribute to the constraints the degrees of freedom do not + * change. + */ + ai = as + ia[1]; + aj = as + ia[2]; + if (((atom[ia[1]].ptype == eptNucleus) || + (atom[ia[1]].ptype == eptAtom)) && + ((atom[ia[2]].ptype == eptNucleus) || + (atom[ia[2]].ptype == eptAtom))) + { + if (nrdf2[ai] > 0) + { + jmin = 1; + } + else + { + jmin = 2; + } + if (nrdf2[aj] > 0) + { + imin = 1; + } + else + { + imin = 2; + } + imin = min(imin, nrdf2[ai]); + jmin = min(jmin, nrdf2[aj]); + nrdf2[ai] -= imin; + nrdf2[aj] -= jmin; + nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin; + nrdf_tc [ggrpnr(groups, egcTC, aj)] -= 0.5*jmin; + nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin; + nrdf_vcm[ggrpnr(groups, egcVCM, aj)] -= 0.5*jmin; + } + ia += interaction_function[ftype].nratoms+1; + i += interaction_function[ftype].nratoms+1; + } + } + ia = molt->ilist[F_SETTLE].iatoms; + for (i = 0; i < molt->ilist[F_SETTLE].nr; ) + { + /* Subtract 1 dof from every atom in the SETTLE */ + for (j = 0; j < 3; j++) + { + ai = as + ia[1+j]; + imin = min(2, nrdf2[ai]); + nrdf2[ai] -= imin; + nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin; + nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin; + } + ia += 4; + i += 4; + } + as += molt->atoms.nr; + } + } + + if (ir->ePull == epullCONSTRAINT) + { + /* Correct nrdf for the COM constraints. + * We correct using the TC and VCM group of the first atom + * in the reference and pull group. If atoms in one pull group + * belong to different TC or VCM groups it is anyhow difficult + * to determine the optimal nrdf assignment. 
+ */ + pull = ir->pull; + + for (i = 0; i < pull->ncoord; i++) + { + imin = 1; + + for (j = 0; j < 2; j++) + { + const t_pull_group *pgrp; + + pgrp = &pull->group[pull->coord[i].group[j]]; + + if (pgrp->nat > 0) + { + /* Subtract 1/2 dof from each group */ + ai = pgrp->ind[0]; + nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin; + nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin; + if (nrdf_tc[ggrpnr(groups, egcTC, ai)] < 0) + { + gmx_fatal(FARGS, "Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative", gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups, egcTC, ai)]]); + } + } + else + { + /* We need to subtract the whole DOF from group j=1 */ + imin += 1; + } + } + } + } + + if (ir->nstcomm != 0) + { + /* Subtract 3 from the number of degrees of freedom in each vcm group + * when com translation is removed and 6 when rotation is removed + * as well. + */ + switch (ir->comm_mode) + { + case ecmLINEAR: + n_sub = ndof_com(ir); + break; + case ecmANGULAR: + n_sub = 6; + break; + default: + n_sub = 0; + gmx_incons("Checking comm_mode"); + } + + for (i = 0; i < groups->grps[egcTC].nr; i++) + { + /* Count the number of atoms of TC group i for every VCM group */ + for (j = 0; j < groups->grps[egcVCM].nr+1; j++) + { + na_vcm[j] = 0; + } + na_tot = 0; + for (ai = 0; ai < natoms; ai++) + { + if (ggrpnr(groups, egcTC, ai) == i) + { + na_vcm[ggrpnr(groups, egcVCM, ai)]++; + na_tot++; + } + } + /* Correct for VCM removal according to the fraction of each VCM + * group present in this TC group. + */ + nrdf_uc = nrdf_tc[i]; + if (debug) + { + fprintf(debug, "T-group[%d] nrdf_uc = %g, n_sub = %g\n", + i, nrdf_uc, n_sub); + } + nrdf_tc[i] = 0; + for (j = 0; j < groups->grps[egcVCM].nr+1; j++) + { + if (nrdf_vcm[j] > n_sub) + { + nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)* + (nrdf_vcm[j] - n_sub)/nrdf_vcm[j]; + } + if (debug) + { + fprintf(debug, " nrdf_vcm[%d] = %g, nrdf = %g\n", + j, nrdf_vcm[j], nrdf_tc[i]); + } + } + } + } + for (i = 0; (i < groups->grps[egcTC].nr); i++) + { + opts->nrdf[i] = nrdf_tc[i]; + if (opts->nrdf[i] < 0) + { + opts->nrdf[i] = 0; + } + fprintf(stderr, + "Number of degrees of freedom in T-Coupling group %s is %.2f\n", + gnames[groups->grps[egcTC].nm_ind[i]], opts->nrdf[i]); + } + + sfree(nrdf2); + sfree(nrdf_tc); + sfree(nrdf_vcm); + sfree(na_vcm); +} + +static void decode_cos(char *s, t_cosines *cosine) +{ + char *t; + char format[STRLEN], f1[STRLEN]; + double a, phi; + int i; + + t = strdup(s); + trim(t); + + cosine->n = 0; + cosine->a = NULL; + cosine->phi = NULL; + if (strlen(t)) + { + sscanf(t, "%d", &(cosine->n)); + if (cosine->n <= 0) + { + cosine->n = 0; + } + else + { + snew(cosine->a, cosine->n); + snew(cosine->phi, cosine->n); + + sprintf(format, "%%*d"); + for (i = 0; (i < cosine->n); i++) + { + strcpy(f1, format); + strcat(f1, "%lf%lf"); + if (sscanf(t, f1, &a, &phi) < 2) + { + gmx_fatal(FARGS, "Invalid input for electric field shift: '%s'", t); + } + cosine->a[i] = a; + cosine->phi[i] = phi; + strcat(format, "%*lf%*lf"); + } + } + } + sfree(t); +} + +static gmx_bool do_egp_flag(t_inputrec *ir, gmx_groups_t *groups, + const char *option, const char *val, int flag) +{ + /* The maximum number of energy group pairs would be MAXPTR*(MAXPTR+1)/2. + * But since this is much larger than STRLEN, such a line can not be parsed. + * The real maximum is the number of names that fit in a string: STRLEN/2. 
+ */ +#define EGP_MAX (STRLEN/2) + int nelem, i, j, k, nr; + char *names[EGP_MAX]; + char ***gnames; + gmx_bool bSet; + + gnames = groups->grpname; + + nelem = str_nelem(val, EGP_MAX, names); + if (nelem % 2 != 0) + { + gmx_fatal(FARGS, "The number of groups for %s is odd", option); + } + nr = groups->grps[egcENER].nr; + bSet = FALSE; + for (i = 0; i < nelem/2; i++) + { + j = 0; + while ((j < nr) && + gmx_strcasecmp(names[2*i], *(gnames[groups->grps[egcENER].nm_ind[j]]))) + { + j++; + } + if (j == nr) + { + gmx_fatal(FARGS, "%s in %s is not an energy group\n", + names[2*i], option); + } + k = 0; + while ((k < nr) && + gmx_strcasecmp(names[2*i+1], *(gnames[groups->grps[egcENER].nm_ind[k]]))) + { + k++; + } + if (k == nr) + { + gmx_fatal(FARGS, "%s in %s is not an energy group\n", + names[2*i+1], option); + } + if ((j < nr) && (k < nr)) + { + ir->opts.egp_flags[nr*j+k] |= flag; + ir->opts.egp_flags[nr*k+j] |= flag; + bSet = TRUE; + } + } + + return bSet; +} + + +static void make_swap_groups( + t_swapcoords *swap, + char *swapgname, + char *splitg0name, + char *splitg1name, + char *solgname, + t_blocka *grps, + char **gnames) +{ + int ig = -1, i = 0, j; + char *splitg; + + + /* Just a quick check here, more thorough checks are in mdrun */ + if (strcmp(splitg0name, splitg1name) == 0) + { + gmx_fatal(FARGS, "The split groups can not both be '%s'.", splitg0name); + } + + /* First get the swap group index atoms */ + ig = search_string(swapgname, grps->nr, gnames); + swap->nat = grps->index[ig+1] - grps->index[ig]; + if (swap->nat > 0) + { + fprintf(stderr, "Swap group '%s' contains %d atoms.\n", swapgname, swap->nat); + snew(swap->ind, swap->nat); + for (i = 0; i < swap->nat; i++) + { + swap->ind[i] = grps->a[grps->index[ig]+i]; + } + } + else + { + gmx_fatal(FARGS, "You defined an empty group of atoms for swapping."); + } + + /* Now do so for the split groups */ + for (j = 0; j < 2; j++) + { + if (j == 0) + { + splitg = splitg0name; + } + else + { + splitg = splitg1name; + } + + ig = search_string(splitg, grps->nr, gnames); + swap->nat_split[j] = grps->index[ig+1] - grps->index[ig]; + if (swap->nat_split[j] > 0) + { + fprintf(stderr, "Split group %d '%s' contains %d atom%s.\n", + j, splitg, swap->nat_split[j], (swap->nat_split[j] > 1) ? "s" : ""); + snew(swap->ind_split[j], swap->nat_split[j]); + for (i = 0; i < swap->nat_split[j]; i++) + { + swap->ind_split[j][i] = grps->a[grps->index[ig]+i]; + } + } + else + { + gmx_fatal(FARGS, "Split group %d has to contain at least 1 atom!", j); + } + } + + /* Now get the solvent group index atoms */ + ig = search_string(solgname, grps->nr, gnames); + swap->nat_sol = grps->index[ig+1] - grps->index[ig]; + if (swap->nat_sol > 0) + { + fprintf(stderr, "Solvent group '%s' contains %d atoms.\n", solgname, swap->nat_sol); + snew(swap->ind_sol, swap->nat_sol); + for (i = 0; i < swap->nat_sol; i++) + { + swap->ind_sol[i] = grps->a[grps->index[ig]+i]; + } + } + else + { + gmx_fatal(FARGS, "You defined an empty group of solvent. 
Cannot exchange ions."); + } +} + + +void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames) +{ + int ig = -1, i; + + + ig = search_string(IMDgname, grps->nr, gnames); + IMDgroup->nat = grps->index[ig+1] - grps->index[ig]; + + if (IMDgroup->nat > 0) + { + fprintf(stderr, "Group '%s' with %d atoms can be activated for interactive molecular dynamics (IMD).\n", + IMDgname, IMDgroup->nat); + snew(IMDgroup->ind, IMDgroup->nat); + for (i = 0; i < IMDgroup->nat; i++) + { + IMDgroup->ind[i] = grps->a[grps->index[ig]+i]; + } + } +} + + +void do_index(const char* mdparin, const char *ndx, + gmx_mtop_t *mtop, + gmx_bool bVerbose, + t_inputrec *ir, rvec *v, + warninp_t wi) +{ + t_blocka *grps; + gmx_groups_t *groups; + int natoms; + t_symtab *symtab; + t_atoms atoms_all; + char warnbuf[STRLEN], **gnames; + int nr, ntcg, ntau_t, nref_t, nacc, nofg, nSA, nSA_points, nSA_time, nSA_temp; + real tau_min; + int nstcmin; + int nacg, nfreeze, nfrdim, nenergy, nvcm, nuser; + char *ptr1[MAXPTR], *ptr2[MAXPTR], *ptr3[MAXPTR]; + int i, j, k, restnm; + real SAtime; + gmx_bool bExcl, bTable, bSetTCpar, bAnneal, bRest; + int nQMmethod, nQMbasis, nQMcharge, nQMmult, nbSH, nCASorb, nCASelec, + nSAon, nSAoff, nSAsteps, nQMg, nbOPT, nbTS; + char warn_buf[STRLEN]; + + if (bVerbose) + { + fprintf(stderr, "processing index file...\n"); + } + debug_gmx(); + if (ndx == NULL) + { + snew(grps, 1); + snew(grps->index, 1); + snew(gnames, 1); + atoms_all = gmx_mtop_global_atoms(mtop); + analyse(&atoms_all, grps, &gnames, FALSE, TRUE); + free_t_atoms(&atoms_all, FALSE); + } + else + { + grps = init_index(ndx, &gnames); + } + + groups = &mtop->groups; + natoms = mtop->natoms; + symtab = &mtop->symtab; + + snew(groups->grpname, grps->nr+1); + + for (i = 0; (i < grps->nr); i++) + { + groups->grpname[i] = put_symtab(symtab, gnames[i]); + } + groups->grpname[i] = put_symtab(symtab, "rest"); + restnm = i; + srenew(gnames, grps->nr+1); + gnames[restnm] = *(groups->grpname[i]); + groups->ngrpname = grps->nr+1; + + set_warning_line(wi, mdparin, -1); + + ntau_t = str_nelem(is->tau_t, MAXPTR, ptr1); + nref_t = str_nelem(is->ref_t, MAXPTR, ptr2); + ntcg = str_nelem(is->tcgrps, MAXPTR, ptr3); + if ((ntau_t != ntcg) || (nref_t != ntcg)) + { + gmx_fatal(FARGS, "Invalid T coupling input: %d groups, %d ref-t values and " + "%d tau-t values", ntcg, nref_t, ntau_t); + } + + bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI == eiBD || EI_TPI(ir->eI)); + do_numbering(natoms, groups, ntcg, ptr3, grps, gnames, egcTC, + restnm, bSetTCpar ? egrptpALL : egrptpALL_GENREST, bVerbose, wi); + nr = groups->grps[egcTC].nr; + ir->opts.ngtc = nr; + snew(ir->opts.nrdf, nr); + snew(ir->opts.tau_t, nr); + snew(ir->opts.ref_t, nr); + if (ir->eI == eiBD && ir->bd_fric == 0) + { + fprintf(stderr, "bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n"); + } + + if (bSetTCpar) + { + if (nr != nref_t) + { + gmx_fatal(FARGS, "Not enough ref-t and tau-t values!"); + } + + tau_min = 1e20; + for (i = 0; (i < nr); i++) + { + ir->opts.tau_t[i] = strtod(ptr1[i], NULL); + if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0) + { + sprintf(warn_buf, "With integrator %s tau-t should be larger than 0", ei_names[ir->eI]); + warning_error(wi, warn_buf); + } + + if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0) + { + warning_note(wi, "tau-t = -1 is the value to signal that a group should not have temperature coupling. 
Treating your use of tau-t = 0 as if you used -1.");
+ }
+
+ if (ir->opts.tau_t[i] >= 0)
+ {
+ tau_min = min(tau_min, ir->opts.tau_t[i]);
+ }
+ }
+ if (ir->etc != etcNO && ir->nsttcouple == -1)
+ {
+ ir->nsttcouple = ir_optimal_nsttcouple(ir);
+ }
+
+ if (EI_VV(ir->eI))
+ {
+ if ((ir->etc == etcNOSEHOOVER) && (ir->epc == epcBERENDSEN))
+ {
+ gmx_fatal(FARGS, "Cannot do Nose-Hoover temperature with Berendsen pressure control with md-vv; use either vrescale temperature with berendsen pressure or Nose-Hoover temperature with MTTK pressure");
+ }
+ if ((ir->epc == epcMTTK) && (ir->etc > etcNO))
+ {
+ if (ir->nstpcouple != ir->nsttcouple)
+ {
+ int mincouple = min(ir->nstpcouple, ir->nsttcouple);
+ ir->nstpcouple = ir->nsttcouple = mincouple;
+ sprintf(warn_buf, "for current Trotter decomposition methods with vv, nsttcouple and nstpcouple must be equal. Both have been reset to min(nsttcouple,nstpcouple) = %d", mincouple);
+ warning_note(wi, warn_buf);
+ }
+ }
+ }
+ /* velocity verlet with averaged kinetic energy KE = 0.5*(KE(t+1/2) + KE(t-1/2)) is implemented
+ primarily for testing purposes, and does not work with temperature coupling other than 1 */
+
+ if (ETC_ANDERSEN(ir->etc))
+ {
+ if (ir->nsttcouple != 1)
+ {
+ ir->nsttcouple = 1;
+ sprintf(warn_buf, "Andersen temperature control methods assume nsttcouple = 1; there is no need for nsttcouple > 1, since no global parameters are computed. nsttcouple has been reset to 1");
+ warning_note(wi, warn_buf);
+ }
+ }
+ nstcmin = tcouple_min_integration_steps(ir->etc);
+ if (nstcmin > 1)
+ {
+ if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
+ {
+ sprintf(warn_buf, "For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
+ ETCOUPLTYPE(ir->etc),
+ tau_min, nstcmin,
+ ir->nsttcouple*ir->delta_t);
+ warning(wi, warn_buf);
+ }
+ }
+ for (i = 0; (i < nr); i++)
+ {
+ ir->opts.ref_t[i] = strtod(ptr2[i], NULL);
+ if (ir->opts.ref_t[i] < 0)
+ {
+ gmx_fatal(FARGS, "ref-t for group %d negative", i);
+ }
+ }
+ /* set the lambda mc temperature to the md integrator temperature (which should be defined
+ if we are in this conditional) if mc_temp is negative */
+ if (ir->expandedvals->mc_temp < 0)
+ {
+ ir->expandedvals->mc_temp = ir->opts.ref_t[0]; /*for now, set to the first reft */
+ }
+ }
+
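+ /* Editorial example (hypothetical values) of the annealing input parsed
+ * below, for two T-coupling groups, the first annealed once, the second
+ * periodically (with tinit = 0):
+ * annealing = single periodic
+ * annealing-npoints = 2 3
+ * annealing-time = 0 50 0 30 60
+ * annealing-temp = 300 280 300 320 300
+ * annealing-time and annealing-temp each list sum(annealing-npoints) = 5
+ * values, with the times of each group ascending and starting at tinit. */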
+ /* Simulated annealing for each group. There are nr groups */
+ nSA = str_nelem(is->anneal, MAXPTR, ptr1);
+ if (nSA == 1 && (ptr1[0][0] == 'n' || ptr1[0][0] == 'N'))
+ {
+ nSA = 0;
+ }
+ if (nSA > 0 && nSA != nr)
+ {
+ gmx_fatal(FARGS, "Not enough annealing values: %d (for %d groups)\n", nSA, nr);
+ }
+ else
+ {
+ snew(ir->opts.annealing, nr);
+ snew(ir->opts.anneal_npoints, nr);
+ snew(ir->opts.anneal_time, nr);
+ snew(ir->opts.anneal_temp, nr);
+ for (i = 0; i < nr; i++)
+ {
+ ir->opts.annealing[i] = eannNO;
+ ir->opts.anneal_npoints[i] = 0;
+ ir->opts.anneal_time[i] = NULL;
+ ir->opts.anneal_temp[i] = NULL;
+ }
+ if (nSA > 0)
+ {
+ bAnneal = FALSE;
+ for (i = 0; i < nr; i++)
+ {
+ if (ptr1[i][0] == 'n' || ptr1[i][0] == 'N')
+ {
+ ir->opts.annealing[i] = eannNO;
+ }
+ else if (ptr1[i][0] == 's' || ptr1[i][0] == 'S')
+ {
+ ir->opts.annealing[i] = eannSINGLE;
+ bAnneal = TRUE;
+ }
+ else if (ptr1[i][0] == 'p' || ptr1[i][0] == 'P')
+ {
+ ir->opts.annealing[i] = eannPERIODIC;
+ bAnneal = TRUE;
+ }
+ }
+ if (bAnneal)
+ {
+ /* Read the other fields too */
+ nSA_points = str_nelem(is->anneal_npoints, MAXPTR, ptr1);
+ if (nSA_points != nSA)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-npoints values for %d groups\n", nSA_points, nSA);
+ }
+ for (k = 0, i = 0; i < nr; i++)
+ {
+ ir->opts.anneal_npoints[i] = strtol(ptr1[i], NULL, 10);
+ if (ir->opts.anneal_npoints[i] == 1)
+ {
+ gmx_fatal(FARGS, "Please specify at least a start and an end point for annealing\n");
+ }
+ snew(ir->opts.anneal_time[i], ir->opts.anneal_npoints[i]);
+ snew(ir->opts.anneal_temp[i], ir->opts.anneal_npoints[i]);
+ k += ir->opts.anneal_npoints[i];
+ }
+
+ nSA_time = str_nelem(is->anneal_time, MAXPTR, ptr1);
+ if (nSA_time != k)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-time values, wanted %d\n", nSA_time, k);
+ }
+ nSA_temp = str_nelem(is->anneal_temp, MAXPTR, ptr2);
+ if (nSA_temp != k)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-temp values, wanted %d\n", nSA_temp, k);
+ }
+
+ for (i = 0, k = 0; i < nr; i++)
+ {
+
+ for (j = 0; j < ir->opts.anneal_npoints[i]; j++)
+ {
+ ir->opts.anneal_time[i][j] = strtod(ptr1[k], NULL);
+ ir->opts.anneal_temp[i][j] = strtod(ptr2[k], NULL);
+ if (j == 0)
+ {
+ if (ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
+ {
+ gmx_fatal(FARGS, "First time point for annealing > init_t.\n");
+ }
+ }
+ else
+ {
+ /* j>0 */
+ if (ir->opts.anneal_time[i][j] < ir->opts.anneal_time[i][j-1])
+ {
+ gmx_fatal(FARGS, "Annealing timepoints out of order: t=%f comes after t=%f\n",
+ ir->opts.anneal_time[i][j], ir->opts.anneal_time[i][j-1]);
+ }
+ }
+ if (ir->opts.anneal_temp[i][j] < 0)
+ {
+ gmx_fatal(FARGS, "Found negative temperature in annealing: %f\n", ir->opts.anneal_temp[i][j]);
+ }
+ k++;
+ }
+ }
+ /* Print out some summary information, to make sure we got it right */
+ for (i = 0, k = 0; i < nr; i++)
+ {
+ if (ir->opts.annealing[i] != eannNO)
+ {
+ j = groups->grps[egcTC].nm_ind[i];
+ fprintf(stderr, "Simulated annealing for group %s: %s, %d timepoints\n",
+ *(groups->grpname[j]), eann_names[ir->opts.annealing[i]],
+ ir->opts.anneal_npoints[i]);
+ fprintf(stderr, "Time (ps) Temperature (K)\n");
+ /* All terms except the last one */
+ for (j = 0; j < (ir->opts.anneal_npoints[i]-1); j++)
+ {
+ fprintf(stderr, "%9.1f %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
+ }
+
+ /* Finally the last one */
+ j = ir->opts.anneal_npoints[i]-1;
+ if (ir->opts.annealing[i] == eannSINGLE)
+ {
+ fprintf(stderr, "%9.1f- %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
+ }
+ else
+ {
+ fprintf(stderr, "%9.1f %5.1f\n",
ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]); + if (fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0]) > GMX_REAL_EPS) + { + warning_note(wi, "There is a temperature jump when your annealing loops back.\n"); + } + } + } + } + } + } + + if (ir->ePull != epullNO) + { + make_pull_groups(ir->pull, is->pull_grp, grps, gnames); + + make_pull_coords(ir->pull); + } + + if (ir->bRot) + { + make_rotation_groups(ir->rot, is->rot_grp, grps, gnames); + } + + if (ir->eSwapCoords != eswapNO) + { + make_swap_groups(ir->swap, swapgrp, splitgrp0, splitgrp1, solgrp, grps, gnames); + } + + /* Make indices for IMD session */ + if (ir->bIMD) + { + make_IMD_group(ir->imd, is->imd_grp, grps, gnames); + } + + nacc = str_nelem(is->acc, MAXPTR, ptr1); + nacg = str_nelem(is->accgrps, MAXPTR, ptr2); + if (nacg*DIM != nacc) + { + gmx_fatal(FARGS, "Invalid Acceleration input: %d groups and %d acc. values", + nacg, nacc); + } + do_numbering(natoms, groups, nacg, ptr2, grps, gnames, egcACC, + restnm, egrptpALL_GENREST, bVerbose, wi); + nr = groups->grps[egcACC].nr; + snew(ir->opts.acc, nr); + ir->opts.ngacc = nr; + + for (i = k = 0; (i < nacg); i++) + { + for (j = 0; (j < DIM); j++, k++) + { + ir->opts.acc[i][j] = strtod(ptr1[k], NULL); + } + } + for (; (i < nr); i++) + { + for (j = 0; (j < DIM); j++) + { + ir->opts.acc[i][j] = 0; + } + } + + nfrdim = str_nelem(is->frdim, MAXPTR, ptr1); + nfreeze = str_nelem(is->freeze, MAXPTR, ptr2); + if (nfrdim != DIM*nfreeze) + { + gmx_fatal(FARGS, "Invalid Freezing input: %d groups and %d freeze values", + nfreeze, nfrdim); + } + do_numbering(natoms, groups, nfreeze, ptr2, grps, gnames, egcFREEZE, + restnm, egrptpALL_GENREST, bVerbose, wi); + nr = groups->grps[egcFREEZE].nr; + ir->opts.ngfrz = nr; + snew(ir->opts.nFreeze, nr); + for (i = k = 0; (i < nfreeze); i++) + { + for (j = 0; (j < DIM); j++, k++) + { + ir->opts.nFreeze[i][j] = (gmx_strncasecmp(ptr1[k], "Y", 1) == 0); + if (!ir->opts.nFreeze[i][j]) + { + if (gmx_strncasecmp(ptr1[k], "N", 1) != 0) + { + sprintf(warn_buf, "Please use Y(ES) or N(O) for freezedim only " + "(not %s)", ptr1[k]); + warning(wi, warn_buf); + } + } + } + } + for (; (i < nr); i++) + { + for (j = 0; (j < DIM); j++) + { + ir->opts.nFreeze[i][j] = 0; + } + } + + nenergy = str_nelem(is->energy, MAXPTR, ptr1); + do_numbering(natoms, groups, nenergy, ptr1, grps, gnames, egcENER, + restnm, egrptpALL_GENREST, bVerbose, wi); + add_wall_energrps(groups, ir->nwall, symtab); + ir->opts.ngener = groups->grps[egcENER].nr; + nvcm = str_nelem(is->vcm, MAXPTR, ptr1); + bRest = + do_numbering(natoms, groups, nvcm, ptr1, grps, gnames, egcVCM, + restnm, nvcm == 0 ? egrptpALL_GENREST : egrptpPART, bVerbose, wi); + if (bRest) + { + warning(wi, "Some atoms are not part of any center of mass motion removal group.\n" + "This may lead to artifacts.\n" + "In most cases one should use one group for the whole system."); + } + + /* Now we have filled the freeze struct, so we can calculate NRDF */ + calc_nrdf(mtop, ir, gnames); + + if (v && NULL) + { + real fac, ntot = 0; + + /* Must check per group!
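+ * Note: the 'if (v && NULL)' condition above always evaluates to + * false, so the velocity rescaling below (which would compensate the + * kinetic energy for degrees of freedom removed by constraints and + * COM motion removal) is effectively dead code.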
*/ + for (i = 0; (i < ir->opts.ngtc); i++) + { + ntot += ir->opts.nrdf[i]; + } + if (ntot != (DIM*natoms)) + { + fac = sqrt(ntot/(DIM*natoms)); + if (bVerbose) + { + fprintf(stderr, "Scaling velocities by a factor of %.3f to account for constraints\n" + "and removal of center of mass motion\n", fac); + } + for (i = 0; (i < natoms); i++) + { + svmul(fac, v[i], v[i]); + } + } + } + + nuser = str_nelem(is->user1, MAXPTR, ptr1); + do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser1, + restnm, egrptpALL_GENREST, bVerbose, wi); + nuser = str_nelem(is->user2, MAXPTR, ptr1); + do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser2, + restnm, egrptpALL_GENREST, bVerbose, wi); + nuser = str_nelem(is->x_compressed_groups, MAXPTR, ptr1); + do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcCompressedX, + restnm, egrptpONE, bVerbose, wi); + nofg = str_nelem(is->orirefitgrp, MAXPTR, ptr1); + do_numbering(natoms, groups, nofg, ptr1, grps, gnames, egcORFIT, + restnm, egrptpALL_GENREST, bVerbose, wi); + + /* QMMM input processing */ + nQMg = str_nelem(is->QMMM, MAXPTR, ptr1); + nQMmethod = str_nelem(is->QMmethod, MAXPTR, ptr2); + nQMbasis = str_nelem(is->QMbasis, MAXPTR, ptr3); + if ((nQMmethod != nQMg) || (nQMbasis != nQMg)) + { + gmx_fatal(FARGS, "Invalid QMMM input: %d groups %d basis sets" + " and %d methods\n", nQMg, nQMbasis, nQMmethod); + } + /* group rest, if any, is always MM! */ + do_numbering(natoms, groups, nQMg, ptr1, grps, gnames, egcQMMM, + restnm, egrptpALL_GENREST, bVerbose, wi); + nr = nQMg; /*atoms->grps[egcQMMM].nr;*/ + ir->opts.ngQM = nQMg; + snew(ir->opts.QMmethod, nr); + snew(ir->opts.QMbasis, nr); + for (i = 0; i < nr; i++) + { + /* input consists of strings: RHF CASSCF PM3 ... These need to be + * converted to the corresponding enum in names.c + */ + ir->opts.QMmethod[i] = search_QMstring(ptr2[i], eQMmethodNR, + eQMmethod_names); + ir->opts.QMbasis[i] = search_QMstring(ptr3[i], eQMbasisNR, + eQMbasis_names); + + } + nQMmult = str_nelem(is->QMmult, MAXPTR, ptr1); + nQMcharge = str_nelem(is->QMcharge, MAXPTR, ptr2); + nbSH = str_nelem(is->bSH, MAXPTR, ptr3); + snew(ir->opts.QMmult, nr); + snew(ir->opts.QMcharge, nr); + snew(ir->opts.bSH, nr); + + for (i = 0; i < nr; i++) + { + ir->opts.QMmult[i] = strtol(ptr1[i], NULL, 10); + ir->opts.QMcharge[i] = strtol(ptr2[i], NULL, 10); + ir->opts.bSH[i] = (gmx_strncasecmp(ptr3[i], "Y", 1) == 0); + } + + nCASelec = str_nelem(is->CASelectrons, MAXPTR, ptr1); + nCASorb = str_nelem(is->CASorbitals, MAXPTR, ptr2); + snew(ir->opts.CASelectrons, nr); + snew(ir->opts.CASorbitals, nr); + for (i = 0; i < nr; i++) + { + ir->opts.CASelectrons[i] = strtol(ptr1[i], NULL, 10); + ir->opts.CASorbitals[i] = strtol(ptr2[i], NULL, 10); + } + /* special optimization options */ + + nbOPT = str_nelem(is->bOPT, MAXPTR, ptr1); + nbTS = str_nelem(is->bTS, MAXPTR, ptr2); + snew(ir->opts.bOPT, nr); + snew(ir->opts.bTS, nr); + for (i = 0; i < nr; i++) + { + ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i], "Y", 1) == 0); + ir->opts.bTS[i] = (gmx_strncasecmp(ptr2[i], "Y", 1) == 0); + } + nSAon = str_nelem(is->SAon, MAXPTR, ptr1); + nSAoff = str_nelem(is->SAoff, MAXPTR, ptr2); + nSAsteps = str_nelem(is->SAsteps, MAXPTR, ptr3); + snew(ir->opts.SAon, nr); + snew(ir->opts.SAoff, nr); + snew(ir->opts.SAsteps, nr); + + for (i = 0; i < nr; i++) + { + ir->opts.SAon[i] = strtod(ptr1[i], NULL); + ir->opts.SAoff[i] = strtod(ptr2[i], NULL); + ir->opts.SAsteps[i] = strtol(ptr3[i], NULL, 10); + } + /* end of QMMM input */ + + if (bVerbose) + { + for (i = 0; (i <
egcNR); i++) + { + fprintf(stderr, "%-16s has %d element(s):", gtypes[i], groups->grps[i].nr); + for (j = 0; (j < groups->grps[i].nr); j++) + { + fprintf(stderr, " %s", *(groups->grpname[groups->grps[i].nm_ind[j]])); + } + fprintf(stderr, "\n"); + } + } + + nr = groups->grps[egcENER].nr; + snew(ir->opts.egp_flags, nr*nr); + + bExcl = do_egp_flag(ir, groups, "energygrp-excl", is->egpexcl, EGP_EXCL); + if (bExcl && ir->cutoff_scheme == ecutsVERLET) + { + warning_error(wi, "Energy group exclusions are not (yet) implemented for the Verlet scheme"); + } + if (bExcl && EEL_FULL(ir->coulombtype)) + { + warning(wi, "Can not exclude the lattice Coulomb energy between energy groups"); + } + + bTable = do_egp_flag(ir, groups, "energygrp-table", is->egptable, EGP_TABLE); + if (bTable && !(ir->vdwtype == evdwUSER) && + !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) && + !(ir->coulombtype == eelPMEUSERSWITCH)) + { + gmx_fatal(FARGS, "Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb"); + } + + decode_cos(is->efield_x, &(ir->ex[XX])); + decode_cos(is->efield_xt, &(ir->et[XX])); + decode_cos(is->efield_y, &(ir->ex[YY])); + decode_cos(is->efield_yt, &(ir->et[YY])); + decode_cos(is->efield_z, &(ir->ex[ZZ])); + decode_cos(is->efield_zt, &(ir->et[ZZ])); + + if (ir->bAdress) + { + do_adress_index(ir->adress, groups, gnames, &(ir->opts), wi); + } + + for (i = 0; (i < grps->nr); i++) + { + sfree(gnames[i]); + } + sfree(gnames); + done_blocka(grps); + sfree(grps); + +} + + + +static void check_disre(gmx_mtop_t *mtop) +{ + gmx_ffparams_t *ffparams; + t_functype *functype; + t_iparams *ip; + int i, ndouble, ftype; + int label, old_label; + + if (gmx_mtop_ftype_count(mtop, F_DISRES) > 0) + { + ffparams = &mtop->ffparams; + functype = ffparams->functype; + ip = ffparams->iparams; + ndouble = 0; + old_label = -1; + for (i = 0; i < ffparams->ntypes; i++) + { + ftype = functype[i]; + if (ftype == F_DISRES) + { + label = ip[i].disres.label; + if (label == old_label) + { + fprintf(stderr, "Distance restraint index %d occurs twice\n", label); + ndouble++; + } + old_label = label; + } + } + if (ndouble > 0) + { + gmx_fatal(FARGS, "Found %d double distance restraint indices,\n" + "probably the parameters for multiple pairs in one restraint " + "are not identical\n", ndouble); + } + } +} + +static gmx_bool absolute_reference(t_inputrec *ir, gmx_mtop_t *sys, + gmx_bool posres_only, + ivec AbsRef) +{ + int d, g, i; + gmx_mtop_ilistloop_t iloop; + t_ilist *ilist; + int nmol; + t_iparams *pr; + + clear_ivec(AbsRef); + + if (!posres_only) + { + /* Check the COM */ + for (d = 0; d < DIM; d++) + { + AbsRef[d] = (d < ndof_com(ir) ? 
0 : 1); + } + /* Check for freeze groups */ + for (g = 0; g < ir->opts.ngfrz; g++) + { + for (d = 0; d < DIM; d++) + { + if (ir->opts.nFreeze[g][d] != 0) + { + AbsRef[d] = 1; + } + } + } + } + + /* Check for position restraints */ + iloop = gmx_mtop_ilistloop_init(sys); + while (gmx_mtop_ilistloop_next(iloop, &ilist, &nmol)) + { + if (nmol > 0 && + (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0)) + { + for (i = 0; i < ilist[F_POSRES].nr; i += 2) + { + pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]]; + for (d = 0; d < DIM; d++) + { + if (pr->posres.fcA[d] != 0) + { + AbsRef[d] = 1; + } + } + } + for (i = 0; i < ilist[F_FBPOSRES].nr; i += 2) + { + /* Check for flat-bottom posres */ + pr = &sys->ffparams.iparams[ilist[F_FBPOSRES].iatoms[i]]; + if (pr->fbposres.k != 0) + { + switch (pr->fbposres.geom) + { + case efbposresSPHERE: + AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1; + break; + case efbposresCYLINDER: + AbsRef[XX] = AbsRef[YY] = 1; + break; + case efbposresX: /* d=XX */ + case efbposresY: /* d=YY */ + case efbposresZ: /* d=ZZ */ + d = pr->fbposres.geom - efbposresX; + AbsRef[d] = 1; + break; + default: + gmx_fatal(FARGS, " Invalid geometry for flat-bottom position restraint.\n" + "Expected nr between 1 and %d. Found %d\n", efbposresNR-1, + pr->fbposres.geom); + } + } + } + } + } + + return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0); +} + +static void +check_combination_rule_differences(const gmx_mtop_t *mtop, int state, + gmx_bool *bC6ParametersWorkWithGeometricRules, + gmx_bool *bC6ParametersWorkWithLBRules, + gmx_bool *bLBRulesPossible) +{ + int ntypes, tpi, tpj, thisLBdiff, thisgeomdiff; + int *typecount; + real tol; + double geometricdiff, LBdiff; + double c6i, c6j, c12i, c12j; + double c6, c6_geometric, c6_LB; + double sigmai, sigmaj, epsi, epsj; + gmx_bool bCanDoLBRules, bCanDoGeometricRules; + const char *ptr; + + /* A tolerance of 1e-5 seems reasonable for (possibly hand-typed) + * force-field floating point parameters. 
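+ * The loop below recovers sigma = (c12/c6)^(1/6) and + * eps = c6^2/(4*c12) for each pair of types, and then compares the + * actual C6 against both the geometric rule, sqrt(c6i*c6j), and the + * Lorentz-Berthelot estimate, 4*sqrt(epsi*epsj)*((sigmai+sigmaj)/2)^6, + * using this tolerance.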
+ */ + tol = 1e-5; + ptr = getenv("GMX_LJCOMB_TOL"); + if (ptr != NULL) + { + double dbl; + + sscanf(ptr, "%lf", &dbl); + tol = dbl; + } + + *bC6ParametersWorkWithLBRules = TRUE; + *bC6ParametersWorkWithGeometricRules = TRUE; + bCanDoLBRules = TRUE; + bCanDoGeometricRules = TRUE; + ntypes = mtop->ffparams.atnr; + snew(typecount, ntypes); + gmx_mtop_count_atomtypes(mtop, state, typecount); + geometricdiff = LBdiff = 0.0; + *bLBRulesPossible = TRUE; + for (tpi = 0; tpi < ntypes; ++tpi) + { + c6i = mtop->ffparams.iparams[(ntypes + 1) * tpi].lj.c6; + c12i = mtop->ffparams.iparams[(ntypes + 1) * tpi].lj.c12; + for (tpj = tpi; tpj < ntypes; ++tpj) + { + c6j = mtop->ffparams.iparams[(ntypes + 1) * tpj].lj.c6; + c12j = mtop->ffparams.iparams[(ntypes + 1) * tpj].lj.c12; + c6 = mtop->ffparams.iparams[ntypes * tpi + tpj].lj.c6; + c6_geometric = sqrt(c6i * c6j); + if (!gmx_numzero(c6_geometric)) + { + if (!gmx_numzero(c12i) && !gmx_numzero(c12j)) + { + sigmai = pow(c12i / c6i, 1.0/6.0); + sigmaj = pow(c12j / c6j, 1.0/6.0); + epsi = c6i * c6i /(4.0 * c12i); + epsj = c6j * c6j /(4.0 * c12j); + c6_LB = 4.0 * pow(epsi * epsj, 1.0/2.0) * pow(0.5 * (sigmai + sigmaj), 6); + } + else + { + *bLBRulesPossible = FALSE; + c6_LB = c6_geometric; + } + bCanDoLBRules = gmx_within_tol(c6_LB, c6, tol); + } + + if (FALSE == bCanDoLBRules) + { + *bC6ParametersWorkWithLBRules = FALSE; + } + + bCanDoGeometricRules = gmx_within_tol(c6_geometric, c6, tol); + + if (FALSE == bCanDoGeometricRules) + { + *bC6ParametersWorkWithGeometricRules = FALSE; + } + } + } + sfree(typecount); +} + +static void +check_combination_rules(const t_inputrec *ir, const gmx_mtop_t *mtop, + warninp_t wi) +{ + char err_buf[256]; + gmx_bool bLBRulesPossible, bC6ParametersWorkWithGeometricRules, bC6ParametersWorkWithLBRules; + + check_combination_rule_differences(mtop, 0, + &bC6ParametersWorkWithGeometricRules, + &bC6ParametersWorkWithLBRules, + &bLBRulesPossible); + if (ir->ljpme_combination_rule == eljpmeLB) + { + if (FALSE == bC6ParametersWorkWithLBRules || FALSE == bLBRulesPossible) + { + warning(wi, "You are using arithmetic-geometric combination rules " + "in LJ-PME, but your non-bonded C6 parameters do not " + "follow these rules."); + } + } + else + { + if (FALSE == bC6ParametersWorkWithGeometricRules) + { + if (ir->eDispCorr != edispcNO) + { + warning_note(wi, "You are using geometric combination rules in " + "LJ-PME, but your non-bonded C6 parameters do " + "not follow these rules. " + "This will introduce very small errors in the forces and energies in " + "your simulations. Dispersion correction will correct total energy " + "and/or pressure for isotropic systems, but not forces or surface tensions."); + } + else + { + warning_note(wi, "You are using geometric combination rules in " + "LJ-PME, but your non-bonded C6 parameters do " + "not follow these rules. " + "This will introduce very small errors in the forces and energies in " + "your simulations. 
If your system is homogeneous, consider using dispersion correction " + "for the total energy and pressure."); + } + } + } +} + +void triple_check(const char *mdparin, t_inputrec *ir, gmx_mtop_t *sys, + warninp_t wi) +{ + char err_buf[STRLEN]; + int i, m, c, nmol, npct; + gmx_bool bCharge, bAcc; + real gdt_max, *mgrp, mt; + rvec acc; + gmx_mtop_atomloop_block_t aloopb; + gmx_mtop_atomloop_all_t aloop; + t_atom *atom; + ivec AbsRef; + char warn_buf[STRLEN]; + + set_warning_line(wi, mdparin, -1); + + if (ir->cutoff_scheme == ecutsVERLET && + ir->verletbuf_tol > 0 && + ir->nstlist > 1 && + ((EI_MD(ir->eI) || EI_SD(ir->eI)) && + (ir->etc == etcVRESCALE || ir->etc == etcBERENDSEN))) + { + /* Check if a too small Verlet buffer might potentially + * cause more drift than the thermostat can couple off. + */ + /* Temperature error fraction for warning and suggestion */ + const real T_error_warn = 0.002; + const real T_error_suggest = 0.001; + /* For safety: 2 DOF per atom (typical with constraints) */ + const real nrdf_at = 2; + real T, tau, max_T_error; + int i; + + T = 0; + tau = 0; + for (i = 0; i < ir->opts.ngtc; i++) + { + T = max(T, ir->opts.ref_t[i]); + tau = max(tau, ir->opts.tau_t[i]); + } + if (T > 0) + { + /* This is a worst case estimate of the temperature error, + * assuming perfect buffer estimation and no cancelation + * of errors. The factor 0.5 is because energy distributes + * equally over Ekin and Epot. + */ + max_T_error = 0.5*tau*ir->verletbuf_tol/(nrdf_at*BOLTZ*T); + if (max_T_error > T_error_warn) + { + sprintf(warn_buf, "With a verlet-buffer-tolerance of %g kJ/mol/ps, a reference temperature of %g and a tau_t of %g, your temperature might be off by up to %.1f%%. To ensure the error is below %.1f%%, decrease verlet-buffer-tolerance to %.0e or decrease tau_t.", + ir->verletbuf_tol, T, tau, + 100*max_T_error, + 100*T_error_suggest, + ir->verletbuf_tol*T_error_suggest/max_T_error); + warning(wi, warn_buf); + } + } + } + + if (ETC_ANDERSEN(ir->etc)) + { + int i; + + for (i = 0; i < ir->opts.ngtc; i++) + { + sprintf(err_buf, "all tau_t must currently be equal using Andersen temperature control, violated for group %d", i); + CHECK(ir->opts.tau_t[0] != ir->opts.tau_t[i]); + sprintf(err_buf, "all tau_t must be positive using Andersen temperature control, tau_t[%d]=%10.6f", + i, ir->opts.tau_t[i]); + CHECK(ir->opts.tau_t[i] < 0); + } + + for (i = 0; i < ir->opts.ngtc; i++) + { + int nsteps = (int)(ir->opts.tau_t[i]/ir->delta_t); + sprintf(err_buf, "tau_t/delta_t for group %d for temperature control method %s must be a multiple of nstcomm (%d), as velocities of atoms in coupled groups are randomized every time step.
The input tau_t (%8.3f) leads to %d steps per randomization", i, etcoupl_names[ir->etc], ir->nstcomm, ir->opts.tau_t[i], nsteps); + CHECK((nsteps % ir->nstcomm) && (ir->etc == etcANDERSENMASSIVE)); + } + } + + if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD && + ir->comm_mode == ecmNO && + !(absolute_reference(ir, sys, FALSE, AbsRef) || ir->nsteps <= 10) && + !ETC_ANDERSEN(ir->etc)) + { + warning(wi, "You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass"); + } + + /* Check for pressure coupling with absolute position restraints */ + if (ir->epc != epcNO && ir->refcoord_scaling == erscNO) + { + absolute_reference(ir, sys, TRUE, AbsRef); + { + for (m = 0; m < DIM; m++) + { + if (AbsRef[m] && norm2(ir->compress[m]) > 0) + { + warning(wi, "You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option."); + break; + } + } + } + } + + bCharge = FALSE; + aloopb = gmx_mtop_atomloop_block_init(sys); + while (gmx_mtop_atomloop_block_next(aloopb, &atom, &nmol)) + { + if (atom->q != 0 || atom->qB != 0) + { + bCharge = TRUE; + } + } + + if (!bCharge) + { + if (EEL_FULL(ir->coulombtype)) + { + sprintf(err_buf, + "You are using full electrostatics treatment %s for a system without charges.\n" + "This costs a lot of performance for just processing zeros, consider using %s instead.\n", + EELTYPE(ir->coulombtype), EELTYPE(eelCUT)); + warning(wi, err_buf); + } + } + else + { + if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent) + { + sprintf(err_buf, + "You are using a plain Coulomb cut-off, which might produce artifacts.\n" + "You might want to consider using %s electrostatics.\n", + EELTYPE(eelPME)); + warning_note(wi, err_buf); + } + } + + /* Check if combination rules used in LJ-PME are the same as in the force field */ + if (EVDW_PME(ir->vdwtype)) + { + check_combination_rules(ir, sys, wi); + } + + /* Generalized reaction field */ + if (ir->opts.ngtc == 0) + { + sprintf(err_buf, "No temperature coupling while using coulombtype %s", + eel_names[eelGRF]); + CHECK(ir->coulombtype == eelGRF); + } + else + { + sprintf(err_buf, "When using coulombtype = %s" + " ref-t for temperature coupling should be > 0", + eel_names[eelGRF]); + CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0)); + } + + if (ir->eI == eiSD1 && + (gmx_mtop_ftype_count(sys, F_CONSTR) > 0 || + gmx_mtop_ftype_count(sys, F_SETTLE) > 0)) + { + sprintf(warn_buf, "With constraints integrator %s is less accurate, consider using %s instead", ei_names[ir->eI], ei_names[eiSD2]); + warning_note(wi, warn_buf); + } + + bAcc = FALSE; + for (i = 0; (i < sys->groups.grps[egcACC].nr); i++) + { + for (m = 0; (m < DIM); m++) + { + if (fabs(ir->opts.acc[i][m]) > 1e-6) + { + bAcc = TRUE; + } + } + } + if (bAcc) + { + clear_rvec(acc); + snew(mgrp, sys->groups.grps[egcACC].nr); + aloop = gmx_mtop_atomloop_all_init(sys); + while (gmx_mtop_atomloop_all_next(aloop, &i, &atom)) + { + mgrp[ggrpnr(&sys->groups, egcACC, i)] += atom->m; + } + mt = 0.0; + for (i = 0; (i < sys->groups.grps[egcACC].nr); i++) + { + for (m = 0; (m < DIM); m++) + { + acc[m] += ir->opts.acc[i][m]*mgrp[i]; + } + mt += mgrp[i]; + } + for (m = 0; (m < DIM); m++) + { + if (fabs(acc[m]) > 1e-6) + { + const char *dim[DIM] = { "X", "Y", "Z" }; + fprintf(stderr, + "Net Acceleration in %s direction, will %s be corrected\n", + dim[m], ir->nstcomm != 0 ? 
"" : "not"); + if (ir->nstcomm != 0 && m < ndof_com(ir)) + { + acc[m] /= mt; + for (i = 0; (i < sys->groups.grps[egcACC].nr); i++) + { + ir->opts.acc[i][m] -= acc[m]; + } + } + } + } + sfree(mgrp); + } + + if (ir->efep != efepNO && ir->fepvals->sc_alpha != 0 && + !gmx_within_tol(sys->ffparams.reppow, 12.0, 10*GMX_DOUBLE_EPS)) + { + gmx_fatal(FARGS, "Soft-core interactions are only supported with VdW repulsion power 12"); + } + + if (ir->ePull != epullNO) + { + gmx_bool bPullAbsoluteRef; + + bPullAbsoluteRef = FALSE; + for (i = 0; i < ir->pull->ncoord; i++) + { + bPullAbsoluteRef = bPullAbsoluteRef || + ir->pull->coord[i].group[0] == 0 || + ir->pull->coord[i].group[1] == 0; + } + if (bPullAbsoluteRef) + { + absolute_reference(ir, sys, FALSE, AbsRef); + for (m = 0; m < DIM; m++) + { + if (ir->pull->dim[m] && !AbsRef[m]) + { + warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts."); + break; + } + } + } + + if (ir->pull->eGeom == epullgDIRPBC) + { + for (i = 0; i < 3; i++) + { + for (m = 0; m <= i; m++) + { + if ((ir->epc != epcNO && ir->compress[i][m] != 0) || + ir->deform[i][m] != 0) + { + for (c = 0; c < ir->pull->ncoord; c++) + { + if (ir->pull->coord[c].vec[m] != 0) + { + gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->eGeom), 'x'+m); + } + } + } + } + } + } + } + + check_disre(sys); +} + +void double_check(t_inputrec *ir, matrix box, gmx_bool bConstr, warninp_t wi) +{ + real min_size; + gmx_bool bTWIN; + char warn_buf[STRLEN]; + const char *ptr; + + ptr = check_box(ir->ePBC, box); + if (ptr) + { + warning_error(wi, ptr); + } + + if (bConstr && ir->eConstrAlg == econtSHAKE) + { + if (ir->shake_tol <= 0.0) + { + sprintf(warn_buf, "ERROR: shake-tol must be > 0 instead of %g\n", + ir->shake_tol); + warning_error(wi, warn_buf); + } + + if (IR_TWINRANGE(*ir) && ir->nstlist > 1) + { + sprintf(warn_buf, "With twin-range cut-off's and SHAKE the virial and the pressure are incorrect."); + if (ir->epc == epcNO) + { + warning(wi, warn_buf); + } + else + { + warning_error(wi, warn_buf); + } + } + } + + if ( (ir->eConstrAlg == econtLINCS) && bConstr) + { + /* If we have Lincs constraints: */ + if (ir->eI == eiMD && ir->etc == etcNO && + ir->eConstrAlg == econtLINCS && ir->nLincsIter == 1) + { + sprintf(warn_buf, "For energy conservation with LINCS, lincs_iter should be 2 or larger.\n"); + warning_note(wi, warn_buf); + } + + if ((ir->eI == eiCG || ir->eI == eiLBFGS) && (ir->nProjOrder < 8)) + { + sprintf(warn_buf, "For accurate %s with LINCS constraints, lincs-order should be 8 or more.", ei_names[ir->eI]); + warning_note(wi, warn_buf); + } + if (ir->epc == epcMTTK) + { + warning_error(wi, "MTTK not compatible with lincs -- use shake instead."); + } + } + + if (bConstr && ir->epc == epcMTTK) + { + warning_note(wi, "MTTK with constraints is deprecated, and will be removed in GROMACS 5.1"); + } + + if (ir->LincsWarnAngle > 90.0) + { + sprintf(warn_buf, "lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n"); + warning(wi, warn_buf); + ir->LincsWarnAngle = 90.0; + } + + if (ir->ePBC != epbcNONE) + { + if (ir->nstlist == 0) + { + warning(wi, "With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash."); + } + bTWIN = (ir->rlistlong > ir->rlist); + if (ir->ns_type == ensGRID) + { + if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC, box)) + { + sprintf(warn_buf, "ERROR: The 
cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n", + bTWIN ? (ir->rcoulomb == ir->rlistlong ? "rcoulomb" : "rvdw") : "rlist"); + warning_error(wi, warn_buf); + } + } + else + { + min_size = min(box[XX][XX], min(box[YY][YY], box[ZZ][ZZ])); + if (2*ir->rlistlong >= min_size) + { + sprintf(warn_buf, "ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist."); + warning_error(wi, warn_buf); + if (TRICLINIC(box)) + { + fprintf(stderr, "Grid search might allow larger cut-offs than simple search with triclinic boxes."); + } + } + } + } +} + +void check_chargegroup_radii(const gmx_mtop_t *mtop, const t_inputrec *ir, + rvec *x, + warninp_t wi) +{ + real rvdw1, rvdw2, rcoul1, rcoul2; + char warn_buf[STRLEN]; + + calc_chargegroup_radii(mtop, x, &rvdw1, &rvdw2, &rcoul1, &rcoul2); + + if (rvdw1 > 0) + { + printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n", + rvdw1, rvdw2); + } + if (rcoul1 > 0) + { + printf("Largest charge group radii for Coulomb: %5.3f, %5.3f nm\n", + rcoul1, rcoul2); + } + + if (ir->rlist > 0) + { + if (rvdw1 + rvdw2 > ir->rlist || + rcoul1 + rcoul2 > ir->rlist) + { + sprintf(warn_buf, + "The sum of the two largest charge group radii (%f) " + "is larger than rlist (%f)\n", + max(rvdw1+rvdw2, rcoul1+rcoul2), ir->rlist); + warning(wi, warn_buf); + } + else + { + /* Here we do not use the zero at cut-off macro, + * since user defined interactions might purposely + * not be zero at the cut-off. + */ + if (ir_vdw_is_zero_at_cutoff(ir) && + rvdw1 + rvdw2 > ir->rlistlong - ir->rvdw) + { + sprintf(warn_buf, "The sum of the two largest charge group " + "radii (%f) is larger than %s (%f) - rvdw (%f).\n" + "With exact cut-offs, better performance can be " + "obtained with cutoff-scheme = %s, because it " + "does not use charge groups at all.", + rvdw1+rvdw2, + ir->rlistlong > ir->rlist ? "rlistlong" : "rlist", + ir->rlistlong, ir->rvdw, + ecutscheme_names[ecutsVERLET]); + if (ir_NVE(ir)) + { + warning(wi, warn_buf); + } + else + { + warning_note(wi, warn_buf); + } + } + if (ir_coulomb_is_zero_at_cutoff(ir) && + rcoul1 + rcoul2 > ir->rlistlong - ir->rcoulomb) + { + sprintf(warn_buf, "The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f).\n" + "With exact cut-offs, better performance can be obtained with cutoff-scheme = %s, because it does not use charge groups at all.", + rcoul1+rcoul2, + ir->rlistlong > ir->rlist ? "rlistlong" : "rlist", + ir->rlistlong, ir->rcoulomb, + ecutscheme_names[ecutsVERLET]); + if (ir_NVE(ir)) + { + warning(wi, warn_buf); + } + else + { + warning_note(wi, warn_buf); + } + } + } + } +} diff --cc src/gromacs/legacyheaders/nonbonded.h index 6176d60631,0000000000..d05b3e5d7e mode 100644,000000..100644 --- a/src/gromacs/legacyheaders/nonbonded.h +++ b/src/gromacs/legacyheaders/nonbonded.h @@@ -1,97 -1,0 +1,98 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org.
+ * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef _nonbonded_h +#define _nonbonded_h + +#include "typedefs.h" +#include "pbc.h" +#include "network.h" +#include "tgroup.h" +#include "genborn.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* fixes auto-indentation problems */ +#endif + + + +void +gmx_nonbonded_setup(t_forcerec * fr, + gmx_bool bGenericKernelOnly); + + + + + +void +gmx_nonbonded_set_kernel_pointers(FILE * fplog, - t_nblist * nl); ++ t_nblist * nl, ++ gmx_bool bElecAndVdwSwitchDiffers); + + + +#define GMX_NONBONDED_DO_LR (1<<0) +#define GMX_NONBONDED_DO_FORCE (1<<1) +#define GMX_NONBONDED_DO_SHIFTFORCE (1<<2) +#define GMX_NONBONDED_DO_FOREIGNLAMBDA (1<<3) +#define GMX_NONBONDED_DO_POTENTIAL (1<<4) +#define GMX_NONBONDED_DO_SR (1<<5) + +void +do_nonbonded(t_forcerec *fr, + rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *md, t_blocka *excl, + gmx_grppairener_t *grppener, + t_nrnb *nrnb, real *lambda, real dvdlambda[], + int nls, int eNL, int flags); + +/* Calculate VdW/charge listed pair interactions (usually 1-4 interactions). + * global_atom_index is only passed for printing error messages. + */ +real +do_nonbonded_listed(int ftype, int nbonds, const t_iatom iatoms[], const t_iparams iparams[], + const rvec x[], rvec f[], rvec fshift[], const t_pbc *pbc, const t_graph *g, + real *lambda, real *dvdl, const t_mdatoms *md, const t_forcerec *fr, + gmx_grppairener_t *grppener, int *global_atom_index); + +#ifdef __cplusplus +} +#endif + +#endif diff --cc src/gromacs/mdlib/forcerec.c index 165a1243d2,0000000000..3b2d69ca04 mode 100644,000000..100644 --- a/src/gromacs/mdlib/forcerec.c +++ b/src/gromacs/mdlib/forcerec.c @@@ -1,3297 -1,0 +1,3326 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. 
+ * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> +#include <assert.h> +#include "sysstuff.h" +#include "typedefs.h" +#include "types/commrec.h" +#include "vec.h" +#include "gromacs/math/utilities.h" +#include "macros.h" +#include "gromacs/utility/smalloc.h" +#include "macros.h" +#include "gmx_fatal.h" +#include "physics.h" +#include "force.h" +#include "tables.h" +#include "nonbonded.h" +#include "invblock.h" +#include "names.h" +#include "network.h" +#include "pbc.h" +#include "ns.h" +#include "mshift.h" +#include "txtdump.h" +#include "coulomb.h" +#include "md_support.h" +#include "md_logging.h" +#include "domdec.h" +#include "qmmm.h" +#include "copyrite.h" +#include "mtop_util.h" +#include "nbnxn_simd.h" +#include "nbnxn_search.h" +#include "nbnxn_atomdata.h" +#include "nbnxn_consts.h" +#include "gmx_omp_nthreads.h" +#include "gmx_detect_hardware.h" +#include "inputrec.h" + +#include "types/nbnxn_cuda_types_ext.h" +#include "gpu_utils.h" +#include "nbnxn_cuda_data_mgmt.h" +#include "pmalloc_cuda.h" + +t_forcerec *mk_forcerec(void) +{ + t_forcerec *fr; + + snew(fr, 1); + + return fr; +} + +#ifdef DEBUG +static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr) +{ + int i, j; + + for (i = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++) + { + fprintf(fp, "%2d - %2d", i, j); + if (bBHAM) + { + fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j), + BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0); + } + else + { + fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0, + C12(nbfp, atnr, i, j)/12.0); + } + } + } +} +#endif + +static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM) +{ + real *nbfp; + int i, j, k, atnr; + + atnr = idef->atnr; + if (bBHAM) + { + snew(nbfp, 3*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++, k++) + { + BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a; + BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b; + /* nbfp now includes the 6.0 derivative prefactor */ + BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0; + } + } + } + else + { + snew(nbfp, 2*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j <
atnr); j++, k++) + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0; + C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0; + } + } + } + + return nbfp; +} + +static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr) +{ + int i, j, k, atnr; + real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj; + real *grid; + + /* For LJ-PME simulations, we correct the energies with the reciprocal space + * inside of the cut-off. To do this the non-bonded kernels need to have + * access to the C6-values used on the reciprocal grid in pme.c + */ + + atnr = idef->atnr; + snew(grid, 2*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++, k++) + { + c6i = idef->iparams[i*(atnr+1)].lj.c6; + c12i = idef->iparams[i*(atnr+1)].lj.c12; + c6j = idef->iparams[j*(atnr+1)].lj.c6; + c12j = idef->iparams[j*(atnr+1)].lj.c12; + c6 = sqrt(c6i * c6j); + if (fr->ljpme_combination_rule == eljpmeLB + && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j)) + { + sigmai = pow(c12i / c6i, 1.0/6.0); + sigmaj = pow(c12j / c6j, 1.0/6.0); + epsi = c6i * c6i / c12i; + epsj = c6j * c6j / c12j; + c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6); + } + /* Store the elements at the same relative positions as C6 in nbfp in order + * to simplify access in the kernels + */ + grid[2*(atnr*i+j)] = c6*6.0; + } + } + return grid; +} + +static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule) +{ + real *nbfp; + int i, j, k, atnr; + real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj; + real c6, c12; + + atnr = idef->atnr; + snew(nbfp, 2*atnr*atnr); + for (i = 0; i < atnr; ++i) + { + for (j = 0; j < atnr; ++j) + { + c6i = idef->iparams[i*(atnr+1)].lj.c6; + c12i = idef->iparams[i*(atnr+1)].lj.c12; + c6j = idef->iparams[j*(atnr+1)].lj.c6; + c12j = idef->iparams[j*(atnr+1)].lj.c12; + c6 = sqrt(c6i * c6j); + c12 = sqrt(c12i * c12j); + if (comb_rule == eCOMB_ARITHMETIC + && !gmx_numzero(c6) && !gmx_numzero(c12)) + { + sigmai = pow(c12i / c6i, 1.0/6.0); + sigmaj = pow(c12j / c6j, 1.0/6.0); + epsi = c6i * c6i / c12i; + epsj = c6j * c6j / c12j; + c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6); + c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12); + } + C6(nbfp, atnr, i, j) = c6*6.0; + C12(nbfp, atnr, i, j) = c12*12.0; + } + } + return nbfp; +} + +/* This routine sets fr->solvent_opt to the most common solvent in the + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in + * the fr->solvent_type array with the correct type (or esolNO). + * + * Charge groups that fulfill the conditions but are not identical to the + * most common one will be marked as esolNO in the solvent_type array. + * + * TIP3P is identical to SPC for these purposes, so we call it + * SPC in the arrays (Apologies to Bill Jorgensen ;-) + * + * NOTE: QM particle should not
Not even if there is only one charge + * group in the Qm + */ + +typedef struct +{ + int model; + int count; + int vdwtype[4]; + real charge[4]; +} solvent_parameters_t; + +static void +check_solvent_cg(const gmx_moltype_t *molt, + int cg0, + int nmol, + const unsigned char *qm_grpnr, + const t_grps *qm_grps, + t_forcerec * fr, + int *n_solvent_parameters, + solvent_parameters_t **solvent_parameters_p, + int cginfo, + int *cg_sp) +{ + const t_blocka *excl; + t_atom *atom; + int j, k; + int j0, j1, nj; + gmx_bool perturbed; + gmx_bool has_vdw[4]; + gmx_bool match; + real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */ + int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */ + int tjA; + gmx_bool qm; + solvent_parameters_t *solvent_parameters; + + /* We use a list with parameters for each solvent type. + * Every time we discover a new molecule that fulfills the basic + * conditions for a solvent we compare with the previous entries + * in these lists. If the parameters are the same we just increment + * the counter for that type, and otherwise we create a new type + * based on the current molecule. + * + * Once we've finished going through all molecules we check which + * solvent is most common, and mark all those molecules while we + * clear the flag on all others. + */ + + solvent_parameters = *solvent_parameters_p; + + /* Mark the cg first as non optimized */ + *cg_sp = -1; + + /* Check if this cg has no exclusions with atoms in other charge groups + * and all atoms inside the charge group excluded. + * We only have 3 or 4 atom solvent loops. + */ + if (GET_CGINFO_EXCL_INTER(cginfo) || + !GET_CGINFO_EXCL_INTRA(cginfo)) + { + return; + } + + /* Get the indices of the first atom in this charge group */ + j0 = molt->cgs.index[cg0]; + j1 = molt->cgs.index[cg0+1]; + + /* Number of atoms in our molecule */ + nj = j1 - j0; + + if (debug) + { + fprintf(debug, + "Moltype '%s': there are %d atoms in this charge group\n", + *molt->name, nj); + } + + /* Check if it could be an SPC (3 atoms) or TIP4p (4) water, + * otherwise skip it. + */ + if (nj < 3 || nj > 4) + { + return; + } + + /* Check if we are doing QM on this group */ + qm = FALSE; + if (qm_grpnr != NULL) + { + for (j = j0; j < j1 && !qm; j++) + { + qm = (qm_grpnr[j] < qm_grps->nr - 1); + } + } + /* Cannot use solvent optimization with QM */ + if (qm) + { + return; + } + + atom = molt->atoms.atom; + + /* Still looks like a solvent, time to check parameters */ + + /* If it is perturbed (free energy) we can't use the solvent loops, + * so then we just skip to the next molecule. + */ + perturbed = FALSE; + + for (j = j0; j < j1 && !perturbed; j++) + { + perturbed = PERTURBED(atom[j]); + } + + if (perturbed) + { + return; + } + + /* Now it's only a question if the VdW and charge parameters + * are OK. Before doing the check we compare and see if they are + * identical to a possible previous solvent type. + * First we assign the current types and charges. + */ + for (j = 0; j < nj; j++) + { + tmp_vdwtype[j] = atom[j0+j].type; + tmp_charge[j] = atom[j0+j].q; + } + + /* Does it match any previous solvent type? 
*/ + for (k = 0; k < *n_solvent_parameters; k++) + { + match = TRUE; + + + /* We can only match SPC with 3 atoms and TIP4P with 4 atoms */ + if ( (solvent_parameters[k].model == esolSPC && nj != 3) || + (solvent_parameters[k].model == esolTIP4P && nj != 4) ) + { + match = FALSE; + } + + /* Check that types & charges match for all atoms in molecule */ + for (j = 0; j < nj && match == TRUE; j++) + { + if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j]) + { + match = FALSE; + } + if (tmp_charge[j] != solvent_parameters[k].charge[j]) + { + match = FALSE; + } + } + if (match == TRUE) + { + /* Congratulations! We have a matched solvent. + * Flag it with this type for later processing. + */ + *cg_sp = k; + solvent_parameters[k].count += nmol; + + /* We are done with this charge group */ + return; + } + } + + /* If we get here, we have a tentative new solvent type. + * Before we add it we must check that it fulfills the requirements + * of the solvent optimized loops. First determine which atoms have + * VdW interactions. + */ + for (j = 0; j < nj; j++) + { + has_vdw[j] = FALSE; + tjA = tmp_vdwtype[j]; + + /* Go through all other types and see if any have non-zero + * VdW parameters when combined with this one. + */ + for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++) + { + /* We already checked that the atoms weren't perturbed, + * so we only need to check state A now. + */ + if (fr->bBHAM) + { + has_vdw[j] = (has_vdw[j] || + (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0)); + } + else + { + /* Standard LJ */ + has_vdw[j] = (has_vdw[j] || + (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0)); + } + } + } + + /* Now we know all we need to make the final check and assignment. */ + if (nj == 3) + { + /* So, is it an SPC? + * For this we require that all atoms have charge, + * the charges on atom 2 & 3 should be the same, and only + * atom 1 might have VdW. + */ + if (has_vdw[1] == FALSE && + has_vdw[2] == FALSE && + tmp_charge[0] != 0 && + tmp_charge[1] != 0 && + tmp_charge[2] == tmp_charge[1]) + { + srenew(solvent_parameters, *n_solvent_parameters+1); + solvent_parameters[*n_solvent_parameters].model = esolSPC; + solvent_parameters[*n_solvent_parameters].count = nmol; + for (k = 0; k < 3; k++) + { + solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k]; + solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k]; + } + + *cg_sp = *n_solvent_parameters; + (*n_solvent_parameters)++; + } + } + else if (nj == 4) + { + /* Or could it be a TIP4P? + * For this we require that atoms 2,3,4 have charge, but not atom 1. + * Only atom 1 might have VdW.
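+ * In other words, the typical 4-site water layout: a vdW-only first + * atom (the oxygen), two identically charged atoms (the hydrogens), + * and a charged virtual site.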
+ */ + if (has_vdw[1] == FALSE && + has_vdw[2] == FALSE && + has_vdw[3] == FALSE && + tmp_charge[0] == 0 && + tmp_charge[1] != 0 && + tmp_charge[2] == tmp_charge[1] && + tmp_charge[3] != 0) + { + srenew(solvent_parameters, *n_solvent_parameters+1); + solvent_parameters[*n_solvent_parameters].model = esolTIP4P; + solvent_parameters[*n_solvent_parameters].count = nmol; + for (k = 0; k < 4; k++) + { + solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k]; + solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k]; + } + + *cg_sp = *n_solvent_parameters; + (*n_solvent_parameters)++; + } + } + + *solvent_parameters_p = solvent_parameters; +} + +static void +check_solvent(FILE * fp, + const gmx_mtop_t * mtop, + t_forcerec * fr, + cginfo_mb_t *cginfo_mb) +{ + const t_block * cgs; + const t_block * mols; + const gmx_moltype_t *molt; + int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol; + int n_solvent_parameters; + solvent_parameters_t *solvent_parameters; + int **cg_sp; + int bestsp, bestsol; + + if (debug) + { + fprintf(debug, "Going to determine what solvent types we have.\n"); + } + + mols = &mtop->mols; + + n_solvent_parameters = 0; + solvent_parameters = NULL; + /* Allocate temporary array for solvent type */ + snew(cg_sp, mtop->nmolblock); + + cg_offset = 0; + at_offset = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molt = &mtop->moltype[mtop->molblock[mb].type]; + cgs = &molt->cgs; + /* Here we have to loop over all individual molecules + * because we need to check for QMMM particles. + */ + snew(cg_sp[mb], cginfo_mb[mb].cg_mod); + nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr; + nmol = mtop->molblock[mb].nmol/nmol_ch; + for (mol = 0; mol < nmol_ch; mol++) + { + cgm = mol*cgs->nr; + am = mol*cgs->index[cgs->nr]; + for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++) + { + check_solvent_cg(molt, cg_mol, nmol, + mtop->groups.grpnr[egcQMMM] ? + mtop->groups.grpnr[egcQMMM]+at_offset+am : 0, + &mtop->groups.grps[egcQMMM], + fr, + &n_solvent_parameters, &solvent_parameters, + cginfo_mb[mb].cginfo[cgm+cg_mol], + &cg_sp[mb][cgm+cg_mol]); + } + } + cg_offset += cgs->nr; + at_offset += cgs->index[cgs->nr]; + } + + /* Puh! We finished going through all charge groups. + * Now find the most common solvent model. 
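+ * The most common model is simply the parameter entry with the highest + * count; below, charge groups flagged with that entry keep the + * optimized solvent loops, while all others are reset to esolNO.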
+ */ + + /* Most common solvent this far */ + bestsp = -2; + for (i = 0; i < n_solvent_parameters; i++) + { + if (bestsp == -2 || + solvent_parameters[i].count > solvent_parameters[bestsp].count) + { + bestsp = i; + } + } + + if (bestsp >= 0) + { + bestsol = solvent_parameters[bestsp].model; + } + else + { + bestsol = esolNO; + } + +#ifdef DISABLE_WATER_NLIST + bestsol = esolNO; +#endif + + fr->nWatMol = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + cgs = &mtop->moltype[mtop->molblock[mb].type].cgs; + nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod; + for (i = 0; i < cginfo_mb[mb].cg_mod; i++) + { + if (cg_sp[mb][i] == bestsp) + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol); + fr->nWatMol += nmol; + } + else + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO); + } + } + sfree(cg_sp[mb]); + } + sfree(cg_sp); + + if (bestsol != esolNO && fp != NULL) + { + fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n", + esol_names[bestsol], + solvent_parameters[bestsp].count); + } + + sfree(solvent_parameters); + fr->solvent_opt = bestsol; +} + +enum { + acNONE = 0, acCONSTRAINT, acSETTLE +}; + +static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop, + t_forcerec *fr, gmx_bool bNoSolvOpt, + gmx_bool *bFEP_NonBonded, + gmx_bool *bExcl_IntraCGAll_InterCGNone) +{ + const t_block *cgs; + const t_blocka *excl; + const gmx_moltype_t *molt; + const gmx_molblock_t *molb; + cginfo_mb_t *cginfo_mb; + gmx_bool *type_VDW; + int *cginfo; + int cg_offset, a_offset, cgm, am; + int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc; + int *a_con; + int ftype; + int ia; + gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms; + + ncg_tot = ncg_mtop(mtop); + snew(cginfo_mb, mtop->nmolblock); + + snew(type_VDW, fr->ntype); + for (ai = 0; ai < fr->ntype; ai++) + { + type_VDW[ai] = FALSE; + for (j = 0; j < fr->ntype; j++) + { + type_VDW[ai] = type_VDW[ai] || + fr->bBHAM || + C6(fr->nbfp, fr->ntype, ai, j) != 0 || + C12(fr->nbfp, fr->ntype, ai, j) != 0; + } + } + + *bFEP_NonBonded = FALSE; + *bExcl_IntraCGAll_InterCGNone = TRUE; + + excl_nalloc = 10; + snew(bExcl, excl_nalloc); + cg_offset = 0; + a_offset = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molb = &mtop->molblock[mb]; + molt = &mtop->moltype[molb->type]; + cgs = &molt->cgs; + excl = &molt->excls; + + /* Check if the cginfo is identical for all molecules in this block. + * If so, we only need an array of the size of one molecule. + * Otherwise we make an array of #mol times #cgs per molecule. + */ + bId = TRUE; + am = 0; + for (m = 0; m < molb->nmol; m++) + { + am = m*cgs->index[cgs->nr]; + for (cg = 0; cg < cgs->nr; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) != + ggrpnr(&mtop->groups, egcENER, a_offset +a0)) + { + bId = FALSE; + } + if (mtop->groups.grpnr[egcQMMM] != NULL) + { + for (ai = a0; ai < a1; ai++) + { + if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] != + mtop->groups.grpnr[egcQMMM][a_offset +ai]) + { + bId = FALSE; + } + } + } + } + } + + cginfo_mb[mb].cg_start = cg_offset; + cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr; + cginfo_mb[mb].cg_mod = (bId ? 
1 : molb->nmol)*cgs->nr; + snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod); + cginfo = cginfo_mb[mb].cginfo; + + /* Set constraints flags for constrained atoms */ + snew(a_con, molt->atoms.nr); + for (ftype = 0; ftype < F_NRE; ftype++) + { + if (interaction_function[ftype].flags & IF_CONSTRAINT) + { + int nral; + + nral = NRAL(ftype); + for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral) + { + int a; + + for (a = 0; a < nral; a++) + { + a_con[molt->ilist[ftype].iatoms[ia+1+a]] = + (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT); + } + } + } + } + + for (m = 0; m < (bId ? 1 : molb->nmol); m++) + { + cgm = m*cgs->nr; + am = m*cgs->index[cgs->nr]; + for (cg = 0; cg < cgs->nr; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + + /* Store the energy group in cginfo */ + gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0); + SET_CGINFO_GID(cginfo[cgm+cg], gid); + + /* Check the intra/inter charge group exclusions */ + if (a1-a0 > excl_nalloc) + { + excl_nalloc = a1 - a0; + srenew(bExcl, excl_nalloc); + } + /* bExclIntraAll: all intra cg interactions excluded + * bExclInter: any inter cg interactions excluded + */ + bExclIntraAll = TRUE; + bExclInter = FALSE; + bHaveVDW = FALSE; + bHaveQ = FALSE; + bHavePerturbedAtoms = FALSE; + for (ai = a0; ai < a1; ai++) + { + /* Check VDW and electrostatic interactions */ + bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] || + type_VDW[molt->atoms.atom[ai].typeB]); + bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 || + molt->atoms.atom[ai].qB != 0); + + bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0); + + /* Clear the exclusion list for atom ai */ + for (aj = a0; aj < a1; aj++) + { + bExcl[aj-a0] = FALSE; + } + /* Loop over all the exclusions of atom ai */ + for (j = excl->index[ai]; j < excl->index[ai+1]; j++) + { + aj = excl->a[j]; + if (aj < a0 || aj >= a1) + { + bExclInter = TRUE; + } + else + { + bExcl[aj-a0] = TRUE; + } + } + /* Check if ai excludes a0 to a1 */ + for (aj = a0; aj < a1; aj++) + { + if (!bExcl[aj-a0]) + { + bExclIntraAll = FALSE; + } + } + + switch (a_con[ai]) + { + case acCONSTRAINT: + SET_CGINFO_CONSTR(cginfo[cgm+cg]); + break; + case acSETTLE: + SET_CGINFO_SETTLE(cginfo[cgm+cg]); + break; + default: + break; + } + } + if (bExclIntraAll) + { + SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]); + } + if (bExclInter) + { + SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]); + } + if (a1 - a0 > MAX_CHARGEGROUP_SIZE) + { + /* The size in cginfo is currently only read with DD */ + gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE); + } + if (bHaveVDW) + { + SET_CGINFO_HAS_VDW(cginfo[cgm+cg]); + } + if (bHaveQ) + { + SET_CGINFO_HAS_Q(cginfo[cgm+cg]); + } + if (bHavePerturbedAtoms && fr->efep != efepNO) + { + SET_CGINFO_FEP(cginfo[cgm+cg]); + *bFEP_NonBonded = TRUE; + } + /* Store the charge group size */ + SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0); + + if (!bExclIntraAll || bExclInter) + { + *bExcl_IntraCGAll_InterCGNone = FALSE; + } + } + } + + sfree(a_con); + + cg_offset += molb->nmol*cgs->nr; + a_offset += molb->nmol*cgs->index[cgs->nr]; + } + sfree(bExcl); + + /* the solvent optimizer is called after the QM is initialized, + * because we don't want the QM subsystem to become an + * optimized solvent + */ + + check_solvent(fplog, mtop, fr, cginfo_mb); + + if (getenv("GMX_NO_SOLV_OPT")) + { + if (fplog) + { + fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n" + "Disabling all solvent optimization\n"); + } + fr->solvent_opt
= esolNO; + } + if (bNoSolvOpt) + { + fr->solvent_opt = esolNO; + } + if (!fr->solvent_opt) + { + for (mb = 0; mb < mtop->nmolblock; mb++) + { + for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++) + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO); + } + } + } + + return cginfo_mb; +} + +static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb) +{ + int ncg, mb, cg; + int *cginfo; + + ncg = cgi_mb[nmb-1].cg_end; + snew(cginfo, ncg); + mb = 0; + for (cg = 0; cg < ncg; cg++) + { + while (cg >= cgi_mb[mb].cg_end) + { + mb++; + } + cginfo[cg] = + cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod]; + } + + return cginfo; +} + +static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop) +{ + /*This now calculates sum for q and c6*/ + double qsum, q2sum, q, c6sum, c6; + int mb, nmol, i; + const t_atoms *atoms; + + qsum = 0; + q2sum = 0; + c6sum = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (i = 0; i < atoms->nr; i++) + { + q = atoms->atom[i].q; + qsum += nmol*q; + q2sum += nmol*q*q; + c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6; + c6sum += nmol*c6; + } + } + fr->qsum[0] = qsum; + fr->q2sum[0] = q2sum; + fr->c6sum[0] = c6sum; + + if (fr->efep != efepNO) + { + qsum = 0; + q2sum = 0; + c6sum = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (i = 0; i < atoms->nr; i++) + { + q = atoms->atom[i].qB; + qsum += nmol*q; + q2sum += nmol*q*q; + c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6; + c6sum += nmol*c6; + } + fr->qsum[1] = qsum; + fr->q2sum[1] = q2sum; + fr->c6sum[1] = c6sum; + } + } + else + { + fr->qsum[1] = fr->qsum[0]; + fr->q2sum[1] = fr->q2sum[0]; + fr->c6sum[1] = fr->c6sum[0]; + } + if (log) + { + if (fr->efep == efepNO) + { + fprintf(log, "System total charge: %.3f\n", fr->qsum[0]); + } + else + { + fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n", + fr->qsum[0], fr->qsum[1]); + } + } +} + +void update_forcerec(t_forcerec *fr, matrix box) +{ + if (fr->eeltype == eelGRF) + { + calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf, + fr->rcoulomb, fr->temp, fr->zsquare, box, + &fr->kappa, &fr->k_rf, &fr->c_rf); + } +} + +void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop) +{ + const t_atoms *atoms, *atoms_tpi; + const t_blocka *excl; + int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q; + gmx_int64_t npair, npair_ij, tmpi, tmpj; + double csix, ctwelve; + int ntp, *typecount; + gmx_bool bBHAM; + real *nbfp; + real *nbfp_comb = NULL; + + ntp = fr->ntype; + bBHAM = fr->bBHAM; + nbfp = fr->nbfp; + + /* For LJ-PME, we want to correct for the difference between the + * actual C6 values and the C6 values used by the LJ-PME based on + * combination rules. */ + + if (EVDW_PME(fr->vdwtype)) + { + nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams, + (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC); + for (tpi = 0; tpi < ntp; ++tpi) + { + for (tpj = 0; tpj < ntp; ++tpj) + { + C6(nbfp_comb, ntp, tpi, tpj) = + C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj); + C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj); + } + } + nbfp = nbfp_comb; + } + for (q = 0; q < (fr->efep == efepNO ? 
1 : 2); q++) + { + csix = 0; + ctwelve = 0; + npair = 0; + nexcl = 0; + if (!fr->n_tpi) + { + /* Count the types so we avoid natoms^2 operations */ + snew(typecount, ntp); + gmx_mtop_count_atomtypes(mtop, q, typecount); + + for (tpi = 0; tpi < ntp; tpi++) + { + for (tpj = tpi; tpj < ntp; tpj++) + { + tmpi = typecount[tpi]; + tmpj = typecount[tpj]; + if (tpi != tpj) + { + npair_ij = tmpi*tmpj; + } + else + { + npair_ij = tmpi*(tmpi - 1)/2; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0; + ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0; + } + npair += npair_ij; + } + } + sfree(typecount); + /* Subtract the excluded pairs. + * The main reason for subtracting exclusions is that in some cases + * some combinations might never occur and the parameters could have + * any value. These unused values should not influence the dispersion + * correction. + */ + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + excl = &mtop->moltype[mtop->molblock[mb].type].excls; + for (i = 0; (i < atoms->nr); i++) + { + if (q == 0) + { + tpi = atoms->atom[i].type; + } + else + { + tpi = atoms->atom[i].typeB; + } + j1 = excl->index[i]; + j2 = excl->index[i+1]; + for (j = j1; j < j2; j++) + { + k = excl->a[j]; + if (k > i) + { + if (q == 0) + { + tpj = atoms->atom[k].type; + } + else + { + tpj = atoms->atom[k].typeB; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0; + ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0; + } + nexcl += nmol; + } + } + } + } + } + else + { + /* Only correct for the interaction of the test particle + * with the rest of the system. + */ + atoms_tpi = + &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms; + + npair = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (j = 0; j < atoms->nr; j++) + { + nmolc = nmol; + /* Remove the interaction of the test charge group + * with itself.
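+ * For the last molecule block, which holds the inserted test + * molecule, we therefore count one molecule less (nmolc--).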
+ */ + if (mb == mtop->nmolblock-1) + { + nmolc--; + + if (mb == 0 && nmol == 1) + { + gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file"); + } + } + if (q == 0) + { + tpj = atoms->atom[j].type; + } + else + { + tpj = atoms->atom[j].typeB; + } + for (i = 0; i < fr->n_tpi; i++) + { + if (q == 0) + { + tpi = atoms_tpi->atom[i].type; + } + else + { + tpi = atoms_tpi->atom[i].typeB; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0; + ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0; + } + npair += nmolc; + } + } + } + } + if (npair - nexcl <= 0 && fplog) + { + fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n"); + csix = 0; + ctwelve = 0; + } + else + { + csix /= npair - nexcl; + ctwelve /= npair - nexcl; + } + if (debug) + { + fprintf(debug, "Counted %d exclusions\n", nexcl); + fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix); + fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve); + } + fr->avcsix[q] = csix; + fr->avctwelve[q] = ctwelve; + } + + if (EVDW_PME(fr->vdwtype)) + { + sfree(nbfp_comb); + } + + if (fplog != NULL) + { + if (fr->eDispCorr == edispcAllEner || + fr->eDispCorr == edispcAllEnerPres) + { + fprintf(fplog, "Long Range LJ corr.: %10.4e, %10.4e\n", + fr->avcsix[0], fr->avctwelve[0]); + } + else + { + fprintf(fplog, "Long Range LJ corr.: %10.4e\n", fr->avcsix[0]); + } + } +} + + +static void set_bham_b_max(FILE *fplog, t_forcerec *fr, + const gmx_mtop_t *mtop) +{ + const t_atoms *at1, *at2; + int mt1, mt2, i, j, tpi, tpj, ntypes; + real b, bmin; + real *nbfp; + + if (fplog) + { + fprintf(fplog, "Determining largest Buckingham b parameter for table\n"); + } + nbfp = fr->nbfp; + ntypes = fr->ntype; + + bmin = -1; + fr->bham_b_max = 0; + for (mt1 = 0; mt1 < mtop->nmoltype; mt1++) + { + at1 = &mtop->moltype[mt1].atoms; + for (i = 0; (i < at1->nr); i++) + { + tpi = at1->atom[i].type; + if (tpi >= ntypes) + { + gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes); + } + + for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++) + { + at2 = &mtop->moltype[mt2].atoms; + for (j = 0; (j < at2->nr); j++) + { + tpj = at2->atom[j].type; + if (tpj >= ntypes) + { + gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes); + } + b = BHAMB(nbfp, ntypes, tpi, tpj); + if (b > fr->bham_b_max) + { + fr->bham_b_max = b; + } + if ((b < bmin) || (bmin == -1)) + { + bmin = b; + } + } + } + } + } + if (fplog) + { + fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n", + bmin, fr->bham_b_max); + } +} + +static void make_nbf_tables(FILE *fp, const output_env_t oenv, + t_forcerec *fr, real rtab, + const t_commrec *cr, + const char *tabfn, char *eg1, char *eg2, + t_nblists *nbl) +{ + char buf[STRLEN]; + int i, j; + + if (tabfn == NULL) + { + if (debug) + { + fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n"); + } + return; + } + + sprintf(buf, "%s", tabfn); + if (eg1 && eg2) + { + /* Append the two energy group names */ + sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s", + eg1, eg2, ftp2ext(efXVG)); + } + nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0); + /* Copy the contents of the table to separate coulomb and LJ tables too, + * to improve cache performance. 
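+ * The combined table holds 12 reals per point: four spline
+ * coefficients each for Coulomb, dispersion and repulsion, which is
+ * the layout the copy loops below assume.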
+ */ + /* For performance reasons we want + * the table data to be aligned to 16-byte. The pointers could be freed + * but currently aren't. + */ + nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC; + nbl->table_elec.format = nbl->table_elec_vdw.format; + nbl->table_elec.r = nbl->table_elec_vdw.r; + nbl->table_elec.n = nbl->table_elec_vdw.n; + nbl->table_elec.scale = nbl->table_elec_vdw.scale; + nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp; + nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize; + nbl->table_elec.ninteractions = 1; + nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions; + snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32); + + nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP; + nbl->table_vdw.format = nbl->table_elec_vdw.format; + nbl->table_vdw.r = nbl->table_elec_vdw.r; + nbl->table_vdw.n = nbl->table_elec_vdw.n; + nbl->table_vdw.scale = nbl->table_elec_vdw.scale; + nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp; + nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize; + nbl->table_vdw.ninteractions = 2; + nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions; + snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32); + + for (i = 0; i <= nbl->table_elec_vdw.n; i++) + { + for (j = 0; j < 4; j++) + { + nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j]; + } + for (j = 0; j < 8; j++) + { + nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j]; + } + } +} + +static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop, + int *ncount, int **count) +{ + const gmx_moltype_t *molt; + const t_ilist *il; + int mt, ftype, stride, i, j, tabnr; + + for (mt = 0; mt < mtop->nmoltype; mt++) + { + molt = &mtop->moltype[mt]; + for (ftype = 0; ftype < F_NRE; ftype++) + { + if (ftype == ftype1 || ftype == ftype2) + { + il = &molt->ilist[ftype]; + stride = 1 + NRAL(ftype); + for (i = 0; i < il->nr; i += stride) + { + tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table; + if (tabnr < 0) + { + gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr); + } + if (tabnr >= *ncount) + { + srenew(*count, tabnr+1); + for (j = *ncount; j < tabnr+1; j++) + { + (*count)[j] = 0; + } + *ncount = tabnr+1; + } + (*count)[tabnr]++; + } + } + } + } +} + +static bondedtable_t *make_bonded_tables(FILE *fplog, + int ftype1, int ftype2, + const gmx_mtop_t *mtop, + const char *basefn, const char *tabext) +{ + int i, ncount, *count; + char tabfn[STRLEN]; + bondedtable_t *tab; + + tab = NULL; + + ncount = 0; + count = NULL; + count_tables(ftype1, ftype2, mtop, &ncount, &count); + + if (ncount > 0) + { + snew(tab, ncount); + for (i = 0; i < ncount; i++) + { + if (count[i] > 0) + { + sprintf(tabfn, "%s", basefn); + sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s", + tabext, i, ftp2ext(efXVG)); + tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2); + } + } + sfree(count); + } + + return tab; +} + +void forcerec_set_ranges(t_forcerec *fr, + int ncg_home, int ncg_force, + int natoms_force, + int natoms_force_constr, int natoms_f_novirsum) +{ + fr->cg0 = 0; + fr->hcg = ncg_home; + + /* fr->ncg_force is unused in the standard code, + * but it can be useful for modified code dealing with charge groups. 
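+ * The atom counts passed in, in contrast, size the force buffers that
+ * are reallocated below when they grow.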
+ */ + fr->ncg_force = ncg_force; + fr->natoms_force = natoms_force; + fr->natoms_force_constr = natoms_force_constr; + + if (fr->natoms_force_constr > fr->nalloc_force) + { + fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr); + + if (fr->bTwinRange) + { + srenew(fr->f_twin, fr->nalloc_force); + } + } + + if (fr->bF_NoVirSum) + { + fr->f_novirsum_n = natoms_f_novirsum; + if (fr->f_novirsum_n > fr->f_novirsum_nalloc) + { + fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n); + srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc); + } + } + else + { + fr->f_novirsum_n = 0; + } +} + +static real cutoff_inf(real cutoff) +{ + if (cutoff == 0) + { + cutoff = GMX_CUTOFF_INF; + } + + return cutoff; +} + +static void make_adress_tf_tables(FILE *fp, const output_env_t oenv, + t_forcerec *fr, const t_inputrec *ir, + const char *tabfn, const gmx_mtop_t *mtop, + matrix box) +{ + char buf[STRLEN]; + int i, j; + + if (tabfn == NULL) + { + gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n"); + return; + } + + snew(fr->atf_tabs, ir->adress->n_tf_grps); + + sprintf(buf, "%s", tabfn); + for (i = 0; i < ir->adress->n_tf_grps; i++) + { + j = ir->adress->tf_table_index[i]; /* get energy group index */ + sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s", + *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG)); + if (fp) + { + fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf); + } + fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box); + } + +} + +gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp) +{ + gmx_bool bAllvsAll; + + bAllvsAll = + ( + ir->rlist == 0 && + ir->rcoulomb == 0 && + ir->rvdw == 0 && + ir->ePBC == epbcNONE && + ir->vdwtype == evdwCUT && + ir->coulombtype == eelCUT && + ir->efep == efepNO && + (ir->implicit_solvent == eisNO || + (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL || + ir->gb_algorithm == egbHCT || + ir->gb_algorithm == egbOBC))) && + getenv("GMX_NO_ALLVSALL") == NULL + ); + + if (bAllvsAll && ir->opts.ngener > 1) + { + const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n"; + + if (bPrintNote) + { + if (MASTER(cr)) + { + fprintf(stderr, "\n%s\n", note); + } + if (fp != NULL) + { + fprintf(fp, "\n%s\n", note); + } + } + bAllvsAll = FALSE; + } + + if (bAllvsAll && fp && MASTER(cr)) + { + fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n"); + } + + return bAllvsAll; +} + + +static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp) +{ + int t, i; + + /* These thread local data structures are used for bondeds only */ + fr->nthreads = gmx_omp_nthreads_get(emntBonded); + + if (fr->nthreads > 1) + { + snew(fr->f_t, fr->nthreads); + /* Thread 0 uses the global force and energy arrays */ + for (t = 1; t < fr->nthreads; t++) + { + fr->f_t[t].f = NULL; + fr->f_t[t].f_nalloc = 0; + snew(fr->f_t[t].fshift, SHIFTS); + fr->f_t[t].grpp.nener = nenergrp*nenergrp; + for (i = 0; i < egNR; i++) + { + snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener); + } + } + } +} + + +gmx_bool nbnxn_acceleration_supported(FILE *fplog, + const t_commrec *cr, + const t_inputrec *ir, + gmx_bool bGPU) +{ + if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)) + { + md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not 
supported with %s, falling back to %s\n",
+                      bGPU ? "GPUs" : "SIMD kernels",
+                      bGPU ? "CPU only" : "plain-C kernels");
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
+                                  int                         *kernel_type,
+                                  int                         *ewald_excl)
+{
+    *kernel_type = nbnxnk4x4_PlainC;
+    *ewald_excl  = ewaldexclTable;
+
+#ifdef GMX_NBNXN_SIMD
+    {
+#ifdef GMX_NBNXN_SIMD_4XN
+        *kernel_type = nbnxnk4xN_SIMD_4xN;
+#endif
+#ifdef GMX_NBNXN_SIMD_2XNN
+        *kernel_type = nbnxnk4xN_SIMD_2xNN;
+#endif
+
+#if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
+        /* We need to choose if we want 2x(N+N) or 4xN kernels.
+         * Currently this is based on the SIMD acceleration choice,
+         * but it might be better to decide this at runtime based on CPU.
+         *
+         * 4xN calculates more (zero) interactions, but has less pair-search
+         * work and much better kernel instruction scheduling.
+         *
+         * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
+         * which doesn't have FMA, both the analytical and tabulated Ewald
+         * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
+         * 2x(4+4) because it results in significantly fewer pairs.
+         * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
+         * 10% with HT, 50% without HT. As we currently don't detect the actual
+         * use of HT, use 4x8 to avoid a potential performance hit.
+         * On Intel Haswell 4x8 is always faster.
+         */
+        *kernel_type = nbnxnk4xN_SIMD_4xN;
+
+#ifndef GMX_SIMD_HAVE_FMA
+        if (EEL_PME_EWALD(ir->coulombtype) ||
+            EVDW_PME(ir->vdwtype))
+        {
+            /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
+             * There are enough instructions to make 2x(4+4) efficient.
+             */
+            *kernel_type = nbnxnk4xN_SIMD_2xNN;
+        }
+#endif
+#endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
+
+
+        if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
+        {
+#ifdef GMX_NBNXN_SIMD_4XN
+            *kernel_type = nbnxnk4xN_SIMD_4xN;
+#else
+            gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
+#endif
+        }
+        if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
+        {
+#ifdef GMX_NBNXN_SIMD_2XNN
+            *kernel_type = nbnxnk4xN_SIMD_2xNN;
+#else
+            gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
+#endif
+        }
+
+        /* Analytical Ewald exclusion correction is only an option in
+         * the SIMD kernel.
+         * Since table lookups don't parallelize with SIMD, analytical
+         * will probably always be faster for a SIMD width of 8 or more.
+         * With FMA analytical is sometimes faster for a width of 4 as well.
+         * On BlueGene/Q, this is faster regardless of precision.
+         * In single precision, this is faster on Bulldozer.
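+         * The preprocessor conditional below encodes exactly these cases:
+         * a SIMD width of 8 or more, a width of 4 or more with FMA in
+         * single precision, or IBM QPX (BlueGene/Q).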
+ */ +#if GMX_SIMD_REAL_WIDTH >= 8 || \ + (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \ + defined GMX_SIMD_IBM_QPX + *ewald_excl = ewaldexclAnalytical; +#endif + if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL) + { + *ewald_excl = ewaldexclTable; + } + if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL) + { + *ewald_excl = ewaldexclAnalytical; + } + + } +#endif /* GMX_NBNXN_SIMD */ +} + + +const char *lookup_nbnxn_kernel_name(int kernel_type) +{ + const char *returnvalue = NULL; + switch (kernel_type) + { + case nbnxnkNotSet: + returnvalue = "not set"; + break; + case nbnxnk4x4_PlainC: + returnvalue = "plain C"; + break; + case nbnxnk4xN_SIMD_4xN: + case nbnxnk4xN_SIMD_2xNN: +#ifdef GMX_NBNXN_SIMD +#if defined GMX_SIMD_X86_SSE2 + returnvalue = "SSE2"; +#elif defined GMX_SIMD_X86_SSE4_1 + returnvalue = "SSE4.1"; +#elif defined GMX_SIMD_X86_AVX_128_FMA + returnvalue = "AVX_128_FMA"; +#elif defined GMX_SIMD_X86_AVX_256 + returnvalue = "AVX_256"; +#elif defined GMX_SIMD_X86_AVX2_256 + returnvalue = "AVX2_256"; +#else + returnvalue = "SIMD"; +#endif +#else /* GMX_NBNXN_SIMD */ + returnvalue = "not available"; +#endif /* GMX_NBNXN_SIMD */ + break; + case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break; + case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break; + + case nbnxnkNR: + default: + gmx_fatal(FARGS, "Illegal kernel type selected"); + returnvalue = NULL; + break; + } + return returnvalue; +}; + +static void pick_nbnxn_kernel(FILE *fp, + const t_commrec *cr, + gmx_bool use_simd_kernels, + gmx_bool bUseGPU, + gmx_bool bEmulateGPU, + const t_inputrec *ir, + int *kernel_type, + int *ewald_excl, + gmx_bool bDoNonbonded) +{ + assert(kernel_type); + + *kernel_type = nbnxnkNotSet; + *ewald_excl = ewaldexclTable; + + if (bEmulateGPU) + { + *kernel_type = nbnxnk8x8x8_PlainC; + + if (bDoNonbonded) + { + md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)"); + } + } + else if (bUseGPU) + { + *kernel_type = nbnxnk8x8x8_CUDA; + } + + if (*kernel_type == nbnxnkNotSet) + { + /* LJ PME with LB combination rule does 7 mesh operations. + * This so slow that we don't compile SIMD non-bonded kernels for that. + */ + if (use_simd_kernels && + nbnxn_acceleration_supported(fp, cr, ir, FALSE)) + { + pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl); + } + else + { + *kernel_type = nbnxnk4x4_PlainC; + } + } + + if (bDoNonbonded && fp != NULL) + { + fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n", + lookup_nbnxn_kernel_name(*kernel_type), + nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE, + nbnxn_kernel_to_cj_size(*kernel_type)); + + if (nbnxnk4x4_PlainC == *kernel_type || + nbnxnk8x8x8_PlainC == *kernel_type) + { + md_print_warn(cr, fp, + "WARNING: Using the slow %s kernels. This should\n" + "not happen during routine usage on supported platforms.\n\n", + lookup_nbnxn_kernel_name(*kernel_type)); + } + } +} + +static void pick_nbnxn_resources(const t_commrec *cr, + const gmx_hw_info_t *hwinfo, + gmx_bool bDoNonbonded, + gmx_bool *bUseGPU, + gmx_bool *bEmulateGPU, + const gmx_gpu_opt_t *gpu_opt) +{ + gmx_bool bEmulateGPUEnvVarSet; + char gpu_err_str[STRLEN]; + + *bUseGPU = FALSE; + + bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL); + + /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. 
Because
+     * GPUs (currently) only handle non-bonded calculations, we will
+     * automatically switch to emulation if non-bonded calculations are
+     * turned off via GMX_NO_NONBONDED - this is the simple and elegant
+     * way to turn off GPU initialization, data movement, and cleanup.
+     *
+     * GPU emulation can be useful to assess the performance one can expect by
+     * adding GPU(s) to the machine. The conditional below allows this even
+     * if mdrun is compiled without GPU acceleration support.
+     * Note that you should freeze the system as otherwise it will explode.
+     */
+    *bEmulateGPU = (bEmulateGPUEnvVarSet ||
+                    (!bDoNonbonded &&
+                     gpu_opt->ncuda_dev_use > 0));
+
+    /* Enable GPU mode when GPUs are available and no GPU emulation is requested.
+     */
+    if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
+    {
+        /* Each PP node will use the intra-node id-th device from the
+         * list of detected/selected GPUs. */
+        if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
+                      &hwinfo->gpu_info, gpu_opt))
+        {
+            /* At this point the init should never fail as we made sure that
+             * we have all the GPUs we need. If it still does, we'll bail. */
+            gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
+                      cr->nodeid,
+                      get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
+                                        cr->rank_pp_intranode),
+                      gpu_err_str);
+        }
+
+        /* Here we actually turn on hardware GPU acceleration */
+        *bUseGPU = TRUE;
+    }
+}
+
+gmx_bool uses_simple_tables(int cutoff_scheme,
+                            nonbonded_verlet_t *nbv,
+                            int group)
+{
+    gmx_bool bUsesSimpleTables = TRUE;
+    int      grp_index;
+
+    switch (cutoff_scheme)
+    {
+        case ecutsGROUP:
+            bUsesSimpleTables = TRUE;
+            break;
+        case ecutsVERLET:
+            assert(NULL != nbv && NULL != nbv->grp);
+            grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
+            bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
+            break;
+        default:
+            gmx_incons("unimplemented");
+    }
+    return bUsesSimpleTables;
+}
+
+static void init_ewald_f_table(interaction_const_t *ic,
+                               gmx_bool             bUsesSimpleTables,
+                               real                 rtab)
+{
+    real maxr;
+
+    if (bUsesSimpleTables)
+    {
+        /* With a spacing of 0.0005 we are at the force summation accuracy
+         * for the SSE kernels for "normal" atomistic simulations.
+         */
+        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
+                                                   ic->rcoulomb);
+
+        maxr = (rtab > ic->rcoulomb) ?
rtab : ic->rcoulomb;
+        ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
+    }
+    else
+    {
+        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
+        /* Subtract 2 instead of 1 to avoid access out of range due to rounding */
+        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
+    }
+
+    sfree_aligned(ic->tabq_coul_FDV0);
+    sfree_aligned(ic->tabq_coul_F);
+    sfree_aligned(ic->tabq_coul_V);
+
+    sfree_aligned(ic->tabq_vdw_FDV0);
+    sfree_aligned(ic->tabq_vdw_F);
+    sfree_aligned(ic->tabq_vdw_V);
+
+    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
+    {
+        /* Create the original table data in FDV0 */
+        snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
+        snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
+        snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
+        table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
+                                    ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
+    }
+
+    if (EVDW_PME(ic->vdwtype))
+    {
+        snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
+        snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
+        snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
+        table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
+                                    ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
+    }
+}
+
+void init_interaction_const_tables(FILE                *fp,
+                                   interaction_const_t *ic,
+                                   gmx_bool             bUsesSimpleTables,
+                                   real                 rtab)
+{
+    real spacing;
+
+    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
+    {
+        init_ewald_f_table(ic, bUsesSimpleTables, rtab);
+
+        if (fp != NULL)
+        {
+            fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
+                    1/ic->tabq_scale, ic->tabq_size);
+        }
+    }
+}
+
+static void clear_force_switch_constants(shift_consts_t *sc)
+{
+    sc->c2   = 0;
+    sc->c3   = 0;
+    sc->cpot = 0;
+}
+
+static void force_switch_constants(real p,
+                                   real rsw, real rc,
+                                   shift_consts_t *sc)
+{
+    /* Here we determine the coefficients for shifting the force to zero
+     * between distance rsw and the cut-off rc.
+     * For a potential of r^-p, we have force p*r^-(p+1).
+     * But to save flops we absorb p in the coefficient.
+     * Thus we get:
+     * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
+     * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
+     */
+    sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
+    sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
+    sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
+}
+
+static void potential_switch_constants(real rsw, real rc,
+                                       switch_consts_t *sc)
+{
+    /* The switch function is 1 at rsw and 0 at rc.
+     * The derivative and second derivative are zero at both ends.
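+     * Requiring sw = 0 with vanishing first and second derivatives at the
+     * cut-off fixes the coefficients to c3 = -10/(rc-rsw)^3,
+     * c4 = 15/(rc-rsw)^4 and c5 = -6/(rc-rsw)^5, as set below.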
+ * rsw = max(r - r_switch, 0) + * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5 + * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4 + * force = force*dsw - potential*sw + * potential *= sw + */ + sc->c3 = -10*pow(rc - rsw, -3); + sc->c4 = 15*pow(rc - rsw, -4); + sc->c5 = -6*pow(rc - rsw, -5); +} + +static void +init_interaction_const(FILE *fp, + const t_commrec gmx_unused *cr, + interaction_const_t **interaction_const, + const t_forcerec *fr, + real rtab) +{ + interaction_const_t *ic; + gmx_bool bUsesSimpleTables = TRUE; + + snew(ic, 1); + + /* Just allocate something so we can free it */ + snew_aligned(ic->tabq_coul_FDV0, 16, 32); + snew_aligned(ic->tabq_coul_F, 16, 32); + snew_aligned(ic->tabq_coul_V, 16, 32); + + ic->rlist = fr->rlist; + ic->rlistlong = fr->rlistlong; + + /* Lennard-Jones */ + ic->vdwtype = fr->vdwtype; + ic->vdw_modifier = fr->vdw_modifier; + ic->rvdw = fr->rvdw; + ic->rvdw_switch = fr->rvdw_switch; + ic->ewaldcoeff_lj = fr->ewaldcoeff_lj; + ic->ljpme_comb_rule = fr->ljpme_combination_rule; + ic->sh_lj_ewald = 0; + clear_force_switch_constants(&ic->dispersion_shift); + clear_force_switch_constants(&ic->repulsion_shift); + + switch (ic->vdw_modifier) + { + case eintmodPOTSHIFT: + /* Only shift the potential, don't touch the force */ + ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0); + ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0); + if (EVDW_PME(ic->vdwtype)) + { + real crc2; + + crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw); + ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0); + } + break; + case eintmodFORCESWITCH: + /* Switch the force, switch and shift the potential */ + force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw, + &ic->dispersion_shift); + force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw, + &ic->repulsion_shift); + break; + case eintmodPOTSWITCH: + /* Switch the potential and force */ + potential_switch_constants(ic->rvdw_switch, ic->rvdw, + &ic->vdw_switch); + break; + case eintmodNONE: + case eintmodEXACTCUTOFF: + /* Nothing to do here */ + break; + default: + gmx_incons("unimplemented potential modifier"); + } + + ic->sh_invrc6 = -ic->dispersion_shift.cpot; + + /* Electrostatics */ + ic->eeltype = fr->eeltype; + ic->coulomb_modifier = fr->coulomb_modifier; + ic->rcoulomb = fr->rcoulomb; + ic->epsilon_r = fr->epsilon_r; + ic->epsfac = fr->epsfac; + ic->ewaldcoeff_q = fr->ewaldcoeff_q; + + if (fr->coulomb_modifier == eintmodPOTSHIFT) + { + ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb); + } + else + { + ic->sh_ewald = 0; + } + + /* Reaction-field */ + if (EEL_RF(ic->eeltype)) + { + ic->epsilon_rf = fr->epsilon_rf; + ic->k_rf = fr->k_rf; + ic->c_rf = fr->c_rf; + } + else + { + /* For plain cut-off we might use the reaction-field kernels */ + ic->epsilon_rf = ic->epsilon_r; + ic->k_rf = 0; + if (fr->coulomb_modifier == eintmodPOTSHIFT) + { + ic->c_rf = 1/ic->rcoulomb; + } + else + { + ic->c_rf = 0; + } + } + + if (fp != NULL) + { + real dispersion_shift; + + dispersion_shift = ic->dispersion_shift.cpot; + if (EVDW_PME(ic->vdwtype)) + { + dispersion_shift -= ic->sh_lj_ewald; + } + fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e", + ic->repulsion_shift.cpot, dispersion_shift); + + if (ic->eeltype == eelCUT) + { + fprintf(fp, ", Coulomb %.e", -ic->c_rf); + } + else if (EEL_PME(ic->eeltype)) + { + fprintf(fp, ", Ewald %.3e", -ic->sh_ewald); + } + fprintf(fp, "\n"); + } + + *interaction_const = ic; + + if (fr->nbv != NULL && fr->nbv->bUseGPU) + { + nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp); + + /* With 
tMPI + GPUs some ranks may be sharing GPU(s) and therefore + * also sharing texture references. To keep the code simple, we don't + * treat texture references as shared resources, but this means that + * the coulomb_tab and nbfp texture refs will get updated by multiple threads. + * Hence, to ensure that the non-bonded kernels don't start before all + * texture binding operations are finished, we need to wait for all ranks + * to arrive here before continuing. + * + * Note that we could omit this barrier if GPUs are not shared (or + * texture objects are used), but as this is initialization code, there + * is not point in complicating things. + */ +#ifdef GMX_THREAD_MPI + if (PAR(cr)) + { + gmx_barrier(cr); + } +#endif /* GMX_THREAD_MPI */ + } + + bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1); + init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab); +} + +static void init_nb_verlet(FILE *fp, + nonbonded_verlet_t **nb_verlet, + gmx_bool bFEP_NonBonded, + const t_inputrec *ir, + const t_forcerec *fr, + const t_commrec *cr, + const char *nbpu_opt) +{ + nonbonded_verlet_t *nbv; + int i; + char *env; + gmx_bool bEmulateGPU, bHybridGPURun = FALSE; + + nbnxn_alloc_t *nb_alloc; + nbnxn_free_t *nb_free; + + snew(nbv, 1); + + pick_nbnxn_resources(cr, fr->hwinfo, + fr->bNonbonded, + &nbv->bUseGPU, + &bEmulateGPU, + fr->gpu_opt); + + nbv->nbs = NULL; + + nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1); + for (i = 0; i < nbv->ngrp; i++) + { + nbv->grp[i].nbl_lists.nnbl = 0; + nbv->grp[i].nbat = NULL; + nbv->grp[i].kernel_type = nbnxnkNotSet; + + if (i == 0) /* local */ + { + pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels, + nbv->bUseGPU, bEmulateGPU, ir, + &nbv->grp[i].kernel_type, + &nbv->grp[i].ewald_excl, + fr->bNonbonded); + } + else /* non-local */ + { + if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0) + { + /* Use GPU for local, select a CPU kernel for non-local */ + pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels, + FALSE, FALSE, ir, + &nbv->grp[i].kernel_type, + &nbv->grp[i].ewald_excl, + fr->bNonbonded); + + bHybridGPURun = TRUE; + } + else + { + /* Use the same kernel for local and non-local interactions */ + nbv->grp[i].kernel_type = nbv->grp[0].kernel_type; + nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl; + } + } + } + + if (nbv->bUseGPU) + { + /* init the NxN GPU data; the last argument tells whether we'll have + * both local and non-local NB calculation on GPU */ + nbnxn_cuda_init(fp, &nbv->cu_nbv, + &fr->hwinfo->gpu_info, fr->gpu_opt, + cr->rank_pp_intranode, + (nbv->ngrp > 1) && !bHybridGPURun); + + if ((env = getenv("GMX_NB_MIN_CI")) != NULL) + { + char *end; + + nbv->min_ci_balanced = strtol(env, &end, 10); + if (!end || (*end != 0) || nbv->min_ci_balanced <= 0) + { + gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env); + } + + if (debug) + { + fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n", + nbv->min_ci_balanced); + } + } + else + { + nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv); + if (debug) + { + fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n", + nbv->min_ci_balanced); + } + } + } + else + { + nbv->min_ci_balanced = 0; + } + + *nb_verlet = nbv; + + nbnxn_init_search(&nbv->nbs, + DOMAINDECOMP(cr) ? &cr->dd->nc : NULL, + DOMAINDECOMP(cr) ? 
domdec_zones(cr->dd) : NULL, + bFEP_NonBonded, + gmx_omp_nthreads_get(emntNonbonded)); + + for (i = 0; i < nbv->ngrp; i++) + { + if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA) + { + nb_alloc = &pmalloc; + nb_free = &pfree; + } + else + { + nb_alloc = NULL; + nb_free = NULL; + } + + nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists, + nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type), + /* 8x8x8 "non-simple" lists are ATM always combined */ + !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type), + nb_alloc, nb_free); + + if (i == 0 || + nbv->grp[0].kernel_type != nbv->grp[i].kernel_type) + { + gmx_bool bSimpleList; + int enbnxninitcombrule; + + bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type); + + if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT))) + { + /* Plain LJ cut-off: we can optimize with combination rules */ + enbnxninitcombrule = enbnxninitcombruleDETECT; + } + else if (fr->vdwtype == evdwPME) + { + /* LJ-PME: we need to use a combination rule for the grid */ + if (fr->ljpme_combination_rule == eljpmeGEOM) + { + enbnxninitcombrule = enbnxninitcombruleGEOM; + } + else + { + enbnxninitcombrule = enbnxninitcombruleLB; + } + } + else + { + /* We use a full combination matrix: no rule required */ + enbnxninitcombrule = enbnxninitcombruleNONE; + } + + + snew(nbv->grp[i].nbat, 1); + nbnxn_atomdata_init(fp, + nbv->grp[i].nbat, + nbv->grp[i].kernel_type, + enbnxninitcombrule, + fr->ntype, fr->nbfp, + ir->opts.ngener, + bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1, + nb_alloc, nb_free); + } + else + { + nbv->grp[i].nbat = nbv->grp[0].nbat; + } + } +} + +void init_forcerec(FILE *fp, + const output_env_t oenv, + t_forcerec *fr, + t_fcdata *fcd, + const t_inputrec *ir, + const gmx_mtop_t *mtop, + const t_commrec *cr, + matrix box, + const char *tabfn, + const char *tabafn, + const char *tabpfn, + const char *tabbfn, + const char *nbpu_opt, + gmx_bool bNoSolvOpt, + real print_force) +{ + int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj; + real rtab; + char *env; + double dbl; + const t_block *cgs; + gmx_bool bGenericKernelOnly; + gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse; + gmx_bool bFEP_NonBonded; + t_nblists *nbl; + int *nm_ind, egp_flags; + + if (fr->hwinfo == NULL) + { + /* Detect hardware, gather information. + * In mdrun, hwinfo has already been set before calling init_forcerec. + * Here we ignore GPUs, as tools will not use them anyhow. + */ + fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE); + } + + /* By default we turn SIMD kernels on, but it might be turned off further down... */ + fr->use_simd_kernels = TRUE; + + fr->bDomDec = DOMAINDECOMP(cr); + + natoms = mtop->natoms; + + if (check_box(ir->ePBC, box)) + { + gmx_fatal(FARGS, check_box(ir->ePBC, box)); + } + + /* Test particle insertion ? */ + if (EI_TPI(ir->eI)) + { + /* Set to the size of the molecule to be inserted (the last one) */ + /* Because of old style topologies, we have to use the last cg + * instead of the last molecule type. 
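+ * fr->n_tpi is therefore set to the size of the last charge group, and
+ * checked against the size of the last molecule below.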
+ */ + cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs; + fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1]; + if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1]) + { + gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group."); + } + } + else + { + fr->n_tpi = 0; + } + + /* Copy AdResS parameters */ + if (ir->bAdress) + { + fr->adress_type = ir->adress->type; + fr->adress_const_wf = ir->adress->const_wf; + fr->adress_ex_width = ir->adress->ex_width; + fr->adress_hy_width = ir->adress->hy_width; + fr->adress_icor = ir->adress->icor; + fr->adress_site = ir->adress->site; + fr->adress_ex_forcecap = ir->adress->ex_forcecap; + fr->adress_do_hybridpairs = ir->adress->do_hybridpairs; + + + snew(fr->adress_group_explicit, ir->adress->n_energy_grps); + for (i = 0; i < ir->adress->n_energy_grps; i++) + { + fr->adress_group_explicit[i] = ir->adress->group_explicit[i]; + } + + fr->n_adress_tf_grps = ir->adress->n_tf_grps; + snew(fr->adress_tf_table_index, fr->n_adress_tf_grps); + for (i = 0; i < fr->n_adress_tf_grps; i++) + { + fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i]; + } + copy_rvec(ir->adress->refs, fr->adress_refs); + } + else + { + fr->adress_type = eAdressOff; + fr->adress_do_hybridpairs = FALSE; + } + + /* Copy the user determined parameters */ + fr->userint1 = ir->userint1; + fr->userint2 = ir->userint2; + fr->userint3 = ir->userint3; + fr->userint4 = ir->userint4; + fr->userreal1 = ir->userreal1; + fr->userreal2 = ir->userreal2; + fr->userreal3 = ir->userreal3; + fr->userreal4 = ir->userreal4; + + /* Shell stuff */ + fr->fc_stepsize = ir->fc_stepsize; + + /* Free energy */ + fr->efep = ir->efep; + fr->sc_alphavdw = ir->fepvals->sc_alpha; + if (ir->fepvals->bScCoul) + { + fr->sc_alphacoul = ir->fepvals->sc_alpha; + fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6); + } + else + { + fr->sc_alphacoul = 0; + fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */ + } + fr->sc_power = ir->fepvals->sc_power; + fr->sc_r_power = ir->fepvals->sc_r_power; + fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6); + + env = getenv("GMX_SCSIGMA_MIN"); + if (env != NULL) + { + dbl = 0; + sscanf(env, "%lf", &dbl); + fr->sc_sigma6_min = pow(dbl, 6); + if (fp) + { + fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl); + } + } + + fr->bNonbonded = TRUE; + if (getenv("GMX_NO_NONBONDED") != NULL) + { + /* turn off non-bonded calculations */ + fr->bNonbonded = FALSE; + md_print_warn(cr, fp, + "Found environment variable GMX_NO_NONBONDED.\n" + "Disabling nonbonded calculations.\n"); + } + + bGenericKernelOnly = FALSE; + + /* We now check in the NS code whether a particular combination of interactions + * can be used with water optimization, and disable it if that is not the case. + */ + + if (getenv("GMX_NB_GENERIC") != NULL) + { + if (fp != NULL) + { + fprintf(fp, + "Found environment variable GMX_NB_GENERIC.\n" + "Disabling all interaction-specific nonbonded kernels, will only\n" + "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n"); + } + bGenericKernelOnly = TRUE; + } + + if (bGenericKernelOnly == TRUE) + { + bNoSolvOpt = TRUE; + } + + if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) ) + { + fr->use_simd_kernels = FALSE; + if (fp != NULL) + { + fprintf(fp, + "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n" + "Disabling the usage of any SIMD-specific kernel routines (e.g. 
SSE2/SSE4.1/AVX).\n\n"); + } + } + + fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM); + + /* Check if we can/should do all-vs-all kernels */ + fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL); + fr->AllvsAll_work = NULL; + fr->AllvsAll_workgb = NULL; + + /* All-vs-all kernels have not been implemented in 4.6, and + * the SIMD group kernels are also buggy in this case. Non-SIMD + * group kernels are OK. See Redmine #1249. */ + if (fr->bAllvsAll) + { + fr->bAllvsAll = FALSE; + fr->use_simd_kernels = FALSE; + if (fp != NULL) + { + fprintf(fp, + "\nYour simulation settings would have triggered the efficient all-vs-all\n" + "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n" + "4.6. Also, we can't use the accelerated SIMD kernels here because\n" + "of an unfixed bug. The reference C kernels are correct, though, so\n" + "we are proceeding by disabling all CPU architecture-specific\n" + "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n" + "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n"); + } + } + + /* Neighbour searching stuff */ + fr->cutoff_scheme = ir->cutoff_scheme; + fr->bGrid = (ir->ns_type == ensGRID); + fr->ePBC = ir->ePBC; + + if (fr->cutoff_scheme == ecutsGROUP) + { + const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n" + "removed in a future release when 'verlet' supports all interaction forms.\n"; + + if (MASTER(cr)) + { + fprintf(stderr, "\n%s\n", note); + } + if (fp != NULL) + { + fprintf(fp, "\n%s\n", note); + } + } + + /* Determine if we will do PBC for distances in bonded interactions */ + if (fr->ePBC == epbcNONE) + { + fr->bMolPBC = FALSE; + } + else + { + if (!DOMAINDECOMP(cr)) + { + /* The group cut-off scheme and SHAKE assume charge groups + * are whole, but not using molpbc is faster in most cases. + */ + if (fr->cutoff_scheme == ecutsGROUP || + (ir->eConstrAlg == econtSHAKE && + (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 || + gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0))) + { + fr->bMolPBC = ir->bPeriodicMols; + } + else + { + fr->bMolPBC = TRUE; + if (getenv("GMX_USE_GRAPH") != NULL) + { + fr->bMolPBC = FALSE; + if (fp) + { + fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n"); + } + } + } + } + else + { + fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC); + } + } + fr->bGB = (ir->implicit_solvent == eisGBSA); + + fr->rc_scaling = ir->refcoord_scaling; + copy_rvec(ir->posres_com, fr->posres_com); + copy_rvec(ir->posres_comB, fr->posres_comB); + fr->rlist = cutoff_inf(ir->rlist); + fr->rlistlong = cutoff_inf(ir->rlistlong); + fr->eeltype = ir->coulombtype; + fr->vdwtype = ir->vdwtype; + fr->ljpme_combination_rule = ir->ljpme_combination_rule; + + fr->coulomb_modifier = ir->coulomb_modifier; + fr->vdw_modifier = ir->vdw_modifier; + + /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */ + switch (fr->eeltype) + { + case eelCUT: + fr->nbkernel_elec_interaction = (fr->bGB) ? 
GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB; + break; + + case eelRF: + case eelGRF: + case eelRF_NEC: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD; + break; + + case eelRF_ZERO: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD; + fr->coulomb_modifier = eintmodEXACTCUTOFF; + break; + + case eelSWITCH: + case eelSHIFT: + case eelUSER: + case eelENCADSHIFT: + case eelPMESWITCH: + case eelPMEUSER: + case eelPMEUSERSWITCH: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE; + break; + + case eelPME: + case eelEWALD: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD; + break; + + default: + gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]); + break; + } + + /* Vdw: Translate from mdp settings to kernel format */ + switch (fr->vdwtype) + { + case evdwCUT: + if (fr->bBHAM) + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM; + } + else + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES; + } + break; + case evdwPME: + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD; + break; + + case evdwSWITCH: + case evdwSHIFT: + case evdwUSER: + case evdwENCADSHIFT: + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE; + break; + + default: + gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]); + break; + } + + /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */ + fr->nbkernel_elec_modifier = fr->coulomb_modifier; + fr->nbkernel_vdw_modifier = fr->vdw_modifier; + ++ fr->rvdw = cutoff_inf(ir->rvdw); ++ fr->rvdw_switch = ir->rvdw_switch; ++ fr->rcoulomb = cutoff_inf(ir->rcoulomb); ++ fr->rcoulomb_switch = ir->rcoulomb_switch; ++ + fr->bTwinRange = fr->rlistlong > fr->rlist; + fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD); + + fr->reppow = mtop->ffparams.reppow; + + if (ir->cutoff_scheme == ecutsGROUP) + { + fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS)) + && !EVDW_PME(fr->vdwtype)); + /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */ + fr->bcoultab = !(fr->eeltype == eelCUT || + fr->eeltype == eelEWALD || + fr->eeltype == eelPME || + fr->eeltype == eelRF || + fr->eeltype == eelRF_ZERO); + + /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely + * going to be faster to tabulate the interaction than calling the generic kernel. ++ * However, if generic kernels have been requested we keep things analytically. + */ - if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH) ++ if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && ++ fr->nbkernel_vdw_modifier == eintmodPOTSWITCH && ++ bGenericKernelOnly == FALSE) + { + if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw)) + { + fr->bcoultab = TRUE; ++ /* Once we tabulate electrostatics, we can use the switch function for LJ, ++ * which would otherwise need two tables. 
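++ * (A table kernel for electrostatics combined with analytically
++ * switched LJ avoids generating a second table set.)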
++ */ + } + } + else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) || + ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD && + fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF && + (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT)))) + { - if (fr->rcoulomb != fr->rvdw) ++ if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE)) + { + fr->bcoultab = TRUE; + } + } + ++ if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH) ++ { ++ fr->bcoultab = TRUE; ++ } ++ if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH) ++ { ++ fr->bvdwtab = TRUE; ++ } ++ + if (getenv("GMX_REQUIRE_TABLES")) + { + fr->bvdwtab = TRUE; + fr->bcoultab = TRUE; + } + + if (fp) + { + fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]); + fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]); + } + + if (fr->bvdwtab == TRUE) + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE; + fr->nbkernel_vdw_modifier = eintmodNONE; + } + if (fr->bcoultab == TRUE) + { + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE; + fr->nbkernel_elec_modifier = eintmodNONE; + } + } + + if (ir->cutoff_scheme == ecutsVERLET) + { + if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS)) + { + gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]); + } + fr->bvdwtab = FALSE; + fr->bcoultab = FALSE; + } + + /* Tables are used for direct ewald sum */ + if (fr->bEwald) + { + if (EEL_PME(ir->coulombtype)) + { + if (fp) + { + fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n"); + } + if (ir->coulombtype == eelP3M_AD) + { + please_cite(fp, "Hockney1988"); + please_cite(fp, "Ballenegger2012"); + } + else + { + please_cite(fp, "Essmann95a"); + } + + if (ir->ewald_geometry == eewg3DC) + { + if (fp) + { + fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n"); + } + please_cite(fp, "In-Chul99a"); + } + } + fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol); + init_ewald_tab(&(fr->ewald_table), ir, fp); + if (fp) + { + fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n", + 1/fr->ewaldcoeff_q); + } + } + + if (EVDW_PME(ir->vdwtype)) + { + if (fp) + { + fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n"); + } + please_cite(fp, "Essmann95a"); + fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj); + if (fp) + { + fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n", + 1/fr->ewaldcoeff_lj); + } + } + + /* Electrostatics */ + fr->epsilon_r = ir->epsilon_r; + fr->epsilon_rf = ir->epsilon_rf; + fr->fudgeQQ = mtop->ffparams.fudgeQQ; - fr->rcoulomb_switch = ir->rcoulomb_switch; - fr->rcoulomb = cutoff_inf(ir->rcoulomb); + + /* Parameters for generalized RF */ + fr->zsquare = 0.0; + fr->temp = 0.0; + + if (fr->eeltype == eelGRF) + { + init_generalized_rf(fp, mtop, ir, fr); + } + + fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) || + gmx_mtop_ftype_count(mtop, F_POSRES) > 0 || + gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 || + IR_ELEC_FIELD(*ir) || + (fr->adress_icor != eAdressICOff) + ); + + if (fr->cutoff_scheme == ecutsGROUP && + ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) + { + /* Count the total number of charge groups */ + fr->cg_nalloc = ncg_mtop(mtop); + srenew(fr->cg_cm, fr->cg_nalloc); + } + if (fr->shift_vec == NULL) 
+ { + snew(fr->shift_vec, SHIFTS); + } + + if (fr->fshift == NULL) + { + snew(fr->fshift, SHIFTS); + } + + if (fr->nbfp == NULL) + { + fr->ntype = mtop->ffparams.atnr; + fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM); + if (EVDW_PME(fr->vdwtype)) + { + fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr); + } + } + + /* Copy the energy group exclusions */ + fr->egp_flags = ir->opts.egp_flags; + + /* Van der Waals stuff */ - fr->rvdw = cutoff_inf(ir->rvdw); - fr->rvdw_switch = ir->rvdw_switch; + if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM) + { + if (fr->rvdw_switch >= fr->rvdw) + { + gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)", + fr->rvdw_switch, fr->rvdw); + } + if (fp) + { + fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n", + (fr->eeltype == eelSWITCH) ? "switched" : "shifted", + fr->rvdw_switch, fr->rvdw); + } + } + + if (fr->bBHAM && EVDW_PME(fr->vdwtype)) + { + gmx_fatal(FARGS, "LJ PME not supported with Buckingham"); + } + + if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH)) + { + gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham"); + } + + if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET) + { + gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham"); + } + + if (fp) + { + fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n", + fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw); + } + + fr->eDispCorr = ir->eDispCorr; + if (ir->eDispCorr != edispcNO) + { + set_avcsixtwelve(fp, fr, mtop); + } + + if (fr->bBHAM) + { + set_bham_b_max(fp, fr, mtop); + } + + fr->gb_epsilon_solvent = ir->gb_epsilon_solvent; + + /* Copy the GBSA data (radius, volume and surftens for each + * atomtype) from the topology atomtype section to forcerec. + */ + snew(fr->atype_radius, fr->ntype); + snew(fr->atype_vol, fr->ntype); + snew(fr->atype_surftens, fr->ntype); + snew(fr->atype_gb_radius, fr->ntype); + snew(fr->atype_S_hct, fr->ntype); + + if (mtop->atomtypes.nr > 0) + { + for (i = 0; i < fr->ntype; i++) + { + fr->atype_radius[i] = mtop->atomtypes.radius[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_vol[i] = mtop->atomtypes.vol[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_surftens[i] = mtop->atomtypes.surftens[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i]; + } + } + + /* Generate the GB table if needed */ + if (fr->bGB) + { +#ifdef GMX_DOUBLE + fr->gbtabscale = 2000; +#else + fr->gbtabscale = 500; +#endif + + fr->gbtabr = 100; + fr->gbtab = make_gb_table(oenv, fr); + + init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm); + + /* Copy local gb data (for dd, this is done in dd_partition_system) */ + if (!DOMAINDECOMP(cr)) + { + make_local_gb(cr, fr->born, ir->gb_algorithm); + } + } + + /* Set the charge scaling */ + if (fr->epsilon_r != 0) + { + fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r; + } + else + { + /* eps = 0 is infinite dieletric: no coulomb interactions */ + fr->epsfac = 0; + } + + /* Reaction field constants */ + if (EEL_RF(fr->eeltype)) + { + calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf, + fr->rcoulomb, fr->temp, fr->zsquare, box, + &fr->kappa, &fr->k_rf, &fr->c_rf); + } + + /*This now calculates sum for q and c6*/ + set_chargesum(fp, fr, mtop); + + /* if we are using LR electrostatics, and they are tabulated, + * the tables will contain modified coulomb interactions. 
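+ * For example, with PME the short-range table holds erfc(beta*r)/r
+ * rather than plain 1/r.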
+ * Since we want to use the non-shifted ones for 1-4 + * coulombic interactions, we must have an extra set of tables. + */ + + /* Construct tables. + * A little unnecessary to make both vdw and coul tables sometimes, + * but what the heck... */ + + bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald || + (ir->eDispCorr != edispcNO && ir_vdw_switched(ir)); + + bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT || ++ fr->coulomb_modifier != eintmodNONE || ++ fr->vdw_modifier != eintmodNONE || + fr->bBHAM || fr->bEwald) && + (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 || + gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 || + gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0)); + + negp_pp = ir->opts.ngener - ir->nwall; + negptable = 0; + if (!bMakeTables) + { + bSomeNormalNbListsAreInUse = TRUE; + fr->nnblists = 1; + } + else + { + bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO); + for (egi = 0; egi < negp_pp; egi++) + { + for (egj = egi; egj < negp_pp; egj++) + { + egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)]; + if (!(egp_flags & EGP_EXCL)) + { + if (egp_flags & EGP_TABLE) + { + negptable++; + } + else + { + bSomeNormalNbListsAreInUse = TRUE; + } + } + } + } + if (bSomeNormalNbListsAreInUse) + { + fr->nnblists = negptable + 1; + } + else + { + fr->nnblists = negptable; + } + if (fr->nnblists > 1) + { + snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener); + } + } + + if (ir->adress) + { + fr->nnblists *= 2; + } + + snew(fr->nblists, fr->nnblists); + + /* This code automatically gives table length tabext without cut-off's, + * in that case grompp should already have checked that we do not need + * normal tables and we only generate tables for 1-4 interactions. + */ + rtab = ir->rlistlong + ir->tabext; + + if (bMakeTables) + { + /* make tables for ordinary interactions */ + if (bSomeNormalNbListsAreInUse) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]); + if (ir->adress) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]); + } + if (!bMakeSeparate14Table) + { + fr->tab14 = fr->nblists[0].table_elec_vdw; + } + m = 1; + } + else + { + m = 0; + } + if (negptable > 0) + { + /* Read the special tables for certain energy group pairs */ + nm_ind = mtop->groups.grps[egcENER].nm_ind; + for (egi = 0; egi < negp_pp; egi++) + { + for (egj = egi; egj < negp_pp; egj++) + { + egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)]; + if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL)) + { + nbl = &(fr->nblists[m]); + if (fr->nnblists > 1) + { + fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m; + } + /* Read the table file with the two energy groups names appended */ + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, + *mtop->groups.grpname[nm_ind[egi]], + *mtop->groups.grpname[nm_ind[egj]], + &fr->nblists[m]); + if (ir->adress) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, + *mtop->groups.grpname[nm_ind[egi]], + *mtop->groups.grpname[nm_ind[egj]], + &fr->nblists[fr->nnblists/2+m]); + } + m++; + } + else if (fr->nnblists > 1) + { + fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0; + } + } + } + } + } ++ else if ((fr->eDispCorr != edispcNO) && ++ ((fr->vdw_modifier == eintmodPOTSWITCH) || ++ (fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdw_modifier == eintmodPOTSHIFT))) ++ { ++ /* Tables might not be used for the potential modifier interactions per se, but ++ * we still need them to evaluate switch/shift dispersion corrections in this case. 
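++ * The correction integrates the difference between the modified and
++ * the plain r^-6 interaction over the cut-off region, which requires
++ * the tabulated form of the modified interaction.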
++ */ ++ make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]); ++ } ++ + if (bMakeSeparate14Table) + { + /* generate extra tables with plain Coulomb for 1-4 interactions only */ + fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab, + GMX_MAKETABLES_14ONLY); + } + + /* Read AdResS Thermo Force table if needed */ + if (fr->adress_icor == eAdressICThermoForce) + { + /* old todo replace */ + + if (ir->adress->n_tf_grps > 0) + { + make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box); + + } + else + { + /* load the default table */ + snew(fr->atf_tabs, 1); + fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box); + } + } + + /* Wall stuff */ + fr->nwall = ir->nwall; + if (ir->nwall && ir->wall_type == ewtTABLE) + { + make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr); + } + + if (fcd && tabbfn) + { + fcd->bondtab = make_bonded_tables(fp, + F_TABBONDS, F_TABBONDSNC, + mtop, tabbfn, "b"); + fcd->angletab = make_bonded_tables(fp, + F_TABANGLES, -1, + mtop, tabbfn, "a"); + fcd->dihtab = make_bonded_tables(fp, + F_TABDIHS, -1, + mtop, tabbfn, "d"); + } + else + { + if (debug) + { + fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n"); + } + } + + /* QM/MM initialization if requested + */ + if (ir->bQMMM) + { + fprintf(stderr, "QM/MM calculation requested.\n"); + } + + fr->bQMMM = ir->bQMMM; + fr->qr = mk_QMMMrec(); + + /* Set all the static charge group info */ + fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt, + &bFEP_NonBonded, + &fr->bExcl_IntraCGAll_InterCGNone); + if (DOMAINDECOMP(cr)) + { + fr->cginfo = NULL; + } + else + { + fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb); + } + + if (!DOMAINDECOMP(cr)) + { + forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop), + mtop->natoms, mtop->natoms, mtop->natoms); + } + + fr->print_force = print_force; + + + /* coarse load balancing vars */ + fr->t_fnbf = 0.; + fr->t_wait = 0.; + fr->timesteps = 0; + + /* Initialize neighbor search */ + init_ns(fp, cr, &fr->ns, fr, mtop); + + if (cr->duty & DUTY_PP) + { + gmx_nonbonded_setup(fr, bGenericKernelOnly); + /* + if (ir->bAdress) + { + gmx_setup_adress_kernels(fp,bGenericKernelOnly); + } + */ + } + + /* Initialize the thread working data for bonded interactions */ + init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr); + + snew(fr->excl_load, fr->nthreads+1); + + if (fr->cutoff_scheme == ecutsVERLET) + { + if (ir->rcoulomb != ir->rvdw) + { + gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical"); + } + + init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt); + } + + /* fr->ic is used both by verlet and group kernels (to some extent) now */ + init_interaction_const(fp, cr, &fr->ic, fr, rtab); + + if (ir->eDispCorr != edispcNO) + { + calc_enervirdiff(fp, ir->eDispCorr, fr); + } +} + +#define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r) +#define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i) +#define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b]) + +void pr_forcerec(FILE *fp, t_forcerec *fr) +{ + int i; + + pr_real(fp, fr->rlist); + pr_real(fp, fr->rcoulomb); + pr_real(fp, fr->fudgeQQ); + pr_bool(fp, fr->bGrid); + pr_bool(fp, fr->bTwinRange); + /*pr_int(fp,fr->cg0); + pr_int(fp,fr->hcg);*/ + for (i = 0; i < fr->nnblists; i++) + { + pr_int(fp, fr->nblists[i].table_elec_vdw.n); + } + pr_real(fp, fr->rcoulomb_switch); + pr_real(fp, fr->rcoulomb); + + fflush(fp); +} + +void forcerec_set_excl_load(t_forcerec *fr, + const 
gmx_localtop_t *top) +{ + const int *ind, *a; + int t, i, j, ntot, n, ntarget; + + ind = top->excls.index; + a = top->excls.a; + + ntot = 0; + for (i = 0; i < top->excls.nr; i++) + { + for (j = ind[i]; j < ind[i+1]; j++) + { + if (a[j] > i) + { + ntot++; + } + } + } + + fr->excl_load[0] = 0; + n = 0; + i = 0; + for (t = 1; t <= fr->nthreads; t++) + { + ntarget = (ntot*t)/fr->nthreads; + while (i < top->excls.nr && n < ntarget) + { + for (j = ind[i]; j < ind[i+1]; j++) + { + if (a[j] > i) + { + n++; + } + } + i++; + } + fr->excl_load[t] = i; + } +} diff --cc src/gromacs/mdlib/ns.c index 5208983dec,0000000000..496f605300 mode 100644,000000..100644 --- a/src/gromacs/mdlib/ns.c +++ b/src/gromacs/mdlib/ns.c @@@ -1,2978 -1,0 +1,2970 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include "sysstuff.h" +#include "gromacs/utility/smalloc.h" +#include "macros.h" +#include "gromacs/math/utilities.h" +#include "vec.h" +#include "types/commrec.h" +#include "network.h" +#include "nsgrid.h" +#include "force.h" +#include "nonbonded.h" +#include "ns.h" +#include "pbc.h" +#include "names.h" +#include "gmx_fatal.h" +#include "nrnb.h" +#include "txtdump.h" +#include "mtop_util.h" + +#include "domdec.h" +#include "adress.h" + + +/* + * E X C L U S I O N H A N D L I N G + */ + +#ifdef DEBUG +static void SETEXCL_(t_excl e[], atom_id i, atom_id j) +{ + e[j] = e[j] | (1< 0) ? length % simd_width : 0; + + return (offset == 0) ? 
length : length-offset+simd_width; +} +/************************************************ + * + * U T I L I T I E S F O R N S + * + ************************************************/ + +void reallocate_nblist(t_nblist *nl) +{ + if (gmx_debug_at) + { + fprintf(debug, "reallocating neighborlist (ielec=%d, ivdw=%d, igeometry=%d, type=%d), maxnri=%d\n", + nl->ielec, nl->ivdw, nl->igeometry, nl->type, nl->maxnri); + } + srenew(nl->iinr, nl->maxnri); + if (nl->igeometry == GMX_NBLIST_GEOMETRY_CG_CG) + { + srenew(nl->iinr_end, nl->maxnri); + } + srenew(nl->gid, nl->maxnri); + srenew(nl->shift, nl->maxnri); + srenew(nl->jindex, nl->maxnri+1); +} + + +static void init_nblist(FILE *log, t_nblist *nl_sr, t_nblist *nl_lr, + int maxsr, int maxlr, + int ivdw, int ivdwmod, + int ielec, int ielecmod, - int igeometry, int type) ++ int igeometry, int type, ++ gmx_bool bElecAndVdwSwitchDiffers) +{ + t_nblist *nl; + int homenr; + int i, nn; + + for (i = 0; (i < 2); i++) + { + nl = (i == 0) ? nl_sr : nl_lr; + homenr = (i == 0) ? maxsr : maxlr; + + if (nl == NULL) + { + continue; + } + + + /* Set coul/vdw in neighborlist, and for the normal loops we determine + * an index of which one to call. + */ + nl->ivdw = ivdw; + nl->ivdwmod = ivdwmod; + nl->ielec = ielec; + nl->ielecmod = ielecmod; + nl->type = type; + nl->igeometry = igeometry; + + if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY) + { + nl->igeometry = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE; + } + + /* This will also set the simd_padding_width field */ - gmx_nonbonded_set_kernel_pointers( (i == 0) ? log : NULL, nl); ++ gmx_nonbonded_set_kernel_pointers( (i == 0) ? log : NULL, nl, bElecAndVdwSwitchDiffers); + + /* maxnri is influenced by the number of shifts (maximum is 8) + * and the number of energy groups. + * If it is not enough, nl memory will be reallocated during the run. + * 4 seems to be a reasonable factor, which only causes reallocation + * during runs with tiny systems and many energy groups. + */ + nl->maxnri = homenr*4; + nl->maxnrj = 0; + nl->nri = -1; + nl->nrj = 0; + nl->iinr = NULL; + nl->gid = NULL; + nl->shift = NULL; + nl->jindex = NULL; + nl->jjnr = NULL; + nl->excl_fep = NULL; + reallocate_nblist(nl); + nl->jindex[0] = 0; + + if (debug) + { + fprintf(debug, "Initiating neighbourlist (ielec=%d, ivdw=%d, type=%d) for %s interactions,\nwith %d SR, %d LR atoms.\n", + nl->ielec, nl->ivdw, nl->type, gmx_nblist_geometry_names[nl->igeometry], maxsr, maxlr); + } + } +} + +void init_neighbor_list(FILE *log, t_forcerec *fr, int homenr) +{ + /* Make maxlr tunable! (does not seem to be a big difference though) + * This parameter determines the number of i particles in a long range + * neighbourlist. Too few means many function calls, too many means + * cache thrashing. + */ + int maxsr, maxsr_wat, maxlr, maxlr_wat; - int ielec, ielecf, ivdw, ielecmod, ielecmodf, ivdwmod, type; ++ int ielec, ivdw, ielecmod, ivdwmod, type; + int solvent; + int igeometry_def, igeometry_w, igeometry_ww; + int i; ++ gmx_bool bElecAndVdwSwitchDiffers; + t_nblists *nbl; + + /* maxsr = homenr-fr->nWatMol*3; */ + maxsr = homenr; + + if (maxsr < 0) + { + gmx_fatal(FARGS, "%s, %d: Negative number of short range atoms.\n" + "Call your Gromacs dealer for assistance.", __FILE__, __LINE__); + } + /* This is just for initial allocation, so we do not reallocate + * all the nlist arrays many times in a row. + * The numbers seem very accurate, but they are uncritical. 
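+ * For example, homenr = 4000 atoms of which 1000 are SPC waters gives + * maxsr = 4000 and maxsr_wat = min(1000, 1334) = 1000, while twin-range + * runs start from only maxlr = 50 long-range i-particles.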
+ */ + maxsr_wat = min(fr->nWatMol, (homenr+2)/3); + if (fr->bTwinRange) + { + maxlr = 50; + maxlr_wat = min(maxsr_wat, maxlr); + } + else + { + maxlr = maxlr_wat = 0; + } + + /* Determine the values for ielec/ivdw. */ - ielec = fr->nbkernel_elec_interaction; - ivdw = fr->nbkernel_vdw_interaction; - ielecmod = fr->nbkernel_elec_modifier; - ivdwmod = fr->nbkernel_vdw_modifier; - type = GMX_NBLIST_INTERACTION_STANDARD; ++ ielec = fr->nbkernel_elec_interaction; ++ ivdw = fr->nbkernel_vdw_interaction; ++ ielecmod = fr->nbkernel_elec_modifier; ++ ivdwmod = fr->nbkernel_vdw_modifier; ++ type = GMX_NBLIST_INTERACTION_STANDARD; ++ bElecAndVdwSwitchDiffers = ( (fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw)); + + fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0); + if (!fr->ns.bCGlist) + { + igeometry_def = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE; + } + else + { + igeometry_def = GMX_NBLIST_GEOMETRY_CG_CG; + if (log != NULL) + { + fprintf(log, "\nUsing charge-group - charge-group neighbor lists and kernels\n\n"); + } + } + + if (fr->solvent_opt == esolTIP4P) + { + igeometry_w = GMX_NBLIST_GEOMETRY_WATER4_PARTICLE; + igeometry_ww = GMX_NBLIST_GEOMETRY_WATER4_WATER4; + } + else + { + igeometry_w = GMX_NBLIST_GEOMETRY_WATER3_PARTICLE; + igeometry_ww = GMX_NBLIST_GEOMETRY_WATER3_WATER3; + } + + for (i = 0; i < fr->nnblists; i++) + { + nbl = &(fr->nblists[i]); + + if ((fr->adress_type != eAdressOff) && (i >= fr->nnblists/2)) + { + type = GMX_NBLIST_INTERACTION_ADRESS; + } + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ], &nbl->nlist_lr[eNL_VDWQQ], - maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, igeometry_def, type); ++ maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, igeometry_def, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDW], &nbl->nlist_lr[eNL_VDW], - maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, igeometry_def, type); ++ maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, igeometry_def, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ], &nbl->nlist_lr[eNL_QQ], - maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_def, type); ++ maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_def, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_WATER], &nbl->nlist_lr[eNL_VDWQQ_WATER], - maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_w, type); ++ maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_w, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ_WATER], &nbl->nlist_lr[eNL_QQ_WATER], - maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_w, type); ++ maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_w, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_WATERWATER], &nbl->nlist_lr[eNL_VDWQQ_WATERWATER], - maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_ww, type); ++ maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_ww, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ_WATERWATER], &nbl->nlist_lr[eNL_QQ_WATERWATER], - maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_ww, type); ++ maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_ww, type, bElecAndVdwSwitchDiffers); + + /* Did we get the solvent loops so we can use optimized water kernels? 
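+ * If any of them is missing we fall back to esolNO below, so that all + * interactions go through the plain particle-particle kernels.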
*/ + if (nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf == NULL + || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf == NULL +#ifndef DISABLE_WATERWATER_NLIST + || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf == NULL + || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf == NULL +#endif + ) + { + fr->solvent_opt = esolNO; + if (log != NULL) + { + fprintf(log, "Note: The available nonbonded kernels do not support water optimization - disabling.\n"); + } + } + + if (fr->efep != efepNO) + { - if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */ - { - ielecf = GMX_NBKERNEL_ELEC_EWALD; - ielecmodf = eintmodNONE; - } - else - { - ielecf = ielec; - ielecmodf = ielecmod; - } - + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_FREE], &nbl->nlist_lr[eNL_VDWQQ_FREE], - maxsr, maxlr, ivdw, ivdwmod, ielecf, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY); ++ maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDW_FREE], &nbl->nlist_lr[eNL_VDW_FREE], - maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY); ++ maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ_FREE], &nbl->nlist_lr[eNL_QQ_FREE], - maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielecf, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY); ++ maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers); + } + } + /* QMMM MM list */ + if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom) + { + init_nblist(log, &fr->QMMMlist, NULL, - maxsr, maxlr, 0, 0, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_STANDARD); ++ maxsr, maxlr, 0, 0, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_STANDARD, bElecAndVdwSwitchDiffers); + } + + if (log != NULL) + { + fprintf(log, "\n"); + } + + fr->ns.nblist_initialized = TRUE; +} + +static void reset_nblist(t_nblist *nl) +{ + nl->nri = -1; + nl->nrj = 0; + if (nl->jindex) + { + nl->jindex[0] = 0; + } +} + +static void reset_neighbor_lists(t_forcerec *fr, gmx_bool bResetSR, gmx_bool bResetLR) +{ + int n, i; + + if (fr->bQMMM) + { + /* only reset the short-range nblist */ + reset_nblist(&(fr->QMMMlist)); + } + + for (n = 0; n < fr->nnblists; n++) + { + for (i = 0; i < eNL_NR; i++) + { + if (bResetSR) + { + reset_nblist( &(fr->nblists[n].nlist_sr[i]) ); + } + if (bResetLR) + { + reset_nblist( &(fr->nblists[n].nlist_lr[i]) ); + } + } + } +} + + + + +static gmx_inline void new_i_nblist(t_nblist *nlist, atom_id i_atom, int shift, int gid) +{ + int i, k, nri, nshift; + + nri = nlist->nri; + + /* Check whether we have to increase the i counter */ + if ((nri == -1) || + (nlist->iinr[nri] != i_atom) || + (nlist->shift[nri] != shift) || + (nlist->gid[nri] != gid)) + { + /* This is something else. Now see if any entries have + * been added in the list of the previous atom. 
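+ * An i entry is only committed once it holds at least one j particle + * and has a valid energy-group id; otherwise it is overwritten in + * place, so empty i entries do not accumulate.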
+ */ + if ((nri == -1) || + ((nlist->jindex[nri+1] > nlist->jindex[nri]) && + (nlist->gid[nri] != -1))) + { + /* If so increase the counter */ + nlist->nri++; + nri++; + if (nlist->nri >= nlist->maxnri) + { + nlist->maxnri += over_alloc_large(nlist->nri); + reallocate_nblist(nlist); + } + } + /* Set the number of neighbours and the atom number */ + nlist->jindex[nri+1] = nlist->jindex[nri]; + nlist->iinr[nri] = i_atom; + nlist->gid[nri] = gid; + nlist->shift[nri] = shift; + } + else + { + /* Adding to previous list. First remove possible previous padding */ + if (nlist->simd_padding_width > 1) + { + while (nlist->nrj > 0 && nlist->jjnr[nlist->nrj-1] < 0) + { + nlist->nrj--; + } + } + } +} + +static gmx_inline void close_i_nblist(t_nblist *nlist) +{ + int nri = nlist->nri; + int len; + + if (nri >= 0) + { + /* Add elements up to padding. Since we allocate memory in units + * of the simd_padding width, we do not have to check for possible + * list reallocation here. + */ + while ((nlist->nrj % nlist->simd_padding_width) != 0) + { + /* Use -4 here, so we can write forces for 4 atoms before real data */ + nlist->jjnr[nlist->nrj++] = -4; + } + nlist->jindex[nri+1] = nlist->nrj; + + len = nlist->nrj - nlist->jindex[nri]; + } +} + +static gmx_inline void close_nblist(t_nblist *nlist) +{ + /* Only close this nblist when it has been initialized. + * Avoid the creation of i-lists with no j-particles. + */ + if (nlist->nrj == 0) + { + /* Some assembly kernels do not support empty lists, + * make sure here that we don't generate any empty lists. + * With the current ns code this branch is taken in two cases: + * No i-particles at all: nri=-1 here + * There are i-particles, but no j-particles; nri=0 here + */ + nlist->nri = 0; + } + else + { + /* Close list number nri by incrementing the count */ + nlist->nri++; + } +} + +static gmx_inline void close_neighbor_lists(t_forcerec *fr, gmx_bool bMakeQMMMnblist) +{ + int n, i; + + if (bMakeQMMMnblist) + { + close_nblist(&(fr->QMMMlist)); + } + + for (n = 0; n < fr->nnblists; n++) + { + for (i = 0; (i < eNL_NR); i++) + { + close_nblist(&(fr->nblists[n].nlist_sr[i])); + close_nblist(&(fr->nblists[n].nlist_lr[i])); + } + } +} + + +static gmx_inline void add_j_to_nblist(t_nblist *nlist, atom_id j_atom, gmx_bool bLR) +{ + int nrj = nlist->nrj; + + if (nlist->nrj >= nlist->maxnrj) + { + nlist->maxnrj = round_up_to_simd_width(over_alloc_small(nlist->nrj + 1), nlist->simd_padding_width); + + if (gmx_debug_at) + { + fprintf(debug, "Increasing %s nblist (ielec=%d,ivdw=%d,type=%d,igeometry=%d) j size to %d\n", + bLR ? "LR" : "SR", nlist->ielec, nlist->ivdw, nlist->type, nlist->igeometry, nlist->maxnrj); + } + + srenew(nlist->jjnr, nlist->maxnrj); + } + + nlist->jjnr[nrj] = j_atom; + nlist->nrj++; +} + +static gmx_inline void add_j_to_nblist_cg(t_nblist *nlist, + atom_id j_start, int j_end, + t_excl *bexcl, gmx_bool i_is_j, + gmx_bool bLR) +{ + int nrj = nlist->nrj; + int j; + + if (nlist->nrj >= nlist->maxnrj) + { + nlist->maxnrj = over_alloc_small(nlist->nrj + 1); + if (gmx_debug_at) + { + fprintf(debug, "Increasing %s nblist (ielec=%d,ivdw=%d,type=%d,igeometry=%d) j size to %d\n", + bLR ? "LR" : "SR", nlist->ielec, nlist->ivdw, nlist->type, nlist->igeometry, nlist->maxnrj); + } + + srenew(nlist->jjnr, nlist->maxnrj); + srenew(nlist->jjnr_end, nlist->maxnrj); + srenew(nlist->excl, nlist->maxnrj*MAX_CGCGSIZE); + } + + nlist->jjnr[nrj] = j_start; + nlist->jjnr_end[nrj] = j_end; + + if (j_end - j_start > MAX_CGCGSIZE) + { + gmx_fatal(FARGS, "The charge-group - charge-group neighborlist does not support charge groups larger than %d, found a charge group of size %d", MAX_CGCGSIZE, j_end-j_start); + } + + /* Set the exclusions */ + for (j = j_start; j < j_end; j++) + { + nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j]; + } + if (i_is_j) + { + /* Avoid double counting of intra-cg interactions */ + for (j = 1; j < j_end-j_start; j++) + { + nlist->excl[nrj*MAX_CGCGSIZE + j] |= (1<<j) - 1; + } + } + + nlist->nrj++; +} + +typedef void + put_in_list_t (gmx_bool bHaveVdW[], + int ngid, + t_mdatoms * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool bDoVdW, + gmx_bool bDoCoul, + int solvent_opt); + +static void +put_in_list_at(gmx_bool bHaveVdW[], + int ngid, + t_mdatoms * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool bDoVdW, + gmx_bool bDoCoul, + int solvent_opt) +{ + /* The a[] index has been removed, + * to put it back in i_atom should be a[i0] and jj should be a[jj]. + */ + t_nblist * vdwc; + t_nblist * vdw; + t_nblist * coul; + t_nblist * vdwc_free = NULL; + t_nblist * vdw_free = NULL; + t_nblist * coul_free = NULL; + t_nblist * vdwc_ww = NULL; + t_nblist * coul_ww = NULL; + + int i, j, jcg, igid, gid, nbl_ind, ind_ij; + atom_id jj, jj0, jj1, i_atom; + int i0, nicg, len; + + int *cginfo; + int *type, *typeB; + real *charge, *chargeB; + real qi, qiB, qq, rlj; + gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert; + gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol; + int iwater, jwater; + t_nblist *nlist; + + /* Copy some pointers */ + cginfo = fr->cginfo; + charge = md->chargeA; + chargeB = md->chargeB; + type = md->typeA; + typeB = md->typeB; + bPert = md->bPerturbed; + + /* Get atom range */ + i0 = index[icg]; + nicg = index[icg+1]-i0; + + /* Get the i charge group info */ + igid = GET_CGINFO_GID(cginfo[icg]); + + iwater = (solvent_opt != esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO; + + bFreeEnergy = FALSE; + if (md->nPerturbed) + { + /* Check if any of the particles involved are perturbed. + * If not we can do the cheaper normal put_in_list + * and use more solvent optimization. 
+ */ + for (i = 0; i < nicg; i++) + { + bFreeEnergy |= bPert[i0+i]; + } + /* Loop over the j charge groups */ + for (j = 0; (j < nj && !bFreeEnergy); j++) + { + jcg = jjcg[j]; + jj0 = index[jcg]; + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bFreeEnergy |= bPert[jj]; + } + } + } + + /* Unpack pointers to neighbourlist structs */ + if (fr->nnblists == 1) + { + nbl_ind = 0; + } + else + { + nbl_ind = fr->gid2nblists[GID(igid, jgid, ngid)]; + } + if (bLR) + { + nlist = fr->nblists[nbl_ind].nlist_lr; + } + else + { + nlist = fr->nblists[nbl_ind].nlist_sr; + } + + if (iwater != esolNO) + { + vdwc = &nlist[eNL_VDWQQ_WATER]; + vdw = &nlist[eNL_VDW]; + coul = &nlist[eNL_QQ_WATER]; +#ifndef DISABLE_WATERWATER_NLIST + vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER]; + coul_ww = &nlist[eNL_QQ_WATERWATER]; +#endif + } + else + { + vdwc = &nlist[eNL_VDWQQ]; + vdw = &nlist[eNL_VDW]; + coul = &nlist[eNL_QQ]; + } + + if (!bFreeEnergy) + { + if (iwater != esolNO) + { + /* Loop over the atoms in the i charge group */ + i_atom = i0; + gid = GID(igid, jgid, ngid); + /* Create new i_atom for each energy group */ + if (bDoCoul && bDoVdW) + { + new_i_nblist(vdwc, i_atom, shift, gid); +#ifndef DISABLE_WATERWATER_NLIST + new_i_nblist(vdwc_ww, i_atom, shift, gid); +#endif + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); +#ifndef DISABLE_WATERWATER_NLIST + new_i_nblist(coul_ww, i_atom, shift, gid); +#endif + } + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + if (jcg == icg) + { + continue; + } + + jj0 = index[jcg]; + jwater = GET_CGINFO_SOLOPT(cginfo[jcg]); + + if (iwater == esolSPC && jwater == esolSPC) + { + /* Interaction between two SPC molecules */ + if (!bDoCoul) + { + /* VdW only - only first atoms in each water interact */ + add_j_to_nblist(vdw, jj0, bLR); + } + else + { +#ifdef DISABLE_WATERWATER_NLIST + /* Add entries for the three atoms - only do VdW if we need to */ + if (!bDoVdW) + { + add_j_to_nblist(coul, jj0, bLR); + } + else + { + add_j_to_nblist(vdwc, jj0, bLR); + } + add_j_to_nblist(coul, jj0+1, bLR); + add_j_to_nblist(coul, jj0+2, bLR); +#else + /* One entry for the entire water-water interaction */ + if (!bDoVdW) + { + add_j_to_nblist(coul_ww, jj0, bLR); + } + else + { + add_j_to_nblist(vdwc_ww, jj0, bLR); + } +#endif + } + } + else if (iwater == esolTIP4P && jwater == esolTIP4P) + { + /* Interaction between two TIP4p molecules */ + if (!bDoCoul) + { + /* VdW only - only first atoms in each water interact */ + add_j_to_nblist(vdw, jj0, bLR); + } + else + { +#ifdef DISABLE_WATERWATER_NLIST + /* Add entries for the four atoms - only do VdW if we need to */ + if (bDoVdW) + { + add_j_to_nblist(vdw, jj0, bLR); + } + add_j_to_nblist(coul, jj0+1, bLR); + add_j_to_nblist(coul, jj0+2, bLR); + add_j_to_nblist(coul, jj0+3, bLR); +#else + /* One entry for the entire water-water interaction */ + if (!bDoVdW) + { + add_j_to_nblist(coul_ww, jj0, bLR); + } + else + { + add_j_to_nblist(vdwc_ww, jj0, bLR); + } +#endif + } + } + else + { + /* j charge group is not water, but i is. + * Add entries to the water-other_atom lists; the geometry of the water + * molecule doesn't matter - that is taken care of in the nonbonded kernel, + * so we don't care if it is SPC or TIP4P... 
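+ * Each j atom is routed purely on its parameters: charge only goes to + * the coul list, LJ only to the vdw list, and both to vdwc.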
+ */ + + jj1 = index[jcg+1]; + + if (!bDoVdW) + { + for (jj = jj0; (jj < jj1); jj++) + { + if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + else if (!bDoCoul) + { + for (jj = jj0; (jj < jj1); jj++) + { + if (bHaveVdW[type[jj]]) + { + add_j_to_nblist(vdw, jj, bLR); + } + } + } + else + { + /* _charge_ _groups_ interact with both coulomb and LJ */ + /* Check which atoms we should add to the lists! */ + for (jj = jj0; (jj < jj1); jj++) + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + } + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); +#ifndef DISABLE_WATERWATER_NLIST + close_i_nblist(coul_ww); + close_i_nblist(vdwc_ww); +#endif + } + else + { + /* no solvent as i charge group */ + /* Loop over the atoms in the i charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + qi = charge[i_atom]; + + /* Create new i_atom for each energy group */ + if (bDoVdW && bDoCoul) + { + new_i_nblist(vdwc, i_atom, shift, gid); + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); + } + bDoVdW_i = (bDoVdW && bHaveVdW[type[i_atom]]); + bDoCoul_i = (bDoCoul && qi != 0); + + if (bDoVdW_i || bDoCoul_i) + { + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + /* Check for large charge groups */ + if (jcg == icg) + { + jj0 = i0 + i + 1; + } + else + { + jj0 = index[jcg]; + } + + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bNotEx = NOTEXCL(bExcl, i, jj); + + if (bNotEx) + { + if (!bDoVdW_i) + { + if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + else if (!bDoCoul_i) + { + if (bHaveVdW[type[jj]]) + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + } + } + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); + } + } + } + else + { + /* we are doing free energy */ + vdwc_free = &nlist[eNL_VDWQQ_FREE]; + vdw_free = &nlist[eNL_VDW_FREE]; + coul_free = &nlist[eNL_QQ_FREE]; + /* Loop over the atoms in the i charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + qi = charge[i_atom]; + qiB = chargeB[i_atom]; + + /* Create new i_atom for each energy group */ + if (bDoVdW && bDoCoul) + { + new_i_nblist(vdwc, i_atom, shift, gid); + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); + } + + new_i_nblist(vdw_free, i_atom, shift, gid); + new_i_nblist(coul_free, i_atom, shift, gid); + new_i_nblist(vdwc_free, i_atom, shift, gid); + + bDoVdW_i = (bDoVdW && + (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]])); + bDoCoul_i = (bDoCoul && (qi != 0 || qiB != 0)); + /* For TIP4P the first atom does not have a charge, + * but the last three do. So we should still put an atom + * without LJ but with charge in the water-atom neighborlist + * for a TIP4p i charge group. + * For SPC type water the first atom has LJ and charge, + * so there is no such problem. 
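+ * Hence for solvent i charge groups we use the plain bDoCoul flag, so + * charged dummy sites still end up in the coulomb lists.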
+ */ + if (iwater == esolNO) + { + bDoCoul_i_sol = bDoCoul_i; + } + else + { + bDoCoul_i_sol = bDoCoul; + } + + if (bDoVdW_i || bDoCoul_i_sol) + { + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + /* Check for large charge groups */ + if (jcg == icg) + { + jj0 = i0 + i + 1; + } + else + { + jj0 = index[jcg]; + } + + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + bFree = bPert[i_atom]; + for (jj = jj0; (jj < jj1); jj++) + { + bFreeJ = bFree || bPert[jj]; + /* Complicated if, because the water H's should also + * see perturbed j-particles + */ + if (iwater == esolNO || i == 0 || bFreeJ) + { + bNotEx = NOTEXCL(bExcl, i, jj); + + if (bNotEx) + { + if (bFreeJ) + { + if (!bDoVdW_i) + { + if (charge[jj] != 0 || chargeB[jj] != 0) + { + add_j_to_nblist(coul_free, jj, bLR); + } + } + else if (!bDoCoul_i) + { + if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) + { + add_j_to_nblist(vdw_free, jj, bLR); + } + } + else + { + if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) + { + if (charge[jj] != 0 || chargeB[jj] != 0) + { + add_j_to_nblist(vdwc_free, jj, bLR); + } + else + { + add_j_to_nblist(vdw_free, jj, bLR); + } + } + else if (charge[jj] != 0 || chargeB[jj] != 0) + { + add_j_to_nblist(coul_free, jj, bLR); + } + } + } + else if (!bDoVdW_i) + { + /* This is done whether or not bWater is set */ + if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + else if (!bDoCoul_i_sol) + { + if (bHaveVdW[type[jj]]) + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + } + } + } + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); + close_i_nblist(vdw_free); + close_i_nblist(coul_free); + close_i_nblist(vdwc_free); + } + } +} + +static void +put_in_list_adress(gmx_bool bHaveVdW[], + int ngid, + t_mdatoms * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool bDoVdW, + gmx_bool bDoCoul, + int solvent_opt) +{ + /* The a[] index has been removed, + * to put it back in i_atom should be a[i0] and jj should be a[jj]. + */ + t_nblist * vdwc; + t_nblist * vdw; + t_nblist * coul; + t_nblist * vdwc_adress = NULL; + t_nblist * vdw_adress = NULL; + t_nblist * coul_adress = NULL; + t_nblist * vdwc_ww = NULL; + t_nblist * coul_ww = NULL; + + int i, j, jcg, igid, gid, nbl_ind, nbl_ind_adress; + atom_id jj, jj0, jj1, i_atom; + int i0, nicg, len; + + int *cginfo; + int *type, *typeB; + real *charge, *chargeB; + real *wf; + real qi, qiB, qq, rlj; + gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert; + gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol; + gmx_bool b_hybrid; + gmx_bool j_all_atom; + int iwater, jwater; + t_nblist *nlist, *nlist_adress; + gmx_bool bEnergyGroupCG; + + /* Copy some pointers */ + cginfo = fr->cginfo; + charge = md->chargeA; + chargeB = md->chargeB; + type = md->typeA; + typeB = md->typeB; + bPert = md->bPerturbed; + wf = md->wf; + + /* Get atom range */ + i0 = index[icg]; + nicg = index[icg+1]-i0; + + /* Get the i charge group info */ + igid = GET_CGINFO_GID(cginfo[icg]); + + iwater = (solvent_opt != esolNO) ? 
GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO; + + if (md->nPerturbed) + { + gmx_fatal(FARGS, "AdResS does not support free energy perturbation\n"); + } + + /* Unpack pointers to neighbourlist structs */ + if (fr->nnblists == 2) + { + nbl_ind = 0; + nbl_ind_adress = 1; + } + else + { + nbl_ind = fr->gid2nblists[GID(igid, jgid, ngid)]; + nbl_ind_adress = nbl_ind+fr->nnblists/2; + } + if (bLR) + { + nlist = fr->nblists[nbl_ind].nlist_lr; + nlist_adress = fr->nblists[nbl_ind_adress].nlist_lr; + } + else + { + nlist = fr->nblists[nbl_ind].nlist_sr; + nlist_adress = fr->nblists[nbl_ind_adress].nlist_sr; + } + + + vdwc = &nlist[eNL_VDWQQ]; + vdw = &nlist[eNL_VDW]; + coul = &nlist[eNL_QQ]; + + vdwc_adress = &nlist_adress[eNL_VDWQQ]; + vdw_adress = &nlist_adress[eNL_VDW]; + coul_adress = &nlist_adress[eNL_QQ]; + + /* We do not support solvent optimization with AdResS for now. + For this we would need hybrid solvent-other kernels */ + + /* no solvent as i charge group */ + /* Loop over the atoms in the i charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + qi = charge[i_atom]; + + /* Create new i_atom for each energy group */ + if (bDoVdW && bDoCoul) + { + new_i_nblist(vdwc, i_atom, shift, gid); + new_i_nblist(vdwc_adress, i_atom, shift, gid); + + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + new_i_nblist(vdw_adress, i_atom, shift, gid); + + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); + new_i_nblist(coul_adress, i_atom, shift, gid); + } + bDoVdW_i = (bDoVdW && bHaveVdW[type[i_atom]]); + bDoCoul_i = (bDoCoul && qi != 0); + + /* Here we find out whether the energy-group interaction belongs to a + * coarse-grained (vsite) or atomistic interaction. Note that, because + * interactions between coarse-grained and other (atomistic) energy groups + * are excluded automatically by grompp, it is sufficient to check for + * the group id of atom i (igid) */ + bEnergyGroupCG = !egp_explicit(fr, igid); + + if (bDoVdW_i || bDoCoul_i) + { + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + /* Check for large charge groups */ + if (jcg == icg) + { + jj0 = i0 + i + 1; + } + else + { + jj0 = index[jcg]; + } + + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bNotEx = NOTEXCL(bExcl, i, jj); + + /* Now we have to exclude interactions which will be zero + * anyway due to the AdResS weights (in previous implementations + * this was done in the force kernel). This is necessary as + * pure interactions (those with b_hybrid=false, i.e. w_i*w_j==1 or 0) + * are put into neighbour lists which will be passed to the + * standard (optimized) kernels for speed. The interactions with + * b_hybrid=true are placed into the _adress neighbour lists and + * processed by the generic AdResS kernel. 
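+ * Consequently b_hybrid below is FALSE only when both weights are + * (within GMX_REAL_EPS) 0 or both are 1; every mixed pair goes to the + * _adress lists.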
+ */ + if ( (bEnergyGroupCG && + wf[i_atom] >= 1-GMX_REAL_EPS && wf[jj] >= 1-GMX_REAL_EPS ) || + ( !bEnergyGroupCG && wf[jj] <= GMX_REAL_EPS ) ) + { + continue; + } + + b_hybrid = !((wf[i_atom] >= 1-GMX_REAL_EPS && wf[jj] >= 1-GMX_REAL_EPS) || + (wf[i_atom] <= GMX_REAL_EPS && wf[jj] <= GMX_REAL_EPS)); + + if (bNotEx) + { + if (!bDoVdW_i) + { + if (charge[jj] != 0) + { + if (!b_hybrid) + { + add_j_to_nblist(coul, jj, bLR); + } + else + { + add_j_to_nblist(coul_adress, jj, bLR); + } + } + } + else if (!bDoCoul_i) + { + if (bHaveVdW[type[jj]]) + { + if (!b_hybrid) + { + add_j_to_nblist(vdw, jj, bLR); + } + else + { + add_j_to_nblist(vdw_adress, jj, bLR); + } + } + } + else + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + if (!b_hybrid) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdwc_adress, jj, bLR); + } + } + else + { + if (!b_hybrid) + { + add_j_to_nblist(vdw, jj, bLR); + } + else + { + add_j_to_nblist(vdw_adress, jj, bLR); + } + + } + } + else if (charge[jj] != 0) + { + if (!b_hybrid) + { + add_j_to_nblist(coul, jj, bLR); + } + else + { + add_j_to_nblist(coul_adress, jj, bLR); + } + + } + } + } + } + } + + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); + close_i_nblist(vdw_adress); + close_i_nblist(coul_adress); + close_i_nblist(vdwc_adress); + } + } +} + +static void +put_in_list_qmmm(gmx_bool gmx_unused bHaveVdW[], + int ngid, + t_mdatoms gmx_unused * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool gmx_unused bDoVdW, + gmx_bool gmx_unused bDoCoul, + int gmx_unused solvent_opt) +{ + t_nblist * coul; + int i, j, jcg, igid, gid; + atom_id jj, jj0, jj1, i_atom; + int i0, nicg; + gmx_bool bNotEx; + + /* Get atom range */ + i0 = index[icg]; + nicg = index[icg+1]-i0; + + /* Get the i charge group info */ + igid = GET_CGINFO_GID(fr->cginfo[icg]); + + coul = &fr->QMMMlist; + + /* Loop over atoms in the ith charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + /* Create new i_atom for each energy group */ + new_i_nblist(coul, i_atom, shift, gid); + + /* Loop over the j charge groups */ + for (j = 0; j < nj; j++) + { + jcg = jjcg[j]; + + /* Charge groups cannot have QM and MM atoms simultaneously */ + if (jcg != icg) + { + jj0 = index[jcg]; + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bNotEx = NOTEXCL(bExcl, i, jj); + if (bNotEx) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + close_i_nblist(coul); + } +} + +static void +put_in_list_cg(gmx_bool gmx_unused bHaveVdW[], + int ngid, + t_mdatoms gmx_unused * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool gmx_unused bDoVdW, + gmx_bool gmx_unused bDoCoul, + int gmx_unused solvent_opt) +{ + int cginfo; + int igid, gid, nbl_ind; + t_nblist * vdwc; + int j, jcg; + + cginfo = fr->cginfo[icg]; + + igid = GET_CGINFO_GID(cginfo); + gid = GID(igid, jgid, ngid); + + /* Unpack pointers to neighbourlist structs */ + if (fr->nnblists == 1) + { + nbl_ind = 0; + } + else + { + nbl_ind = fr->gid2nblists[gid]; + } + if (bLR) + { + vdwc = &fr->nblists[nbl_ind].nlist_lr[eNL_VDWQQ]; + } + else + { + vdwc = &fr->nblists[nbl_ind].nlist_sr[eNL_VDWQQ]; + } + + /* Make a new neighbor list for charge group icg. + * Currently simply one neighbor list is made with LJ and Coulomb. 
+ * If required, zero interactions could be removed here + * or in the force loop. + */ + new_i_nblist(vdwc, index[icg], shift, gid); + vdwc->iinr_end[vdwc->nri] = index[icg+1]; + + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + /* Skip the icg-icg pairs if all self interactions are excluded */ + if (!(jcg == icg && GET_CGINFO_EXCL_INTRA(cginfo))) + { + /* Here we add the j charge group jcg to the list, + * exclusions are also added to the list. + */ + add_j_to_nblist_cg(vdwc, index[jcg], index[jcg+1], bExcl, icg == jcg, bLR); + } + } + + close_i_nblist(vdwc); +} + +static void setexcl(atom_id start, atom_id end, t_blocka *excl, gmx_bool b, + t_excl bexcl[]) +{ + atom_id i, k; + + if (b) + { + for (i = start; i < end; i++) + { + for (k = excl->index[i]; k < excl->index[i+1]; k++) + { + SETEXCL(bexcl, i-start, excl->a[k]); + } + } + } + else + { + for (i = start; i < end; i++) + { + for (k = excl->index[i]; k < excl->index[i+1]; k++) + { + RMEXCL(bexcl, i-start, excl->a[k]); + } + } + } +} + +int calc_naaj(int icg, int cgtot) +{ + int naaj; + + if ((cgtot % 2) == 1) + { + /* Odd number of charge groups, easy */ + naaj = 1 + (cgtot/2); + } + else if ((cgtot % 4) == 0) + { + /* Multiple of four is hard */ + if (icg < cgtot/2) + { + if ((icg % 2) == 0) + { + naaj = 1+(cgtot/2); + } + else + { + naaj = cgtot/2; + } + } + else + { + if ((icg % 2) == 1) + { + naaj = 1+(cgtot/2); + } + else + { + naaj = cgtot/2; + } + } + } + else + { + /* cgtot/2 = odd */ + if ((icg % 2) == 0) + { + naaj = 1+(cgtot/2); + } + else + { + naaj = cgtot/2; + } + } +#ifdef DEBUG + fprintf(log, "naaj=%d\n", naaj); +#endif + + return naaj; +} + +/************************************************ + * + * S I M P L E C O R E S T U F F + * + ************************************************/ + +static real calc_image_tric(rvec xi, rvec xj, matrix box, + rvec b_inv, int *shift) +{ + /* This code assumes that the cut-off is smaller than + * a half times the smallest diagonal element of the box. 
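+ * Example of the NINT-via-trunc trick below: dz*b_inv[ZZ] = -0.4 gives + * tz = (int)(2.1) - 2 = 0, while 0.6 gives tz = (int)(3.1) - 2 = 1, + * i.e. rounding to the nearest integer without calling rint().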
+ */ + const real h25 = 2.5; + real dx, dy, dz; + real r2; + int tx, ty, tz; + + /* Compute diff vector */ + dz = xj[ZZ] - xi[ZZ]; + dy = xj[YY] - xi[YY]; + dx = xj[XX] - xi[XX]; + + /* Perform NINT operation, using trunc operation, therefore + * we first add 2.5 then subtract 2 again + */ + tz = dz*b_inv[ZZ] + h25; + tz -= 2; + dz -= tz*box[ZZ][ZZ]; + dy -= tz*box[ZZ][YY]; + dx -= tz*box[ZZ][XX]; + + ty = dy*b_inv[YY] + h25; + ty -= 2; + dy -= ty*box[YY][YY]; + dx -= ty*box[YY][XX]; + + tx = dx*b_inv[XX]+h25; + tx -= 2; + dx -= tx*box[XX][XX]; + + /* Distance squared */ + r2 = (dx*dx) + (dy*dy) + (dz*dz); + + *shift = XYZ2IS(tx, ty, tz); + + return r2; +} + +static real calc_image_rect(rvec xi, rvec xj, rvec box_size, + rvec b_inv, int *shift) +{ + const real h15 = 1.5; + real ddx, ddy, ddz; + real dx, dy, dz; + real r2; + int tx, ty, tz; + + /* Compute diff vector */ + dx = xj[XX] - xi[XX]; + dy = xj[YY] - xi[YY]; + dz = xj[ZZ] - xi[ZZ]; + + /* Perform NINT operation, using trunc operation, therefore + * we first add 1.5 then subtract 1 again + */ + tx = dx*b_inv[XX] + h15; + ty = dy*b_inv[YY] + h15; + tz = dz*b_inv[ZZ] + h15; + tx--; + ty--; + tz--; + + /* Correct diff vector for translation */ + ddx = tx*box_size[XX] - dx; + ddy = ty*box_size[YY] - dy; + ddz = tz*box_size[ZZ] - dz; + + /* Distance squared */ + r2 = (ddx*ddx) + (ddy*ddy) + (ddz*ddz); + + *shift = XYZ2IS(tx, ty, tz); + + return r2; +} + +static void add_simple(t_ns_buf *nsbuf, int nrj, atom_id cg_j, + gmx_bool bHaveVdW[], int ngid, t_mdatoms *md, + int icg, int jgid, t_block *cgs, t_excl bexcl[], + int shift, t_forcerec *fr, put_in_list_t *put_in_list) +{ + if (nsbuf->nj + nrj > MAX_CG) + { + put_in_list(bHaveVdW, ngid, md, icg, jgid, nsbuf->ncg, nsbuf->jcg, + cgs->index, bexcl, shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + /* Reset buffer contents */ + nsbuf->ncg = nsbuf->nj = 0; + } + nsbuf->jcg[nsbuf->ncg++] = cg_j; + nsbuf->nj += nrj; +} + +static void ns_inner_tric(rvec x[], int icg, int *i_egp_flags, + int njcg, atom_id jcg[], + matrix box, rvec b_inv, real rcut2, + t_block *cgs, t_ns_buf **ns_buf, + gmx_bool bHaveVdW[], int ngid, t_mdatoms *md, + t_excl bexcl[], t_forcerec *fr, + put_in_list_t *put_in_list) +{ + int shift; + int j, nrj, jgid; + int *cginfo = fr->cginfo; + atom_id cg_j, *cgindex; + t_ns_buf *nsbuf; + + cgindex = cgs->index; + shift = CENTRAL; + for (j = 0; (j < njcg); j++) + { + cg_j = jcg[j]; + nrj = cgindex[cg_j+1]-cgindex[cg_j]; + if (calc_image_tric(x[icg], x[cg_j], box, b_inv, &shift) < rcut2) + { + jgid = GET_CGINFO_GID(cginfo[cg_j]); + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + add_simple(&ns_buf[jgid][shift], nrj, cg_j, + bHaveVdW, ngid, md, icg, jgid, cgs, bexcl, shift, fr, + put_in_list); + } + } + } +} + +static void ns_inner_rect(rvec x[], int icg, int *i_egp_flags, + int njcg, atom_id jcg[], + gmx_bool bBox, rvec box_size, rvec b_inv, real rcut2, + t_block *cgs, t_ns_buf **ns_buf, + gmx_bool bHaveVdW[], int ngid, t_mdatoms *md, + t_excl bexcl[], t_forcerec *fr, + put_in_list_t *put_in_list) +{ + int shift; + int j, nrj, jgid; + int *cginfo = fr->cginfo; + atom_id cg_j, *cgindex; + t_ns_buf *nsbuf; + + cgindex = cgs->index; + if (bBox) + { + shift = CENTRAL; + for (j = 0; (j < njcg); j++) + { + cg_j = jcg[j]; + nrj = cgindex[cg_j+1]-cgindex[cg_j]; + if (calc_image_rect(x[icg], x[cg_j], box_size, b_inv, &shift) < rcut2) + { + jgid = GET_CGINFO_GID(cginfo[cg_j]); + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + add_simple(&ns_buf[jgid][shift], nrj, cg_j, + bHaveVdW, ngid, md, icg, jgid, 
cgs, bexcl, shift, fr, + put_in_list); + } + } + } + } + else + { + for (j = 0; (j < njcg); j++) + { + cg_j = jcg[j]; + nrj = cgindex[cg_j+1]-cgindex[cg_j]; + if ((rcut2 == 0) || (distance2(x[icg], x[cg_j]) < rcut2)) + { + jgid = GET_CGINFO_GID(cginfo[cg_j]); + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + add_simple(&ns_buf[jgid][CENTRAL], nrj, cg_j, + bHaveVdW, ngid, md, icg, jgid, cgs, bexcl, CENTRAL, fr, + put_in_list); + } + } + } + } +} + +/* ns_simple_core needs to be adapted for QMMM still 2005 */ + +static int ns_simple_core(t_forcerec *fr, + gmx_localtop_t *top, + t_mdatoms *md, + matrix box, rvec box_size, + t_excl bexcl[], atom_id *aaj, + int ngid, t_ns_buf **ns_buf, + put_in_list_t *put_in_list, gmx_bool bHaveVdW[]) +{ + int naaj, k; + real rlist2; + int nsearch, icg, jcg, igid, i0, nri, nn; + int *cginfo; + t_ns_buf *nsbuf; + /* atom_id *i_atoms; */ + t_block *cgs = &(top->cgs); + t_blocka *excl = &(top->excls); + rvec b_inv; + int m; + gmx_bool bBox, bTriclinic; + int *i_egp_flags; + + rlist2 = sqr(fr->rlist); + + bBox = (fr->ePBC != epbcNONE); + if (bBox) + { + for (m = 0; (m < DIM); m++) + { + b_inv[m] = divide_err(1.0, box_size[m]); + } + bTriclinic = TRICLINIC(box); + } + else + { + bTriclinic = FALSE; + } + + cginfo = fr->cginfo; + + nsearch = 0; + for (icg = fr->cg0; (icg < fr->hcg); icg++) + { + /* + i0 = cgs->index[icg]; + nri = cgs->index[icg+1]-i0; + i_atoms = &(cgs->a[i0]); + i_eg_excl = fr->eg_excl + ngid*md->cENER[*i_atoms]; + setexcl(nri,i_atoms,excl,TRUE,bexcl); + */ + igid = GET_CGINFO_GID(cginfo[icg]); + i_egp_flags = fr->egp_flags + ngid*igid; + setexcl(cgs->index[icg], cgs->index[icg+1], excl, TRUE, bexcl); + + naaj = calc_naaj(icg, cgs->nr); + if (bTriclinic) + { + ns_inner_tric(fr->cg_cm, icg, i_egp_flags, naaj, &(aaj[icg]), + box, b_inv, rlist2, cgs, ns_buf, + bHaveVdW, ngid, md, bexcl, fr, put_in_list); + } + else + { + ns_inner_rect(fr->cg_cm, icg, i_egp_flags, naaj, &(aaj[icg]), + bBox, box_size, b_inv, rlist2, cgs, ns_buf, + bHaveVdW, ngid, md, bexcl, fr, put_in_list); + } + nsearch += naaj; + + for (nn = 0; (nn < ngid); nn++) + { + for (k = 0; (k < SHIFTS); k++) + { + nsbuf = &(ns_buf[nn][k]); + if (nsbuf->ncg > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nsbuf->ncg, nsbuf->jcg, + cgs->index, bexcl, k, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + nsbuf->ncg = nsbuf->nj = 0; + } + } + } + /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */ + setexcl(cgs->index[icg], cgs->index[icg+1], excl, FALSE, bexcl); + } + close_neighbor_lists(fr, FALSE); + + return nsearch; +} + +/************************************************ + * + * N S 5 G R I D S T U F F + * + ************************************************/ + +static gmx_inline void get_dx(int Nx, real gridx, real rc2, int xgi, real x, + int *dx0, int *dx1, real *dcx2) +{ + real dcx, tmp; + int xgi0, xgi1, i; + + if (xgi < 0) + { + *dx0 = 0; + xgi0 = -1; + *dx1 = -1; + xgi1 = 0; + } + else if (xgi >= Nx) + { + *dx0 = Nx; + xgi0 = Nx-1; + *dx1 = Nx-1; + xgi1 = Nx; + } + else + { + dcx2[xgi] = 0; + *dx0 = xgi; + xgi0 = xgi-1; + *dx1 = xgi; + xgi1 = xgi+1; + } + + for (i = xgi0; i >= 0; i--) + { + dcx = (i+1)*gridx-x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + *dx0 = i; + dcx2[i] = tmp; + } + for (i = xgi1; i < Nx; i++) + { + dcx = i*gridx-x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + *dx1 = i; + dcx2[i] = tmp; + } +} + +static gmx_inline void get_dx_dd(int Nx, real gridx, real rc2, int xgi, real x, + int ncpddc, int shift_min, int shift_max, + int *g0, int *g1, real *dcx2) +{ + real dcx, 
tmp; + int g_min, g_max, shift_home; + + if (xgi < 0) + { + g_min = 0; + g_max = Nx - 1; + *g0 = 0; + *g1 = -1; + } + else if (xgi >= Nx) + { + g_min = 0; + g_max = Nx - 1; + *g0 = Nx; + *g1 = Nx - 1; + } + else + { + if (ncpddc == 0) + { + g_min = 0; + g_max = Nx - 1; + } + else + { + if (xgi < ncpddc) + { + shift_home = 0; + } + else + { + shift_home = -1; + } + g_min = (shift_min == shift_home ? 0 : ncpddc); + g_max = (shift_max == shift_home ? ncpddc - 1 : Nx - 1); + } + if (shift_min > 0) + { + *g0 = g_min; + *g1 = g_min - 1; + } + else if (shift_max < 0) + { + *g0 = g_max + 1; + *g1 = g_max; + } + else + { + *g0 = xgi; + *g1 = xgi; + dcx2[xgi] = 0; + } + } + + while (*g0 > g_min) + { + /* Check one grid cell down */ + dcx = ((*g0 - 1) + 1)*gridx - x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + (*g0)--; + dcx2[*g0] = tmp; + } + + while (*g1 < g_max) + { + /* Check one grid cell up */ + dcx = (*g1 + 1)*gridx - x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + (*g1)++; + dcx2[*g1] = tmp; + } +} + + +#define sqr(x) ((x)*(x)) +#define calc_dx2(XI, YI, ZI, y) (sqr(XI-y[XX]) + sqr(YI-y[YY]) + sqr(ZI-y[ZZ])) +#define calc_cyl_dx2(XI, YI, y) (sqr(XI-y[XX]) + sqr(YI-y[YY])) +/**************************************************** + * + * F A S T N E I G H B O R S E A R C H I N G + * + * Optimized neighboursearching routine using grid + * at least 1x1x1, see GROMACS manual + * + ****************************************************/ + + +static void get_cutoff2(t_forcerec *fr, gmx_bool bDoLongRange, + real *rvdw2, real *rcoul2, + real *rs2, real *rm2, real *rl2) +{ + *rs2 = sqr(fr->rlist); + + if (bDoLongRange && fr->bTwinRange) + { + /* With plain cut-off or RF we need to make the list exactly + * up to the cut-off and the cut-off's can be different, + * so we can not simply set them to rlistlong. + * To keep this code compatible with (exotic) old cases, + * we also create lists up to rvdw/rcoulomb for PME and Ewald. + * The interaction check should correspond to: + * !ir_vdw/coulomb_might_be_zero_at_cutoff from inputrec.c. 
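+ * In short: potentials without a modifier can be non-zero at the + * cut-off, so their lists must reach exactly rvdw/rcoulomb, while + * modified potentials vanish there and can safely use rlistlong.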
+ */ + if (((fr->vdwtype == evdwCUT || fr->vdwtype == evdwPME) && + fr->vdw_modifier == eintmodNONE) || + fr->rvdw <= fr->rlist) + { + *rvdw2 = sqr(fr->rvdw); + } + else + { + *rvdw2 = sqr(fr->rlistlong); + } + if (((fr->eeltype == eelCUT || + (EEL_RF(fr->eeltype) && fr->eeltype != eelRF_ZERO) || + fr->eeltype == eelPME || + fr->eeltype == eelEWALD) && + fr->coulomb_modifier == eintmodNONE) || + fr->rcoulomb <= fr->rlist) + { + *rcoul2 = sqr(fr->rcoulomb); + } + else + { + *rcoul2 = sqr(fr->rlistlong); + } + } + else + { + /* Workaround for a gcc -O3 or -ffast-math problem */ + *rvdw2 = *rs2; + *rcoul2 = *rs2; + } + *rm2 = min(*rvdw2, *rcoul2); + *rl2 = max(*rvdw2, *rcoul2); +} + +static void init_nsgrid_lists(t_forcerec *fr, int ngid, gmx_ns_t *ns) +{ + real rvdw2, rcoul2, rs2, rm2, rl2; + int j; + + get_cutoff2(fr, TRUE, &rvdw2, &rcoul2, &rs2, &rm2, &rl2); + + /* Short range buffers */ + snew(ns->nl_sr, ngid); + /* Counters */ + snew(ns->nsr, ngid); + snew(ns->nlr_ljc, ngid); + snew(ns->nlr_one, ngid); + + /* Always allocate both list types, since rcoulomb might now change with PME load balancing */ + /* Long range VdW and Coul buffers */ + snew(ns->nl_lr_ljc, ngid); + /* Long range VdW or Coul only buffers */ + snew(ns->nl_lr_one, ngid); + + for (j = 0; (j < ngid); j++) + { + snew(ns->nl_sr[j], MAX_CG); + snew(ns->nl_lr_ljc[j], MAX_CG); + snew(ns->nl_lr_one[j], MAX_CG); + } + if (debug) + { + fprintf(debug, + "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n", + rs2, rm2, rl2); + } +} + +static int nsgrid_core(t_commrec *cr, t_forcerec *fr, + matrix box, int ngid, + gmx_localtop_t *top, + t_grid *grid, + t_excl bexcl[], gmx_bool *bExcludeAlleg, + t_mdatoms *md, + put_in_list_t *put_in_list, + gmx_bool bHaveVdW[], + gmx_bool bDoLongRange, gmx_bool bMakeQMMMnblist) +{ + gmx_ns_t *ns; + atom_id **nl_lr_ljc, **nl_lr_one, **nl_sr; + int *nlr_ljc, *nlr_one, *nsr; + gmx_domdec_t *dd = NULL; + t_block *cgs = &(top->cgs); + int *cginfo = fr->cginfo; + /* atom_id *i_atoms,*cgsindex=cgs->index; */ + ivec sh0, sh1, shp; + int cell_x, cell_y, cell_z; + int d, tx, ty, tz, dx, dy, dz, cj; +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG + int zsh_ty, zsh_tx, ysh_tx; +#endif + int dx0, dx1, dy0, dy1, dz0, dz1; + int Nx, Ny, Nz, shift = -1, j, nrj, nns, nn = -1; + real gridx, gridy, gridz, grid_x, grid_y, grid_z; + real *dcx2, *dcy2, *dcz2; + int zgi, ygi, xgi; + int cg0, cg1, icg = -1, cgsnr, i0, igid, nri, naaj, max_jcg; + int jcg0, jcg1, jjcg, cgj0, jgid; + int *grida, *gridnra, *gridind; + gmx_bool rvdw_lt_rcoul, rcoul_lt_rvdw; + rvec xi, *cgcm, grid_offset; + real r2, rs2, rvdw2, rcoul2, rm2, rl2, XI, YI, ZI, dcx, dcy, dcz, tmp1, tmp2; + int *i_egp_flags; + gmx_bool bDomDec, bTriclinicX, bTriclinicY; + ivec ncpddc; + + ns = &fr->ns; + + bDomDec = DOMAINDECOMP(cr); + if (bDomDec) + { + dd = cr->dd; + } + + bTriclinicX = ((YY < grid->npbcdim && + (!bDomDec || dd->nc[YY] == 1) && box[YY][XX] != 0) || + (ZZ < grid->npbcdim && + (!bDomDec || dd->nc[ZZ] == 1) && box[ZZ][XX] != 0)); + bTriclinicY = (ZZ < grid->npbcdim && + (!bDomDec || dd->nc[ZZ] == 1) && box[ZZ][YY] != 0); + + cgsnr = cgs->nr; + + get_cutoff2(fr, bDoLongRange, &rvdw2, &rcoul2, &rs2, &rm2, &rl2); + + rvdw_lt_rcoul = (rvdw2 >= rcoul2); + rcoul_lt_rvdw = (rcoul2 >= rvdw2); + + if (bMakeQMMMnblist) + { + rm2 = rl2; + rs2 = rl2; + } + + nl_sr = ns->nl_sr; + nsr = ns->nsr; + nl_lr_ljc = ns->nl_lr_ljc; + nl_lr_one = ns->nl_lr_one; + nlr_ljc = ns->nlr_ljc; + nlr_one = ns->nlr_one; + + /* Unpack arrays */ + cgcm = fr->cg_cm; + Nx = grid->n[XX]; + Ny = 
grid->n[YY]; + Nz = grid->n[ZZ]; + grida = grid->a; + gridind = grid->index; + gridnra = grid->nra; + nns = 0; + + gridx = grid->cell_size[XX]; + gridy = grid->cell_size[YY]; + gridz = grid->cell_size[ZZ]; + grid_x = 1/gridx; + grid_y = 1/gridy; + grid_z = 1/gridz; + copy_rvec(grid->cell_offset, grid_offset); + copy_ivec(grid->ncpddc, ncpddc); + dcx2 = grid->dcx2; + dcy2 = grid->dcy2; + dcz2 = grid->dcz2; + +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG + zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5); + zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5); + ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5); + if (zsh_tx != 0 && ysh_tx != 0) + { + /* This could happen due to rounding, when both ratios are 0.5 */ + ysh_tx = 0; + } +#endif + + debug_gmx(); + + if (fr->n_tpi) + { + /* We only want a list for the test particle */ + cg0 = cgsnr - 1; + } + else + { + cg0 = grid->icg0; + } + cg1 = grid->icg1; + + /* Set the shift range */ + for (d = 0; d < DIM; d++) + { + sh0[d] = -1; + sh1[d] = 1; + /* Check if we need periodicity shifts. + * Without PBC or with domain decomposition we don't need them. + */ + if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1)) + { + shp[d] = 0; + } + else + { + if (d == XX && + box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2)) + { + shp[d] = 2; + } + else + { + shp[d] = 1; + } + } + } + + /* Loop over charge groups */ + for (icg = cg0; (icg < cg1); icg++) + { + igid = GET_CGINFO_GID(cginfo[icg]); + /* Skip this charge group if all energy groups are excluded! */ + if (bExcludeAlleg[igid]) + { + continue; + } + + i0 = cgs->index[icg]; + + if (bMakeQMMMnblist) + { + /* Skip this charge group if it is not a QM atom while making a + * QM/MM neighbourlist + */ + if (md->bQM[i0] == FALSE) + { + continue; /* MM particle, go to next particle */ + } + + /* Compute the number of charge groups that fall within the control + * of this one (icg) + */ + naaj = calc_naaj(icg, cgsnr); + jcg0 = icg; + jcg1 = icg + naaj; + max_jcg = cgsnr; + } + else + { + /* make a normal neighbourlist */ + + if (bDomDec) + { + /* Get the j charge-group and dd cell shift ranges */ + dd_get_ns_ranges(cr->dd, icg, &jcg0, &jcg1, sh0, sh1); + max_jcg = 0; + } + else + { + /* Compute the number of charge groups that fall within the control + * of this one (icg) + */ + naaj = calc_naaj(icg, cgsnr); + jcg0 = icg; + jcg1 = icg + naaj; + + if (fr->n_tpi) + { + /* The i-particle is always the test particle, + * so we want all j-particles + */ + max_jcg = cgsnr - 1; + } + else + { + max_jcg = jcg1 - cgsnr; + } + } + } + + i_egp_flags = fr->egp_flags + igid*ngid; + + /* Set the exclusions for the atoms in charge group icg using a bitmask */ + setexcl(i0, cgs->index[icg+1], &top->excls, TRUE, bexcl); + + ci2xyz(grid, icg, &cell_x, &cell_y, &cell_z); + + /* Changed iicg to icg, DvdS 990115 + * (but see consistency check above, DvdS 990330) + */ +#ifdef NS5DB + fprintf(log, "icg=%5d, naaj=%5d, cell %d %d %d\n", + icg, naaj, cell_x, cell_y, cell_z); +#endif + /* Loop over shift vectors in three dimensions */ + for (tz = -shp[ZZ]; tz <= shp[ZZ]; tz++) + { + ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ]; + /* Calculate range of cells in Z direction that have the shift tz */ + zgi = cell_z + tz*Nz; +#define FAST_DD_NS +#ifndef FAST_DD_NS + get_dx(Nz, gridz, rl2, zgi, ZI, &dz0, &dz1, dcz2); +#else + get_dx_dd(Nz, gridz, rl2, zgi, ZI-grid_offset[ZZ], + ncpddc[ZZ], sh0[ZZ], sh1[ZZ], &dz0, &dz1, dcz2); +#endif + if (dz0 > dz1) + { + continue; + } + for (ty = -shp[YY]; ty <= shp[YY]; ty++) + { + YI = 
cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY]; + /* Calculate range of cells in Y direction that have the shift ty */ + if (bTriclinicY) + { + ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny; + } + else + { + ygi = cell_y + ty*Ny; + } +#ifndef FAST_DD_NS + get_dx(Ny, gridy, rl2, ygi, YI, &dy0, &dy1, dcy2); +#else + get_dx_dd(Ny, gridy, rl2, ygi, YI-grid_offset[YY], + ncpddc[YY], sh0[YY], sh1[YY], &dy0, &dy1, dcy2); +#endif + if (dy0 > dy1) + { + continue; + } + for (tx = -shp[XX]; tx <= shp[XX]; tx++) + { + XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX]; + /* Calculate range of cells in X direction that have the shift tx */ + if (bTriclinicX) + { + xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx; + } + else + { + xgi = cell_x + tx*Nx; + } +#ifndef FAST_DD_NS + get_dx(Nx, gridx, rl2, xgi*Nx, XI, &dx0, &dx1, dcx2); +#else + get_dx_dd(Nx, gridx, rl2, xgi, XI-grid_offset[XX], + ncpddc[XX], sh0[XX], sh1[XX], &dx0, &dx1, dcx2); +#endif + if (dx0 > dx1) + { + continue; + } + /* AdResS: an explicit cg that has a weighting function of 0 is excluded + * from the neighbour list as it will not interact */ + if (fr->adress_type != eAdressOff) + { + if (md->wf[cgs->index[icg]] <= GMX_REAL_EPS && egp_explicit(fr, igid)) + { + continue; + } + } + /* Get shift vector */ + shift = XYZ2IS(tx, ty, tz); +#ifdef NS5DB + range_check(shift, 0, SHIFTS); +#endif + for (nn = 0; (nn < ngid); nn++) + { + nsr[nn] = 0; + nlr_ljc[nn] = 0; + nlr_one[nn] = 0; + } +#ifdef NS5DB + fprintf(log, "shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n", + shift, dx0, dx1, dy0, dy1, dz0, dz1); + fprintf(log, "cgcm: %8.3f %8.3f %8.3f\n", cgcm[icg][XX], + cgcm[icg][YY], cgcm[icg][ZZ]); + fprintf(log, "xi: %8.3f %8.3f %8.3f\n", XI, YI, ZI); +#endif + for (dx = dx0; (dx <= dx1); dx++) + { + tmp1 = rl2 - dcx2[dx]; + for (dy = dy0; (dy <= dy1); dy++) + { + tmp2 = tmp1 - dcy2[dy]; + if (tmp2 > 0) + { + for (dz = dz0; (dz <= dz1); dz++) + { + if (tmp2 > dcz2[dz]) + { + /* Find grid-cell cj in which possible neighbours are */ + cj = xyz2ci(Ny, Nz, dx, dy, dz); + + /* Check out how many cgs (nrj) there are in this cell */ + nrj = gridnra[cj]; + + /* Find the offset in the cg list */ + cgj0 = gridind[cj]; + + /* Check if all j's are out of range so we + * can skip the whole cell. + * Should save some time, especially with DD. + */ + if (nrj == 0 || + (grida[cgj0] >= max_jcg && + (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0))) + { + continue; + } + + /* Loop over cgs */ + for (j = 0; (j < nrj); j++) + { + jjcg = grida[cgj0+j]; + + /* check whether this guy is in range! 
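+ * i.e. either within the home range jcg0..jcg1 assigned to this + * i charge group, or among the wrapped-around groups below max_jcg.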
*/ + if ((jjcg >= jcg0 && jjcg < jcg1) || + (jjcg < max_jcg)) + { + r2 = calc_dx2(XI, YI, ZI, cgcm[jjcg]); + if (r2 < rl2) + { + /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */ + jgid = GET_CGINFO_GID(cginfo[jjcg]); + /* check energy group exclusions */ + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + if (r2 < rs2) + { + if (nsr[jgid] >= MAX_CG) + { + /* Add to short-range list */ + put_in_list(bHaveVdW, ngid, md, icg, jgid, + nsr[jgid], nl_sr[jgid], + cgs->index, /* cgsatoms, */ bexcl, + shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + nsr[jgid] = 0; + } + nl_sr[jgid][nsr[jgid]++] = jjcg; + } + else if (r2 < rm2) + { + if (nlr_ljc[jgid] >= MAX_CG) + { + /* Add to LJ+coulomb long-range list */ + put_in_list(bHaveVdW, ngid, md, icg, jgid, + nlr_ljc[jgid], nl_lr_ljc[jgid], top->cgs.index, + bexcl, shift, fr, TRUE, TRUE, TRUE, fr->solvent_opt); + nlr_ljc[jgid] = 0; + } + nl_lr_ljc[jgid][nlr_ljc[jgid]++] = jjcg; + } + else + { + if (nlr_one[jgid] >= MAX_CG) + { + /* Add to long-range list with only coul, or only LJ */ + put_in_list(bHaveVdW, ngid, md, icg, jgid, + nlr_one[jgid], nl_lr_one[jgid], top->cgs.index, + bexcl, shift, fr, TRUE, rvdw_lt_rcoul, rcoul_lt_rvdw, fr->solvent_opt); + nlr_one[jgid] = 0; + } + nl_lr_one[jgid][nlr_one[jgid]++] = jjcg; + } + } + } + nns++; + } + } + } + } + } + } + } + /* CHECK whether there is anything left in the buffers */ + for (nn = 0; (nn < ngid); nn++) + { + if (nsr[nn] > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nsr[nn], nl_sr[nn], + cgs->index, /* cgsatoms, */ bexcl, + shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + } + + if (nlr_ljc[nn] > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nlr_ljc[nn], + nl_lr_ljc[nn], top->cgs.index, + bexcl, shift, fr, TRUE, TRUE, TRUE, fr->solvent_opt); + } + + if (nlr_one[nn] > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nlr_one[nn], + nl_lr_one[nn], top->cgs.index, + bexcl, shift, fr, TRUE, rvdw_lt_rcoul, rcoul_lt_rvdw, fr->solvent_opt); + } + } + } + } + } + /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */ + setexcl(cgs->index[icg], cgs->index[icg+1], &top->excls, FALSE, bexcl); + } + /* No need to perform any left-over force calculations anymore (as we used to do here) + * since we now save the proper long-range lists for later evaluation. + */ + + debug_gmx(); + + /* Close neighbourlists */ + close_neighbor_lists(fr, bMakeQMMMnblist); + + return nns; +} + +void ns_realloc_natoms(gmx_ns_t *ns, int natoms) +{ + int i; + + if (natoms > ns->nra_alloc) + { + ns->nra_alloc = over_alloc_dd(natoms); + srenew(ns->bexcl, ns->nra_alloc); + for (i = 0; i < ns->nra_alloc; i++) + { + ns->bexcl[i] = 0; + } + } +} + +void init_ns(FILE *fplog, const t_commrec *cr, + gmx_ns_t *ns, t_forcerec *fr, + const gmx_mtop_t *mtop) +{ + int mt, icg, nr_in_cg, maxcg, i, j, jcg, ngid, ncg; + t_block *cgs; + char *ptr; + + /* Compute largest charge groups size (# atoms) */ + nr_in_cg = 1; + for (mt = 0; mt < mtop->nmoltype; mt++) + { + cgs = &mtop->moltype[mt].cgs; + for (icg = 0; (icg < cgs->nr); icg++) + { + nr_in_cg = max(nr_in_cg, (int)(cgs->index[icg+1]-cgs->index[icg])); + } + } + + /* Verify whether largest charge group is <= max cg. + * This is determined by the type of the local exclusion type + * Exclusions are stored in bits. 
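+ * Each j atom gets one t_excl bitmask holding one bit per i atom of + * the charge group, which caps the group size at sizeof(t_excl)*8.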
(If the type is not large + * enough, enlarge it, unsigned char -> unsigned short -> unsigned long) + */ + maxcg = sizeof(t_excl)*8; + if (nr_in_cg > maxcg) + { + gmx_fatal(FARGS, "Max #atoms in a charge group: %d > %d\n", + nr_in_cg, maxcg); + } + + ngid = mtop->groups.grps[egcENER].nr; + snew(ns->bExcludeAlleg, ngid); + for (i = 0; i < ngid; i++) + { + ns->bExcludeAlleg[i] = TRUE; + for (j = 0; j < ngid; j++) + { + if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL)) + { + ns->bExcludeAlleg[i] = FALSE; + } + } + } + + if (fr->bGrid) + { + /* Grid search */ + ns->grid = init_grid(fplog, fr); + init_nsgrid_lists(fr, ngid, ns); + } + else + { + /* Simple search */ + snew(ns->ns_buf, ngid); + for (i = 0; (i < ngid); i++) + { + snew(ns->ns_buf[i], SHIFTS); + } + ncg = ncg_mtop(mtop); + snew(ns->simple_aaj, 2*ncg); + for (jcg = 0; (jcg < ncg); jcg++) + { + ns->simple_aaj[jcg] = jcg; + ns->simple_aaj[jcg+ncg] = jcg; + } + } + + /* Create array that determines whether or not atoms have VdW */ + snew(ns->bHaveVdW, fr->ntype); + for (i = 0; (i < fr->ntype); i++) + { + for (j = 0; (j < fr->ntype); j++) + { + ns->bHaveVdW[i] = (ns->bHaveVdW[i] || + (fr->bBHAM ? + ((BHAMA(fr->nbfp, fr->ntype, i, j) != 0) || + (BHAMB(fr->nbfp, fr->ntype, i, j) != 0) || + (BHAMC(fr->nbfp, fr->ntype, i, j) != 0)) : + ((C6(fr->nbfp, fr->ntype, i, j) != 0) || + (C12(fr->nbfp, fr->ntype, i, j) != 0)))); + } + } + if (debug) + { + pr_bvec(debug, 0, "bHaveVdW", ns->bHaveVdW, fr->ntype, TRUE); + } + + ns->nra_alloc = 0; + ns->bexcl = NULL; + if (!DOMAINDECOMP(cr)) + { + ns_realloc_natoms(ns, mtop->natoms); + } + + ns->nblist_initialized = FALSE; + + /* nbr list debug dump */ + { + char *ptr = getenv("GMX_DUMP_NL"); + if (ptr) + { + ns->dump_nl = strtol(ptr, NULL, 10); + if (fplog) + { + fprintf(fplog, "GMX_DUMP_NL = %d", ns->dump_nl); + } + } + else + { + ns->dump_nl = 0; + } + } +} + + +int search_neighbours(FILE *log, t_forcerec *fr, + matrix box, + gmx_localtop_t *top, + gmx_groups_t *groups, + t_commrec *cr, + t_nrnb *nrnb, t_mdatoms *md, + gmx_bool bFillGrid, + gmx_bool bDoLongRangeNS) +{ + t_block *cgs = &(top->cgs); + rvec box_size, grid_x0, grid_x1; + int i, j, m, ngid; + real min_size, grid_dens; + int nsearch; + gmx_bool bGrid; + char *ptr; + gmx_bool *i_egp_flags; + int cg_start, cg_end, start, end; + gmx_ns_t *ns; + t_grid *grid; + gmx_domdec_zones_t *dd_zones; + put_in_list_t *put_in_list; + + ns = &fr->ns; + + /* Set some local variables */ + bGrid = fr->bGrid; + ngid = groups->grps[egcENER].nr; + + for (m = 0; (m < DIM); m++) + { + box_size[m] = box[m][m]; + } + + if (fr->ePBC != epbcNONE) + { + if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC, box)) + { + gmx_fatal(FARGS, "One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off."); + } + if (!bGrid) + { + min_size = min(box_size[XX], min(box_size[YY], box_size[ZZ])); + if (2*fr->rlistlong >= min_size) + { + gmx_fatal(FARGS, "One of the box diagonal elements has become smaller than twice the cut-off length."); + } + } + } + + if (DOMAINDECOMP(cr)) + { + ns_realloc_natoms(ns, cgs->index[cgs->nr]); + } + debug_gmx(); + + /* Reset the neighbourlists */ + reset_neighbor_lists(fr, TRUE, TRUE); + + if (bGrid && bFillGrid) + { + + grid = ns->grid; + if (DOMAINDECOMP(cr)) + { + dd_zones = domdec_zones(cr->dd); + } + else + { + dd_zones = NULL; + + get_nsgrid_boundaries(grid->nboundeddim, box, NULL, NULL, NULL, NULL, + cgs->nr, fr->cg_cm, grid_x0, grid_x1, &grid_dens); + + grid_first(log, grid, NULL, 
NULL, box, grid_x0, grid_x1, + fr->rlistlong, grid_dens); + } + debug_gmx(); + + start = 0; + end = cgs->nr; + + if (DOMAINDECOMP(cr)) + { + end = cgs->nr; + fill_grid(dd_zones, grid, end, -1, end, fr->cg_cm); + grid->icg0 = 0; + grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1; + } + else + { + fill_grid(NULL, grid, cgs->nr, fr->cg0, fr->hcg, fr->cg_cm); + grid->icg0 = fr->cg0; + grid->icg1 = fr->hcg; + debug_gmx(); + } + + calc_elemnr(grid, start, end, cgs->nr); + calc_ptrs(grid); + grid_last(grid, start, end, cgs->nr); + + if (gmx_debug_at) + { + check_grid(grid); + print_grid(debug, grid); + } + } + else if (fr->n_tpi) + { + /* Set the grid cell index for the test particle only. + * The cell to cg index is not corrected, but that does not matter. + */ + fill_grid(NULL, ns->grid, fr->hcg, fr->hcg-1, fr->hcg, fr->cg_cm); + } + debug_gmx(); + + if (fr->adress_type == eAdressOff) + { + if (!fr->ns.bCGlist) + { + put_in_list = put_in_list_at; + } + else + { + put_in_list = put_in_list_cg; + } + } + else + { + put_in_list = put_in_list_adress; + } + + /* Do the core! */ + if (bGrid) + { + grid = ns->grid; + nsearch = nsgrid_core(cr, fr, box, ngid, top, + grid, ns->bexcl, ns->bExcludeAlleg, + md, put_in_list, ns->bHaveVdW, + bDoLongRangeNS, FALSE); + + /* neighbour searching withouth QMMM! QM atoms have zero charge in + * the classical calculation. The charge-charge interaction + * between QM and MM atoms is handled in the QMMM core calculation + * (see QMMM.c). The VDW however, we'd like to compute classically + * and the QM MM atom pairs have just been put in the + * corresponding neighbourlists. in case of QMMM we still need to + * fill a special QMMM neighbourlist that contains all neighbours + * of the QM atoms. If bQMMM is true, this list will now be made: + */ + if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom) + { + nsearch += nsgrid_core(cr, fr, box, ngid, top, + grid, ns->bexcl, ns->bExcludeAlleg, + md, put_in_list_qmmm, ns->bHaveVdW, + bDoLongRangeNS, TRUE); + } + } + else + { + nsearch = ns_simple_core(fr, top, md, box, box_size, + ns->bexcl, ns->simple_aaj, + ngid, ns->ns_buf, put_in_list, ns->bHaveVdW); + } + debug_gmx(); + +#ifdef DEBUG + pr_nsblock(log); +#endif + + inc_nrnb(nrnb, eNR_NS, nsearch); + /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */ + + return nsearch; +} + +int natoms_beyond_ns_buffer(t_inputrec *ir, t_forcerec *fr, t_block *cgs, + matrix scale_tot, rvec *x) +{ + int cg0, cg1, cg, a0, a1, a, i, j; + real rint, hbuf2, scale; + rvec *cg_cm, cgsc; + gmx_bool bIsotropic; + int nBeyond; + + nBeyond = 0; + + rint = max(ir->rcoulomb, ir->rvdw); + if (ir->rlist < rint) + { + gmx_fatal(FARGS, "The neighbor search buffer has negative size: %f nm", + ir->rlist - rint); + } + cg_cm = fr->cg_cm; + + cg0 = fr->cg0; + cg1 = fr->hcg; + + if (!EI_DYNAMICS(ir->eI) || !DYNAMIC_BOX(*ir)) + { + hbuf2 = sqr(0.5*(ir->rlist - rint)); + for (cg = cg0; cg < cg1; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + for (a = a0; a < a1; a++) + { + if (distance2(cg_cm[cg], x[a]) > hbuf2) + { + nBeyond++; + } + } + } + } + else + { + bIsotropic = TRUE; + scale = scale_tot[0][0]; + for (i = 1; i < DIM; i++) + { + /* With anisotropic scaling, the original spherical ns volumes become + * ellipsoids. To avoid costly transformations we use the minimum + * eigenvalue of the scaling matrix for determining the buffer size. + * Since the lower half is 0, the eigenvalues are the diagonal elements. 
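+ */
+
+/* A short sketch of that argument in plain C (not project code): the
+ * eigenvalues of a triangular matrix are exactly its diagonal entries,
+ * so the most conservative isotropic buffer scale is their minimum.
+ * Kept under #if 0 so the merged file is unaffected.
+ */
+#if 0
+static double min_diagonal_sketch(const double m[3][3])
+{
+    double s = m[0][0];
+    int    i;
+
+    for (i = 1; i < 3; i++)
+    {
+        if (m[i][i] < s)
+        {
+            s = m[i][i];   /* the smallest eigenvalue */
+        }
+    }
+    return s;
+}
+#endif
+
+/*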
+ */ + scale = min(scale, scale_tot[i][i]); + if (scale_tot[i][i] != scale_tot[i-1][i-1]) + { + bIsotropic = FALSE; + } + for (j = 0; j < i; j++) + { + if (scale_tot[i][j] != 0) + { + bIsotropic = FALSE; + } + } + } + hbuf2 = sqr(0.5*(scale*ir->rlist - rint)); + if (bIsotropic) + { + for (cg = cg0; cg < cg1; cg++) + { + svmul(scale, cg_cm[cg], cgsc); + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + for (a = a0; a < a1; a++) + { + if (distance2(cgsc, x[a]) > hbuf2) + { + nBeyond++; + } + } + } + } + else + { + /* Anistropic scaling */ + for (cg = cg0; cg < cg1; cg++) + { + /* Since scale_tot contains the transpose of the scaling matrix, + * we need to multiply with the transpose. + */ + tmvmul_ur0(scale_tot, cg_cm[cg], cgsc); + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + for (a = a0; a < a1; a++) + { + if (distance2(cgsc, x[a]) > hbuf2) + { + nBeyond++; + } + } + } + } + } + + return nBeyond; +} diff --cc src/gromacs/mdlib/sim_util.c index 8155c99ee7,0000000000..02e1248b2c mode 100644,000000..100644 --- a/src/gromacs/mdlib/sim_util.c +++ b/src/gromacs/mdlib/sim_util.c @@@ -1,2856 -1,0 +1,2894 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include "typedefs.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/smalloc.h"
+#include "names.h"
+#include "txtdump.h"
+#include "pbc.h"
+#include "chargegroup.h"
+#include "vec.h"
+#include "nrnb.h"
+#include "mshift.h"
+#include "mdrun.h"
+#include "sim_util.h"
+#include "update.h"
+#include "physics.h"
+#include "main.h"
+#include "mdatoms.h"
+#include "force.h"
+#include "bondf.h"
+#include "pme.h"
+#include "disre.h"
+#include "orires.h"
+#include "network.h"
+#include "calcmu.h"
+#include "constr.h"
+#include "xvgr.h"
+#include "copyrite.h"
+#include "domdec.h"
+#include "genborn.h"
+#include "nbnxn_atomdata.h"
+#include "nbnxn_search.h"
+#include "nbnxn_kernels/nbnxn_kernel_ref.h"
+#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
+#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
+#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
+#include "nonbonded.h"
+#include "../gmxlib/nonbonded/nb_kernel.h"
+#include "../gmxlib/nonbonded/nb_free_energy.h"
+
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/essentialdynamics/edsam.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/pulling/pull_rotation.h"
+#include "gromacs/imd/imd.h"
+#include "adress.h"
+#include "qmmm.h"
+
+#include "gmx_omp_nthreads.h"
+
+#include "nbnxn_cuda_data_mgmt.h"
+#include "nbnxn_cuda/nbnxn_cuda.h"
+
+void print_time(FILE *out,
+                gmx_walltime_accounting_t walltime_accounting,
+                gmx_int64_t step,
+                t_inputrec *ir,
+                t_commrec gmx_unused *cr)
+{
+    time_t finish;
+    char   timebuf[STRLEN];
+    double dt, elapsed_seconds, time_per_step;
+    char   buf[48];
+
+#ifndef GMX_THREAD_MPI
+    if (!PAR(cr))
+#endif
+    {
+        fprintf(out, "\r");
+    }
+    fprintf(out, "step %s", gmx_step_str(step, buf));
+    if ((step >= ir->nstlist))
+    {
+        double seconds_since_epoch = gmx_gettime();
+        elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
+        time_per_step   = elapsed_seconds/(step - ir->init_step + 1);
+        dt              = (ir->nsteps + ir->init_step - step) * time_per_step;
+
+        if (ir->nsteps >= 0)
+        {
+            if (dt >= 300)
+            {
+                finish = (time_t) (seconds_since_epoch + dt);
+                gmx_ctime_r(&finish, timebuf, STRLEN);
+                sprintf(buf, "%s", timebuf);
+                buf[strlen(buf)-1] = '\0';
+                fprintf(out, ", will finish %s", buf);
+            }
+            else
+            {
+                fprintf(out, ", remaining wall clock time: %5d s ", (int)dt);
+            }
+        }
+        else
+        {
+            fprintf(out, " performance: %.1f ns/day ",
+                    ir->delta_t/1000*24*60*60/time_per_step);
+        }
+    }
+#ifndef GMX_THREAD_MPI
+    if (PAR(cr))
+    {
+        fprintf(out, "\n");
+    }
+#endif
+
+    fflush(out);
+}
+
+void print_date_and_time(FILE *fplog, int nodeid, const char *title,
+                         double the_time)
+{
+    char time_string[STRLEN];
+
+    if (!fplog)
+    {
+        return;
+    }
+
+    {
+        int    i;
+        char   timebuf[STRLEN];
+        time_t temp_time = (time_t) the_time;
+
+        gmx_ctime_r(&temp_time, timebuf, STRLEN);
+        for (i = 0; timebuf[i] >= ' '; i++)
+        {
+            time_string[i] = timebuf[i];
+        }
+        time_string[i] = '\0';
+    }
+
+    fprintf(fplog, "%s on node %d %s\n", title, nodeid, time_string);
+}
+
+void print_start(FILE *fplog, t_commrec *cr,
+                 gmx_walltime_accounting_t walltime_accounting,
+                 const char *name)
+{
+    char buf[STRLEN];
+
+    sprintf(buf, "Started %s", name);
+    print_date_and_time(fplog, cr->nodeid, buf,
+                        walltime_accounting_get_start_time_stamp(walltime_accounting));
+}
+
+static
void sum_forces(int start, int end, rvec f[], rvec flr[]) +{ + int i; + + if (gmx_debug_at) + { + pr_rvecs(debug, 0, "fsr", f+start, end-start); + pr_rvecs(debug, 0, "flr", flr+start, end-start); + } + for (i = start; (i < end); i++) + { + rvec_inc(f[i], flr[i]); + } +} + +/* + * calc_f_el calculates forces due to an electric field. + * + * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e + * + * Et[] contains the parameters for the time dependent + * part of the field (not yet used). + * Ex[] contains the parameters for + * the spatial dependent part of the field. You can have cool periodic + * fields in principle, but only a constant field is supported + * now. + * The function should return the energy due to the electric field + * (if any) but for now returns 0. + * + * WARNING: + * There can be problems with the virial. + * Since the field is not self-consistent this is unavoidable. + * For neutral molecules the virial is correct within this approximation. + * For neutral systems with many charged molecules the error is small. + * But for systems with a net charge or a few charged molecules + * the error can be significant when the field is high. + * Solution: implement a self-consitent electric field into PME. + */ +static void calc_f_el(FILE *fp, int start, int homenr, + real charge[], rvec f[], + t_cosines Ex[], t_cosines Et[], double t) +{ + rvec Ext; + real t0; + int i, m; + + for (m = 0; (m < DIM); m++) + { + if (Et[m].n > 0) + { + if (Et[m].n == 3) + { + t0 = Et[m].a[1]; + Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2]))); + } + else + { + Ext[m] = cos(Et[m].a[0]*t); + } + } + else + { + Ext[m] = 1.0; + } + if (Ex[m].n > 0) + { + /* Convert the field strength from V/nm to MD-units */ + Ext[m] *= Ex[m].a[0]*FIELDFAC; + for (i = start; (i < start+homenr); i++) + { + f[i][m] += charge[i]*Ext[m]; + } + } + else + { + Ext[m] = 0; + } + } + if (fp != NULL) + { + fprintf(fp, "%10g %10g %10g %10g #FIELD\n", t, + Ext[XX]/FIELDFAC, Ext[YY]/FIELDFAC, Ext[ZZ]/FIELDFAC); + } +} + +static void calc_virial(int start, int homenr, rvec x[], rvec f[], + tensor vir_part, t_graph *graph, matrix box, + t_nrnb *nrnb, const t_forcerec *fr, int ePBC) +{ + int i, j; + tensor virtest; + + /* The short-range virial from surrounding boxes */ + clear_mat(vir_part); + calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box); + inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS); + + /* Calculate partial virial, for local atoms only, based on short range. + * Total virial is computed in global_stat, called from do_md + */ + f_calc_vir(start, start+homenr, x, f, vir_part, graph, box); + inc_nrnb(nrnb, eNR_VIRIAL, homenr); + + /* Add position restraint contribution */ + for (i = 0; i < DIM; i++) + { + vir_part[i][i] += fr->vir_diag_posres[i]; + } + + /* Add wall contribution */ + for (i = 0; i < DIM; i++) + { + vir_part[i][ZZ] += fr->vir_wall_z[i]; + } + + if (debug) + { + pr_rvecs(debug, 0, "vir_part", vir_part, DIM); + } +} + +static void posres_wrapper(FILE *fplog, + int flags, + gmx_bool bSepDVDL, + t_inputrec *ir, + t_nrnb *nrnb, + gmx_localtop_t *top, + matrix box, rvec x[], + gmx_enerdata_t *enerd, + real *lambda, + t_forcerec *fr) +{ + t_pbc pbc; + real v, dvdl; + int i; + + /* Position restraints always require full pbc */ + set_pbc(&pbc, ir->ePBC, box); + dvdl = 0; + v = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms, + top->idef.iparams_posres, + (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres, + ir->ePBC == epbcNONE ? 
NULL : &pbc, + lambda[efptRESTRAINT], &dvdl, + fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB); + if (bSepDVDL) + { + gmx_print_sepdvdl(fplog, interaction_function[F_POSRES].longname, v, dvdl); + } + enerd->term[F_POSRES] += v; + /* If just the force constant changes, the FEP term is linear, + * but if k changes, it is not. + */ + enerd->dvdl_nonlin[efptRESTRAINT] += dvdl; + inc_nrnb(nrnb, eNR_POSRES, top->idef.il[F_POSRES].nr/2); + + if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL)) + { + for (i = 0; i < enerd->n_lambda; i++) + { + real dvdl_dum, lambda_dum; + + lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]); + v = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms, + top->idef.iparams_posres, + (const rvec*)x, NULL, NULL, + ir->ePBC == epbcNONE ? NULL : &pbc, lambda_dum, &dvdl, + fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB); + enerd->enerpart_lambda[i] += v; + } + } +} + +static void fbposres_wrapper(t_inputrec *ir, + t_nrnb *nrnb, + gmx_localtop_t *top, + matrix box, rvec x[], + gmx_enerdata_t *enerd, + t_forcerec *fr) +{ + t_pbc pbc; + real v; + + /* Flat-bottomed position restraints always require full pbc */ + set_pbc(&pbc, ir->ePBC, box); + v = fbposres(top->idef.il[F_FBPOSRES].nr, top->idef.il[F_FBPOSRES].iatoms, + top->idef.iparams_fbposres, + (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres, + ir->ePBC == epbcNONE ? NULL : &pbc, + fr->rc_scaling, fr->ePBC, fr->posres_com); + enerd->term[F_FBPOSRES] += v; + inc_nrnb(nrnb, eNR_FBPOSRES, top->idef.il[F_FBPOSRES].nr/2); +} + +static void pull_potential_wrapper(FILE *fplog, + gmx_bool bSepDVDL, + t_commrec *cr, + t_inputrec *ir, + matrix box, rvec x[], + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, + real *lambda, + double t) +{ + t_pbc pbc; + real dvdl; + + /* Calculate the center of mass forces, this requires communication, + * which is why pull_potential is called close to other communication. + * The virial contribution is calculated directly, + * which is why we call pull_potential after calc_virial. + */ + set_pbc(&pbc, ir->ePBC, box); + dvdl = 0; + enerd->term[F_COM_PULL] += + pull_potential(ir->ePull, ir->pull, mdatoms, &pbc, + cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl); + if (bSepDVDL) + { + gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl); + } + enerd->dvdl_lin[efptRESTRAINT] += dvdl; +} + +static void pme_receive_force_ener(FILE *fplog, + gmx_bool bSepDVDL, + t_commrec *cr, + gmx_wallcycle_t wcycle, + gmx_enerdata_t *enerd, + t_forcerec *fr) +{ + real e_q, e_lj, v, dvdl_q, dvdl_lj; + float cycles_ppdpme, cycles_seppme; + + cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME); + dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME); + + /* In case of node-splitting, the PP nodes receive the long-range + * forces, virial and energy from the PME nodes here. 
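+ */
+
+/* A schematic sketch (hypothetical names, not the real communication
+ * layer) of the receive step below: the mesh terms computed on the
+ * separate PME rank arrive as energies plus free-energy derivatives
+ * and are accumulated on the PP rank, just as gmx_pme_receive_f()
+ * feeds enerd a few lines down. Kept under #if 0, never compiled.
+ */
+#if 0
+typedef struct {
+    double e_q, e_lj;        /* Coulomb and LJ-PME mesh energies */
+    double dvdl_q, dvdl_lj;  /* their free-energy derivatives    */
+} mesh_result_sketch;
+
+static void accumulate_mesh_sketch(const mesh_result_sketch *r,
+                                   double *e_coul_recip, double *e_lj_recip,
+                                   double *dvdl_coul,    double *dvdl_vdw)
+{
+    *e_coul_recip += r->e_q;
+    *e_lj_recip   += r->e_lj;
+    *dvdl_coul    += r->dvdl_q;
+    *dvdl_vdw     += r->dvdl_lj;
+}
+#endif
+
+/*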
+ */ + wallcycle_start(wcycle, ewcPP_PMEWAITRECVF); + dvdl_q = 0; + dvdl_lj = 0; + gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q, + fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj, + &cycles_seppme); + if (bSepDVDL) + { + gmx_print_sepdvdl(fplog, "Electrostatic PME mesh", e_q, dvdl_q); + gmx_print_sepdvdl(fplog, "Lennard-Jones PME mesh", e_lj, dvdl_lj); + } + enerd->term[F_COUL_RECIP] += e_q; + enerd->term[F_LJ_RECIP] += e_lj; + enerd->dvdl_lin[efptCOUL] += dvdl_q; + enerd->dvdl_lin[efptVDW] += dvdl_lj; + + if (wcycle) + { + dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME); + } + wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF); +} + +static void print_large_forces(FILE *fp, t_mdatoms *md, t_commrec *cr, + gmx_int64_t step, real pforce, rvec *x, rvec *f) +{ + int i; + real pf2, fn2; + char buf[STEPSTRSIZE]; + + pf2 = sqr(pforce); + for (i = 0; i < md->homenr; i++) + { + fn2 = norm2(f[i]); + /* We also catch NAN, if the compiler does not optimize this away. */ + if (fn2 >= pf2 || fn2 != fn2) + { + fprintf(fp, "step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n", + gmx_step_str(step, buf), + ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], sqrt(fn2)); + } + } +} + +static void post_process_forces(t_commrec *cr, + gmx_int64_t step, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + matrix box, rvec x[], + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + t_graph *graph, + t_forcerec *fr, gmx_vsite_t *vsite, + int flags) +{ + if (fr->bF_NoVirSum) + { + if (vsite) + { + /* Spread the mesh force on virtual sites to the other particles... + * This is parallellized. MPI communication is performed + * if the constructing atoms aren't local. + */ + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, fr->f_novirsum, NULL, + (flags & GMX_FORCE_VIRIAL), fr->vir_el_recip, + nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + } + if (flags & GMX_FORCE_VIRIAL) + { + /* Now add the forces, this is local */ + if (fr->bDomDec) + { + sum_forces(0, fr->f_novirsum_n, f, fr->f_novirsum); + } + else + { + sum_forces(0, mdatoms->homenr, + f, fr->f_novirsum); + } + if (EEL_FULL(fr->eeltype)) + { + /* Add the mesh contribution to the virial */ + m_add(vir_force, fr->vir_el_recip, vir_force); + } + if (EVDW_PME(fr->vdwtype)) + { + /* Add the mesh contribution to the virial */ + m_add(vir_force, fr->vir_lj_recip, vir_force); + } + if (debug) + { + pr_rvecs(debug, 0, "vir_force", vir_force, DIM); + } + } + } + + if (fr->print_force >= 0) + { + print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f); + } +} + +static void do_nb_verlet(t_forcerec *fr, + interaction_const_t *ic, + gmx_enerdata_t *enerd, + int flags, int ilocality, + int clearF, + t_nrnb *nrnb, + gmx_wallcycle_t wcycle) +{ + int nnbl, kernel_type, enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj; + char *env; + nonbonded_verlet_group_t *nbvg; + gmx_bool bCUDA; + + if (!(flags & GMX_FORCE_NONBONDED)) + { + /* skip non-bonded calculation */ + return; + } + + nbvg = &fr->nbv->grp[ilocality]; + + /* CUDA kernel launch overhead is already timed separately */ + if (fr->cutoff_scheme != ecutsVERLET) + { + gmx_incons("Invalid cut-off scheme passed!"); + } + + bCUDA = (nbvg->kernel_type == nbnxnk8x8x8_CUDA); + + if (!bCUDA) + { + wallcycle_sub_start(wcycle, ewcsNONBONDED); + } + switch (nbvg->kernel_type) + { + case nbnxnk4x4_PlainC: + nbnxn_kernel_ref(&nbvg->nbl_lists, + nbvg->nbat, ic, + fr->shift_vec, + flags, + clearF, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], 
+ fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + + case nbnxnk4xN_SIMD_4xN: + nbnxn_kernel_simd_4xn(&nbvg->nbl_lists, + nbvg->nbat, ic, + nbvg->ewald_excl, + fr->shift_vec, + flags, + clearF, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], + fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + case nbnxnk4xN_SIMD_2xNN: + nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists, + nbvg->nbat, ic, + nbvg->ewald_excl, + fr->shift_vec, + flags, + clearF, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], + fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + + case nbnxnk8x8x8_CUDA: + nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, nbvg->nbat, flags, ilocality); + break; + + case nbnxnk8x8x8_PlainC: + nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0], + nbvg->nbat, ic, + fr->shift_vec, + flags, + clearF, + nbvg->nbat->out[0].f, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], + fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + + default: + gmx_incons("Invalid nonbonded kernel type passed!"); + + } + if (!bCUDA) + { + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + + if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) + { + enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF; + } + else if ((!bCUDA && nbvg->ewald_excl == ewaldexclAnalytical) || + (bCUDA && nbnxn_cuda_is_kernel_ewald_analytical(fr->nbv->cu_nbv))) + { + enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD; + } + else + { + enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB; + } + enr_nbnxn_kernel_lj = eNR_NBNXN_LJ; + if (flags & GMX_FORCE_ENERGY) + { + /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */ + enr_nbnxn_kernel_ljc += 1; + enr_nbnxn_kernel_lj += 1; + } + + inc_nrnb(nrnb, enr_nbnxn_kernel_ljc, + nbvg->nbl_lists.natpair_ljq); + inc_nrnb(nrnb, enr_nbnxn_kernel_lj, + nbvg->nbl_lists.natpair_lj); + /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */ + inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF, + nbvg->nbl_lists.natpair_q); + + if (ic->vdw_modifier == eintmodFORCESWITCH) + { + /* We add up the switch cost separately */ + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0), + nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj); + } + if (ic->vdw_modifier == eintmodPOTSWITCH) + { + /* We add up the switch cost separately */ + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0), + nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj); + } + if (ic->vdwtype == evdwPME) + { + /* We add up the LJ Ewald cost separately */ + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+((flags & GMX_FORCE_ENERGY) ? 
1 : 0), + nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj); + } +} + +static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists, + t_forcerec *fr, + rvec x[], + rvec f[], + t_mdatoms *mdatoms, + t_lambda *fepvals, + real *lambda, + gmx_enerdata_t *enerd, + int flags, + t_nrnb *nrnb, + gmx_wallcycle_t wcycle) +{ + int donb_flags; + nb_kernel_data_t kernel_data; + real lam_i[efptNR]; + real dvdl_nb[efptNR]; + int th; + int i, j; + + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + /* Currently all group scheme kernels always calculate (shift-)forces */ + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_VIRIAL) + { + donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + if (flags & GMX_FORCE_DO_LR) + { + donb_flags |= GMX_NONBONDED_DO_LR; + } + + kernel_data.flags = donb_flags; + kernel_data.lambda = lambda; + kernel_data.dvdl = dvdl_nb; + + kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR]; + kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR]; + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + } + + assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl); + + wallcycle_sub_start(wcycle, ewcsNONBONDED); +#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl) + for (th = 0; th < nbl_lists->nnbl; th++) + { + gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], + x, f, fr, mdatoms, &kernel_data, nrnb); + } + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA; + kernel_data.lambda = lam_i; + kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR]; + kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR]; + /* Note that we add to kernel_data.dvdl, but ignore the result */ + + for (i = 0; i < enerd->n_lambda; i++) + { + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? 
lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); +#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl) + for (th = 0; th < nbl_lists->nnbl; th++) + { + gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], + x, f, fr, mdatoms, &kernel_data, nrnb); + } + + sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + + wallcycle_sub_stop(wcycle, ewcsNONBONDED); +} + +void do_force_cutsVERLET(FILE *fplog, t_commrec *cr, + t_inputrec *inputrec, + gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + gmx_groups_t gmx_unused *groups, + matrix box, rvec x[], history_t *hist, + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, t_fcdata *fcd, + real *lambda, t_graph *graph, + t_forcerec *fr, interaction_const_t *ic, + gmx_vsite_t *vsite, rvec mu_tot, + double t, FILE *field, gmx_edsam_t ed, + gmx_bool bBornRadii, + int flags) +{ + int cg0, cg1, i, j; + int start, homenr; + int nb_kernel_type; + double mu[2*DIM]; + gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; + gmx_bool bDoLongRange, bDoForces, bSepLRF, bUseGPU, bUseOrEmulGPU; + gmx_bool bDiffKernels = FALSE; + matrix boxs; + rvec vzero, box_diag; + real e, v, dvdl; + float cycles_pme, cycles_force, cycles_wait_gpu; + nonbonded_verlet_t *nbv; + + cycles_force = 0; + cycles_wait_gpu = 0; + nbv = fr->nbv; + nb_kernel_type = fr->nbv->grp[0].kernel_type; + + start = 0; + homenr = mdatoms->homenr; + + bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog)); + + clear_mat(vir_force); + + cg0 = 0; + if (DOMAINDECOMP(cr)) + { + cg1 = cr->dd->ncg_tot; + } + else + { + cg1 = top->cgs.nr; + } + if (fr->n_tpi > 0) + { + cg1--; + } + + bStateChanged = (flags & GMX_FORCE_STATECHANGED); + bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE); + bFillGrid = (bNS && bStateChanged); + bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); + bDoLongRange = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR)); + bDoForces = (flags & GMX_FORCE_FORCES); + bSepLRF = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF)); + bUseGPU = fr->nbv->bUseGPU; + bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbnxnk8x8x8_PlainC); + + if (bStateChanged) + { + update_forcerec(fr, box); + + if (NEED_MUTOT(*inputrec)) + { + /* Calculate total (local) dipole moment in a temporary common array. + * This makes it possible to sum them over nodes faster. + */ + calc_mu(start, homenr, + x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed, + mu, mu+DIM); + } + } + + if (fr->ePBC != epbcNONE) + { + /* Compute shift vectors every step, + * because of pressure coupling or box deformation! + */ + if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged) + { + calc_shifts(box, fr->shift_vec); + } + + if (bCalcCGCM) + { + put_atoms_in_box_omp(fr->ePBC, box, homenr, x); + inc_nrnb(nrnb, eNR_SHIFTX, homenr); + } + else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) + { + unshift_self(graph, box, x); + } + } + + nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX, + fr->shift_vec, nbv->grp[0].nbat); + +#ifdef GMX_MPI + if (!(cr->duty & DUTY_PME)) + { + /* Send particle coordinates to the pme nodes. + * Since this is only implemented for domain decomposition + * and domain decomposition does not use the graph, + * we do not need to worry about shifting. 
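+ */
+
+/* Illustrative sketch of the flag word assembled just below (the enum
+ * values here are hypothetical; the real GMX_PME_* bits are defined
+ * elsewhere): independent mesh tasks are requested by OR-ing bits
+ * into a single int. Kept under #if 0, never compiled.
+ */
+#if 0
+enum {
+    PME_SKETCH_DO_COULOMB = 1 << 0,
+    PME_SKETCH_DO_LJ      = 1 << 1
+};
+
+static int make_pme_flags_sketch(int bCoulombPME, int bLJPME)
+{
+    int flags = 0;
+
+    if (bCoulombPME)
+    {
+        flags |= PME_SKETCH_DO_COULOMB;
+    }
+    if (bLJPME)
+    {
+        flags |= PME_SKETCH_DO_LJ;
+    }
+    return flags;
+}
+#endif
+
+/*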
+ */ + + int pme_flags = 0; + + wallcycle_start(wcycle, ewcPP_PMESENDX); + + bBS = (inputrec->nwall == 2); + if (bBS) + { + copy_mat(box, boxs); + svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + } + + if (EEL_PME(fr->eeltype)) + { + pme_flags |= GMX_PME_DO_COULOMB; + } + + if (EVDW_PME(fr->vdwtype)) + { + pme_flags |= GMX_PME_DO_LJ; + } + + gmx_pme_send_coordinates(cr, bBS ? boxs : box, x, + mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW], + (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)), + pme_flags, step); + + wallcycle_stop(wcycle, ewcPP_PMESENDX); + } +#endif /* GMX_MPI */ + + /* do gridding for pair search */ + if (bNS) + { + if (graph && bStateChanged) + { + /* Calculate intramolecular shift vectors to make molecules whole */ + mk_mshift(fplog, graph, fr->ePBC, box, x); + } + + clear_rvec(vzero); + box_diag[XX] = box[XX][XX]; + box_diag[YY] = box[YY][YY]; + box_diag[ZZ] = box[ZZ][ZZ]; + + wallcycle_start(wcycle, ewcNS); + if (!fr->bDomDec) + { + wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL); + nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box, + 0, vzero, box_diag, + 0, mdatoms->homenr, -1, fr->cginfo, x, + 0, NULL, + nbv->grp[eintLocal].kernel_type, + nbv->grp[eintLocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL); + } + else + { + wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL); + nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd), + fr->cginfo, x, + nbv->grp[eintNonlocal].kernel_type, + nbv->grp[eintNonlocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL); + } + + if (nbv->ngrp == 1 || + nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat) + { + nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatAll, + nbv->nbs, mdatoms, fr->cginfo); + } + else + { + nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatLocal, + nbv->nbs, mdatoms, fr->cginfo); + nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat, eatAll, + nbv->nbs, mdatoms, fr->cginfo); + } + wallcycle_stop(wcycle, ewcNS); + } + + /* initialize the GPU atom data and copy shift vector */ + if (bUseGPU) + { + if (bNS) + { + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + /* do local pair search */ + if (bNS) + { + wallcycle_start_nocount(wcycle, ewcNS); + wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL); + nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintLocal].nbat, + &top->excls, + ic->rlist, + nbv->min_ci_balanced, + &nbv->grp[eintLocal].nbl_lists, + eintLocal, + nbv->grp[eintLocal].kernel_type, + nrnb); + wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL); + + if (bUseGPU) + { + /* initialize local pair-list on the GPU */ + nbnxn_cuda_init_pairlist(nbv->cu_nbv, + nbv->grp[eintLocal].nbl_lists.nbl[0], + eintLocal); + } + wallcycle_stop(wcycle, ewcNS); + } + else + { + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS); + nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, x, + nbv->grp[eintLocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS); + wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (bUseGPU) + { + wallcycle_start(wcycle, ewcLAUNCH_GPU_NB); + /* launch local nonbonded F on GPU */ + do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo, + nrnb, wcycle); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + /* 
Communicate coordinates and sum dipole if necessary + + do non-local pair search */ + if (DOMAINDECOMP(cr)) + { + bDiffKernels = (nbv->grp[eintNonlocal].kernel_type != + nbv->grp[eintLocal].kernel_type); + + if (bDiffKernels) + { + /* With GPU+CPU non-bonded calculations we need to copy + * the local coordinates to the non-local nbat struct + * (in CPU format) as the non-local kernel call also + * calculates the local - non-local interactions. + */ + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS); + nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, TRUE, x, + nbv->grp[eintNonlocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS); + wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (bNS) + { + wallcycle_start_nocount(wcycle, ewcNS); + wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL); + + if (bDiffKernels) + { + nbnxn_grid_add_simple(nbv->nbs, nbv->grp[eintNonlocal].nbat); + } + + nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintNonlocal].nbat, + &top->excls, + ic->rlist, + nbv->min_ci_balanced, + &nbv->grp[eintNonlocal].nbl_lists, + eintNonlocal, + nbv->grp[eintNonlocal].kernel_type, + nrnb); + + wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL); + + if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_CUDA) + { + /* initialize non-local pair-list on the GPU */ + nbnxn_cuda_init_pairlist(nbv->cu_nbv, + nbv->grp[eintNonlocal].nbl_lists.nbl[0], + eintNonlocal); + } + wallcycle_stop(wcycle, ewcNS); + } + else + { + wallcycle_start(wcycle, ewcMOVEX); + dd_move_x(cr->dd, box, x); + + /* When we don't need the total dipole we sum it in global_stat */ + if (bStateChanged && NEED_MUTOT(*inputrec)) + { + gmx_sumd(2*DIM, mu, cr); + } + wallcycle_stop(wcycle, ewcMOVEX); + + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS); + nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, x, + nbv->grp[eintNonlocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS); + cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (bUseGPU && !bDiffKernels) + { + wallcycle_start(wcycle, ewcLAUNCH_GPU_NB); + /* launch non-local nonbonded F on GPU */ + do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo, + nrnb, wcycle); + cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + } + + if (bUseGPU) + { + /* launch D2H copy-back F */ + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + if (DOMAINDECOMP(cr) && !bDiffKernels) + { + nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat, + flags, eatNonlocal); + } + nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat, + flags, eatLocal); + cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + if (bStateChanged && NEED_MUTOT(*inputrec)) + { + if (PAR(cr)) + { + gmx_sumd(2*DIM, mu, cr); + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < DIM; j++) + { + fr->mu_tot[i][j] = mu[i*DIM + j]; + } + } + } + if (fr->efep == efepNO) + { + copy_rvec(fr->mu_tot[0], mu_tot); + } + else + { + for (j = 0; j < DIM; j++) + { + mu_tot[j] = + (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + + lambda[efptCOUL]*fr->mu_tot[1][j]; + } + } + + /* Reset energies */ + reset_enerdata(fr, bNS, enerd, MASTER(cr)); + clear_rvecs(SHIFTS, fr->fshift); + + if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME)) + { + wallcycle_start(wcycle, ewcPPDURINGPME); + dd_force_flop_start(cr->dd, nrnb); + } + + if (inputrec->bRot) + { + /* Enforced rotation has its own cycle counter that starts after the collective + * coordinates have been communicated. 
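+ */
+
+/* A minimal sketch (hypothetical names) of the cycle bracketing used
+ * for such regions: read the counter around the work and hand the
+ * elapsed amount to the load balancer, as done for ddCyclF here.
+ * Kept under #if 0, never compiled.
+ */
+#if 0
+static double timed_region_sketch(void (*do_work)(void),
+                                  double (*read_cycles)(void))
+{
+    double c0 = read_cycles();
+
+    do_work();                  /* e.g. the enforced rotation below  */
+    return read_cycles() - c0;  /* the contribution added to ddCyclF */
+}
+#endif
+
+/*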
It is added to ddCyclF to allow + * for proper load-balancing */ + wallcycle_start(wcycle, ewcROT); + do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS); + wallcycle_stop(wcycle, ewcROT); + } + + /* Start the force cycle counter. + * This counter is stopped in do_forcelow_level. + * No parallel communication should occur while this counter is running, + * since that will interfere with the dynamic load balancing. + */ + wallcycle_start(wcycle, ewcFORCE); + if (bDoForces) + { + /* Reset forces for which the virial is calculated separately: + * PME/Ewald forces if necessary */ + if (fr->bF_NoVirSum) + { + if (flags & GMX_FORCE_VIRIAL) + { + fr->f_novirsum = fr->f_novirsum_alloc; + if (fr->bDomDec) + { + clear_rvecs(fr->f_novirsum_n, fr->f_novirsum); + } + else + { + clear_rvecs(homenr, fr->f_novirsum+start); + } + } + else + { + /* We are not calculating the pressure so we do not need + * a separate array for forces that do not contribute + * to the pressure. + */ + fr->f_novirsum = f; + } + } + + /* Clear the short- and long-range forces */ + clear_rvecs(fr->natoms_force_constr, f); + if (bSepLRF && do_per_step(step, inputrec->nstcalclr)) + { + clear_rvecs(fr->natoms_force_constr, fr->f_twin); + } + + clear_rvec(fr->vir_diag_posres); + } + + if (inputrec->ePull == epullCONSTRAINT) + { + clear_pull_forces(inputrec->pull); + } + + /* We calculate the non-bonded forces, when done on the CPU, here. + * We do this before calling do_force_lowlevel, as in there bondeds + * forces are calculated before PME, which does communication. + * With this order, non-bonded and bonded force calculation imbalance + * can be balanced out by the domain decomposition load balancing. + */ + + if (!bUseOrEmulGPU) + { + /* Maybe we should move this into do_force_lowlevel */ + do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes, + nrnb, wcycle); + } + + if (fr->efep != efepNO) + { + /* Calculate the local and non-local free energy interactions here. + * Happens here on the CPU both with and without GPU. + */ + if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0) + { + do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists, + fr, x, f, mdatoms, + inputrec->fepvals, lambda, + enerd, flags, nrnb, wcycle); + } + + if (DOMAINDECOMP(cr) && + fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0) + { + do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists, + fr, x, f, mdatoms, + inputrec->fepvals, lambda, + enerd, flags, nrnb, wcycle); + } + } + + if (!bUseOrEmulGPU || bDiffKernels) + { + int aloc; + + if (DOMAINDECOMP(cr)) + { + do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, + bDiffKernels ? enbvClearFYes : enbvClearFNo, + nrnb, wcycle); + } + + if (!bUseOrEmulGPU) + { + aloc = eintLocal; + } + else + { + aloc = eintNonlocal; + } + + /* Add all the non-bonded force to the normal force array. + * This can be split into a local a non-local part when overlapping + * communication with calculation with domain decomposition. 
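+ */
+
+/* A condensed sketch (not the real nbnxn buffer layout) of the
+ * reduction performed by nbnxn_atomdata_add_nbat_f_to_f() a few lines
+ * below: forces accumulated in the kernel's packed output buffer are
+ * added back into the plain rvec force array. Uses the rvec/real
+ * types already available in this file; kept under #if 0.
+ */
+#if 0
+static void add_nbat_f_sketch(int natoms, const real *fnbat, rvec *f)
+{
+    int i;
+
+    for (i = 0; i < natoms; i++)
+    {
+        f[i][XX] += fnbat[3*i + 0];   /* assumes simple xyz packing */
+        f[i][YY] += fnbat[3*i + 1];
+        f[i][ZZ] += fnbat[3*i + 2];
+    }
+}
+#endif
+
+/*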
+ */ + cycles_force += wallcycle_stop(wcycle, ewcFORCE); + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS); + nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->grp[aloc].nbat, f); + wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS); + cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_start_nocount(wcycle, ewcFORCE); + + /* if there are multiple fshift output buffers reduce them */ + if ((flags & GMX_FORCE_VIRIAL) && + nbv->grp[aloc].nbl_lists.nnbl > 1) + { + nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat, + fr->fshift); + } + } + + /* update QMMMrec, if necessary */ + if (fr->bQMMM) + { + update_QMMMrec(cr, fr, x, mdatoms, box, top); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) + { + posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x, + enerd, lambda, fr); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0) + { + fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr); + } + + /* Compute the bonded and non-bonded energies and optionally forces */ + do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef), + cr, nrnb, wcycle, mdatoms, + x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born, + &(top->atomtypes), bBornRadii, box, + inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot, + flags, &cycles_pme); + + if (bSepLRF) + { + if (do_per_step(step, inputrec->nstcalclr)) + { + /* Add the long range forces to the short range forces */ + for (i = 0; i < fr->natoms_force_constr; i++) + { + rvec_add(fr->f_twin[i], f[i], f[i]); + } + } + } + + cycles_force += wallcycle_stop(wcycle, ewcFORCE); + + if (ed) + { + do_flood(cr, inputrec, x, f, ed, box, step, bNS); + } + + if (bUseOrEmulGPU && !bDiffKernels) + { + /* wait for non-local forces (or calculate in emulation mode) */ + if (DOMAINDECOMP(cr)) + { + if (bUseGPU) + { + float cycles_tmp; + + wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL); + nbnxn_cuda_wait_gpu(nbv->cu_nbv, + nbv->grp[eintNonlocal].nbat, + flags, eatNonlocal, + enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR], + fr->fshift); + cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL); + cycles_wait_gpu += cycles_tmp; + cycles_force += cycles_tmp; + } + else + { + wallcycle_start_nocount(wcycle, ewcFORCE); + do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes, + nrnb, wcycle); + cycles_force += wallcycle_stop(wcycle, ewcFORCE); + } + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS); + /* skip the reduction if there was no non-local work to do */ + if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0) + { + nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal, + nbv->grp[eintNonlocal].nbat, f); + } + wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS); + cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + } + + if (bDoForces && DOMAINDECOMP(cr)) + { + /* Communicate the forces */ + wallcycle_start(wcycle, ewcMOVEF); + dd_move_f(cr->dd, f, fr->fshift); + /* Do we need to communicate the separate force array + * for terms that do not contribute to the single sum virial? + * Position restraints and electric fields do not introduce + * inter-cg forces, only full electrostatics methods do. + * When we do not calculate the virial, fr->f_novirsum = f, + * so we have already communicated these forces. 
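+ */
+
+/* The decision just below, condensed into a sketch (the helper is
+ * hypothetical; the flags and fields are the ones used here): a second
+ * dd_move_f() is needed only when a distinct no-virial-sum buffer was
+ * actually filled by a full-electrostatics method. Kept under #if 0.
+ */
+#if 0
+static gmx_bool need_separate_f_comm_sketch(gmx_bool bFullElec,
+                                            int      n_intercg_excl,
+                                            int      flags)
+{
+    return bFullElec && n_intercg_excl > 0 && (flags & GMX_FORCE_VIRIAL);
+}
+#endif
+
+/*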
+ */ + if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl && + (flags & GMX_FORCE_VIRIAL)) + { + dd_move_f(cr->dd, fr->f_novirsum, NULL); + } + if (bSepLRF) + { + /* We should not update the shift forces here, + * since f_twin is already included in f. + */ + dd_move_f(cr->dd, fr->f_twin, NULL); + } + wallcycle_stop(wcycle, ewcMOVEF); + } + + if (bUseOrEmulGPU) + { + /* wait for local forces (or calculate in emulation mode) */ + if (bUseGPU) + { + wallcycle_start(wcycle, ewcWAIT_GPU_NB_L); + nbnxn_cuda_wait_gpu(nbv->cu_nbv, + nbv->grp[eintLocal].nbat, + flags, eatLocal, + enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR], + fr->fshift); + cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L); + + /* now clear the GPU outputs while we finish the step on the CPU */ + + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + else + { + wallcycle_start_nocount(wcycle, ewcFORCE); + do_nb_verlet(fr, ic, enerd, flags, eintLocal, + DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes, + nrnb, wcycle); + wallcycle_stop(wcycle, ewcFORCE); + } + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS); + if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0) + { + /* skip the reduction if there was no non-local work to do */ + nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal, + nbv->grp[eintLocal].nbat, f); + } + wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS); + wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (DOMAINDECOMP(cr)) + { + dd_force_flop_stop(cr->dd, nrnb); + if (wcycle) + { + dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF); + if (bUseGPU) + { + dd_cycles_add(cr->dd, cycles_wait_gpu, ddCyclWaitGPU); + } + } + } + + if (bDoForces) + { + if (IR_ELEC_FIELD(*inputrec)) + { + /* Compute forces due to electric field */ + calc_f_el(MASTER(cr) ? field : NULL, + start, homenr, mdatoms->chargeA, fr->f_novirsum, + inputrec->ex, inputrec->et, t); + } + + /* If we have NoVirSum forces, but we do not calculate the virial, + * we sum fr->f_novirum=f later. + */ + if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL))) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + + if (bSepLRF) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL, + nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + } + } + + if (flags & GMX_FORCE_VIRIAL) + { + /* Calculation of the virial must be done after vsites! */ + calc_virial(0, mdatoms->homenr, x, f, + vir_force, graph, box, nrnb, fr, inputrec->ePBC); + } + } + + if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) + { + pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, + f, vir_force, mdatoms, enerd, lambda, t); + } + + /* Add the forces from enforced rotation potentials (if any) */ + if (inputrec->bRot) + { + wallcycle_start(wcycle, ewcROTadd); + enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t); + wallcycle_stop(wcycle, ewcROTadd); + } + + /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. 
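+ */
+
+/* A toy sketch of the vsite spreading performed above, reduced to the
+ * simplest two-atom linear site x_s = (1-a)*x_i + a*x_j (illustrative
+ * only, not the general spread_vsite_f() machinery): the force on the
+ * massless site is redistributed with the construction weights, which
+ * preserves the total force and torque. Kept under #if 0.
+ */
+#if 0
+static void spread_linear_vsite_f_sketch(real a, const rvec f_site,
+                                         rvec f_i, rvec f_j)
+{
+    int m;
+
+    for (m = 0; m < DIM; m++)
+    {
+        f_i[m] += (1 - a)*f_site[m];
+        f_j[m] += a*f_site[m];
+    }
+}
+#endif
+
+/*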
*/ + IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle); + + if (PAR(cr) && !(cr->duty & DUTY_PME)) + { + /* In case of node-splitting, the PP nodes receive the long-range + * forces, virial and energy from the PME nodes here. + */ + pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr); + } + + if (bDoForces) + { + post_process_forces(cr, step, nrnb, wcycle, + top, box, x, f, vir_force, mdatoms, graph, fr, vsite, + flags); + } + + /* Sum the potential energy terms from group contributions */ + sum_epot(&(enerd->grpp), enerd->term); +} + +void do_force_cutsGROUP(FILE *fplog, t_commrec *cr, + t_inputrec *inputrec, + gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + gmx_groups_t *groups, + matrix box, rvec x[], history_t *hist, + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, t_fcdata *fcd, + real *lambda, t_graph *graph, + t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot, + double t, FILE *field, gmx_edsam_t ed, + gmx_bool bBornRadii, + int flags) +{ + int cg0, cg1, i, j; + int start, homenr; + double mu[2*DIM]; + gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; + gmx_bool bDoLongRangeNS, bDoForces, bDoPotential, bSepLRF; + gmx_bool bDoAdressWF; + matrix boxs; + rvec vzero, box_diag; + real e, v, dvdlambda[efptNR]; + t_pbc pbc; + float cycles_pme, cycles_force; + + start = 0; + homenr = mdatoms->homenr; + + bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog)); + + clear_mat(vir_force); + + cg0 = 0; + if (DOMAINDECOMP(cr)) + { + cg1 = cr->dd->ncg_tot; + } + else + { + cg1 = top->cgs.nr; + } + if (fr->n_tpi > 0) + { + cg1--; + } + + bStateChanged = (flags & GMX_FORCE_STATECHANGED); + bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE); + /* Should we update the long-range neighborlists at this step? */ + bDoLongRangeNS = fr->bTwinRange && bNS; + /* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */ + bFillGrid = (bNS && bStateChanged); + bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); + bDoForces = (flags & GMX_FORCE_FORCES); + bDoPotential = (flags & GMX_FORCE_ENERGY); + bSepLRF = ((inputrec->nstcalclr > 1) && bDoForces && + (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR)); + + /* should probably move this to the forcerec since it doesn't change */ + bDoAdressWF = ((fr->adress_type != eAdressOff)); + + if (bStateChanged) + { + update_forcerec(fr, box); + + if (NEED_MUTOT(*inputrec)) + { + /* Calculate total (local) dipole moment in a temporary common array. + * This makes it possible to sum them over nodes faster. + */ + calc_mu(start, homenr, + x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed, + mu, mu+DIM); + } + } + + if (fr->ePBC != epbcNONE) + { + /* Compute shift vectors every step, + * because of pressure coupling or box deformation! 
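+ */
+
+/* Sketch of what calc_shifts() below produces, simplified to a
+ * rectangular box and the 27 nearest images (the real table also
+ * holds extra x images for triclinic boxes): every shift vector is an
+ * integer combination of box vectors, which is why it must be rebuilt
+ * whenever pressure coupling deforms the box. Kept under #if 0.
+ */
+#if 0
+static void calc_shifts_sketch(const matrix box, rvec shift_vec[27])
+{
+    int i, j, k, n = 0;
+
+    for (k = -1; k <= 1; k++)
+    {
+        for (j = -1; j <= 1; j++)
+        {
+            for (i = -1; i <= 1; i++)
+            {
+                shift_vec[n][XX] = i*box[XX][XX];
+                shift_vec[n][YY] = j*box[YY][YY];
+                shift_vec[n][ZZ] = k*box[ZZ][ZZ];
+                n++;
+            }
+        }
+    }
+}
+#endif
+
+/*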
+ */ + if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged) + { + calc_shifts(box, fr->shift_vec); + } + + if (bCalcCGCM) + { + put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box, + &(top->cgs), x, fr->cg_cm); + inc_nrnb(nrnb, eNR_CGCM, homenr); + inc_nrnb(nrnb, eNR_RESETX, cg1-cg0); + } + else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) + { + unshift_self(graph, box, x); + } + } + else if (bCalcCGCM) + { + calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm); + inc_nrnb(nrnb, eNR_CGCM, homenr); + } + + if (bCalcCGCM && gmx_debug_at) + { + pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr); + } + +#ifdef GMX_MPI + if (!(cr->duty & DUTY_PME)) + { + /* Send particle coordinates to the pme nodes. + * Since this is only implemented for domain decomposition + * and domain decomposition does not use the graph, + * we do not need to worry about shifting. + */ + + int pme_flags = 0; + + wallcycle_start(wcycle, ewcPP_PMESENDX); + + bBS = (inputrec->nwall == 2); + if (bBS) + { + copy_mat(box, boxs); + svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + } + + if (EEL_PME(fr->eeltype)) + { + pme_flags |= GMX_PME_DO_COULOMB; + } + + if (EVDW_PME(fr->vdwtype)) + { + pme_flags |= GMX_PME_DO_LJ; + } + + gmx_pme_send_coordinates(cr, bBS ? boxs : box, x, + mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW], + (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)), + pme_flags, step); + + wallcycle_stop(wcycle, ewcPP_PMESENDX); + } +#endif /* GMX_MPI */ + + /* Communicate coordinates and sum dipole if necessary */ + if (DOMAINDECOMP(cr)) + { + wallcycle_start(wcycle, ewcMOVEX); + dd_move_x(cr->dd, box, x); + wallcycle_stop(wcycle, ewcMOVEX); + } + + /* update adress weight beforehand */ + if (bStateChanged && bDoAdressWF) + { + /* need pbc for adress weight calculation with pbc_dx */ + set_pbc(&pbc, inputrec->ePBC, box); + if (fr->adress_site == eAdressSITEcog) + { + update_adress_weights_cog(top->idef.iparams, top->idef.il, x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + else if (fr->adress_site == eAdressSITEcom) + { + update_adress_weights_com(fplog, cg0, cg1, &(top->cgs), x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + else if (fr->adress_site == eAdressSITEatomatom) + { + update_adress_weights_atom_per_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + else + { + update_adress_weights_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? 
NULL : &pbc); + } + } + + if (NEED_MUTOT(*inputrec)) + { + + if (bStateChanged) + { + if (PAR(cr)) + { + gmx_sumd(2*DIM, mu, cr); + } + for (i = 0; i < 2; i++) + { + for (j = 0; j < DIM; j++) + { + fr->mu_tot[i][j] = mu[i*DIM + j]; + } + } + } + if (fr->efep == efepNO) + { + copy_rvec(fr->mu_tot[0], mu_tot); + } + else + { + for (j = 0; j < DIM; j++) + { + mu_tot[j] = + (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j]; + } + } + } + + /* Reset energies */ + reset_enerdata(fr, bNS, enerd, MASTER(cr)); + clear_rvecs(SHIFTS, fr->fshift); + + if (bNS) + { + wallcycle_start(wcycle, ewcNS); + + if (graph && bStateChanged) + { + /* Calculate intramolecular shift vectors to make molecules whole */ + mk_mshift(fplog, graph, fr->ePBC, box, x); + } + + /* Do the actual neighbour searching */ + ns(fplog, fr, box, + groups, top, mdatoms, + cr, nrnb, bFillGrid, + bDoLongRangeNS); + + wallcycle_stop(wcycle, ewcNS); + } + + if (inputrec->implicit_solvent && bNS) + { + make_gb_nblist(cr, inputrec->gb_algorithm, + x, box, fr, &top->idef, graph, fr->born); + } + + if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME)) + { + wallcycle_start(wcycle, ewcPPDURINGPME); + dd_force_flop_start(cr->dd, nrnb); + } + + if (inputrec->bRot) + { + /* Enforced rotation has its own cycle counter that starts after the collective + * coordinates have been communicated. It is added to ddCyclF to allow + * for proper load-balancing */ + wallcycle_start(wcycle, ewcROT); + do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS); + wallcycle_stop(wcycle, ewcROT); + } + + /* Start the force cycle counter. + * This counter is stopped in do_forcelow_level. + * No parallel communication should occur while this counter is running, + * since that will interfere with the dynamic load balancing. + */ + wallcycle_start(wcycle, ewcFORCE); + + if (bDoForces) + { + /* Reset forces for which the virial is calculated separately: + * PME/Ewald forces if necessary */ + if (fr->bF_NoVirSum) + { + if (flags & GMX_FORCE_VIRIAL) + { + fr->f_novirsum = fr->f_novirsum_alloc; + if (fr->bDomDec) + { + clear_rvecs(fr->f_novirsum_n, fr->f_novirsum); + } + else + { + clear_rvecs(homenr, fr->f_novirsum+start); + } + } + else + { + /* We are not calculating the pressure so we do not need + * a separate array for forces that do not contribute + * to the pressure. + */ + fr->f_novirsum = f; + } + } + + /* Clear the short- and long-range forces */ + clear_rvecs(fr->natoms_force_constr, f); + if (bSepLRF && do_per_step(step, inputrec->nstcalclr)) + { + clear_rvecs(fr->natoms_force_constr, fr->f_twin); + } + + clear_rvec(fr->vir_diag_posres); + } + if (inputrec->ePull == epullCONSTRAINT) + { + clear_pull_forces(inputrec->pull); + } + + /* update QMMMrec, if necessary */ + if (fr->bQMMM) + { + update_QMMMrec(cr, fr, x, mdatoms, box, top); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) + { + posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x, + enerd, lambda, fr); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0) + { + fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr); + } + + /* Compute the bonded and non-bonded energies and optionally forces */ + do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef), + cr, nrnb, wcycle, mdatoms, + x, hist, f, bSepLRF ? 
fr->f_twin : f, enerd, fcd, top, fr->born, + &(top->atomtypes), bBornRadii, box, + inputrec->fepvals, lambda, + graph, &(top->excls), fr->mu_tot, + flags, + &cycles_pme); + + if (bSepLRF) + { + if (do_per_step(step, inputrec->nstcalclr)) + { + /* Add the long range forces to the short range forces */ + for (i = 0; i < fr->natoms_force_constr; i++) + { + rvec_add(fr->f_twin[i], f[i], f[i]); + } + } + } + + cycles_force = wallcycle_stop(wcycle, ewcFORCE); + + if (ed) + { + do_flood(cr, inputrec, x, f, ed, box, step, bNS); + } + + if (DOMAINDECOMP(cr)) + { + dd_force_flop_stop(cr->dd, nrnb); + if (wcycle) + { + dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF); + } + } + + if (bDoForces) + { + if (IR_ELEC_FIELD(*inputrec)) + { + /* Compute forces due to electric field */ + calc_f_el(MASTER(cr) ? field : NULL, + start, homenr, mdatoms->chargeA, fr->f_novirsum, + inputrec->ex, inputrec->et, t); + } + + if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce) + { + /* Compute thermodynamic force in hybrid AdResS region */ + adress_thermo_force(start, homenr, &(top->cgs), x, fr->f_novirsum, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + + /* Communicate the forces */ + if (DOMAINDECOMP(cr)) + { + wallcycle_start(wcycle, ewcMOVEF); + dd_move_f(cr->dd, f, fr->fshift); + /* Do we need to communicate the separate force array + * for terms that do not contribute to the single sum virial? + * Position restraints and electric fields do not introduce + * inter-cg forces, only full electrostatics methods do. + * When we do not calculate the virial, fr->f_novirsum = f, + * so we have already communicated these forces. + */ + if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl && + (flags & GMX_FORCE_VIRIAL)) + { + dd_move_f(cr->dd, fr->f_novirsum, NULL); + } + if (bSepLRF) + { + /* We should not update the shift forces here, + * since f_twin is already included in f. + */ + dd_move_f(cr->dd, fr->f_twin, NULL); + } + wallcycle_stop(wcycle, ewcMOVEF); + } + + /* If we have NoVirSum forces, but we do not calculate the virial, + * we sum fr->f_novirum=f later. + */ + if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL))) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + + if (bSepLRF) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL, + nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + } + } + + if (flags & GMX_FORCE_VIRIAL) + { + /* Calculation of the virial must be done after vsites! */ + calc_virial(0, mdatoms->homenr, x, f, + vir_force, graph, box, nrnb, fr, inputrec->ePBC); + } + } + + if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) + { + pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, + f, vir_force, mdatoms, enerd, lambda, t); + } + + /* Add the forces from enforced rotation potentials (if any) */ + if (inputrec->bRot) + { + wallcycle_start(wcycle, ewcROTadd); + enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t); + wallcycle_stop(wcycle, ewcROTadd); + } + + /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. 
*/ + IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle); + + if (PAR(cr) && !(cr->duty & DUTY_PME)) + { + /* In case of node-splitting, the PP nodes receive the long-range + * forces, virial and energy from the PME nodes here. + */ + pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr); + } + + if (bDoForces) + { + post_process_forces(cr, step, nrnb, wcycle, + top, box, x, f, vir_force, mdatoms, graph, fr, vsite, + flags); + } + + /* Sum the potential energy terms from group contributions */ + sum_epot(&(enerd->grpp), enerd->term); +} + +void do_force(FILE *fplog, t_commrec *cr, + t_inputrec *inputrec, + gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + gmx_groups_t *groups, + matrix box, rvec x[], history_t *hist, + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, t_fcdata *fcd, + real *lambda, t_graph *graph, + t_forcerec *fr, + gmx_vsite_t *vsite, rvec mu_tot, + double t, FILE *field, gmx_edsam_t ed, + gmx_bool bBornRadii, + int flags) +{ + /* modify force flag if not doing nonbonded */ + if (!fr->bNonbonded) + { + flags &= ~GMX_FORCE_NONBONDED; + } + + switch (inputrec->cutoff_scheme) + { + case ecutsVERLET: + do_force_cutsVERLET(fplog, cr, inputrec, + step, nrnb, wcycle, + top, + groups, + box, x, hist, + f, vir_force, + mdatoms, + enerd, fcd, + lambda, graph, + fr, fr->ic, + vsite, mu_tot, + t, field, ed, + bBornRadii, + flags); + break; + case ecutsGROUP: + do_force_cutsGROUP(fplog, cr, inputrec, + step, nrnb, wcycle, + top, + groups, + box, x, hist, + f, vir_force, + mdatoms, + enerd, fcd, + lambda, graph, + fr, vsite, mu_tot, + t, field, ed, + bBornRadii, + flags); + break; + default: + gmx_incons("Invalid cut-off scheme passed!"); + } +} + + +void do_constrain_first(FILE *fplog, gmx_constr_t constr, + t_inputrec *ir, t_mdatoms *md, + t_state *state, t_commrec *cr, t_nrnb *nrnb, + t_forcerec *fr, gmx_localtop_t *top) +{ + int i, m, start, end; + gmx_int64_t step; + real dt = ir->delta_t; + real dvdl_dum; + rvec *savex; + + snew(savex, state->natoms); + + start = 0; + end = md->homenr; + + if (debug) + { + fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n", + start, md->homenr, end); + } + /* Do a first constrain to reset particles... 
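+     *
+     * In outline (a sketch of the control flow below, not new logic):
+     * 1) constrain() with econqCoord projects x(0) onto the constraint
+     *    surface;
+     * 2) for velocity-Verlet integrators a second call with econqVeloc
+     *    projects v(0) onto the constraint manifold;
+     * 3) for leap-frog-style integrators the velocities are negated,
+     *    savex[i] = x[i] + dt*(-v[i]) reconstructs the positions at
+     *    t = -dt, and constraining savex with x(0) as reference yields
+     *    half-step velocities at t = -dt/2 that satisfy the constraints,
+     *    after which the velocities are negated back.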
*/
+    step = ir->init_step;
+    if (fplog)
+    {
+        char buf[STEPSTRSIZE];
+        fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
+                gmx_step_str(step, buf));
+    }
+    dvdl_dum = 0;
+
+    /* constrain the current position */
+    constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+              ir, NULL, cr, step, 0, md,
+              state->x, state->x, NULL,
+              fr->bMolPBC, state->box,
+              state->lambda[efptBONDED], &dvdl_dum,
+              NULL, NULL, nrnb, econqCoord,
+              ir->epc == epcMTTK, state->veta, state->veta);
+    if (EI_VV(ir->eI))
+    {
+        /* constrain the initial velocity, and save it */
+        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
+        /* might not yet treat veta correctly */
+        constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+                  ir, NULL, cr, step, 0, md,
+                  state->x, state->v, state->v,
+                  fr->bMolPBC, state->box,
+                  state->lambda[efptBONDED], &dvdl_dum,
+                  NULL, NULL, nrnb, econqVeloc,
+                  ir->epc == epcMTTK, state->veta, state->veta);
+    }
+    /* constrain the initial velocities at t-dt/2 */
+    if (EI_STATE_VELOCITY(ir->eI) && ir->eI != eiVV)
+    {
+        for (i = start; (i < end); i++)
+        {
+            for (m = 0; (m < DIM); m++)
+            {
+                /* Reverse the velocity */
+                state->v[i][m] = -state->v[i][m];
+                /* Store the position at t-dt in buf */
+                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
+            }
+        }
+        /* Shake the positions at t=-dt with the positions at t=0
+         * as reference coordinates.
+         */
+        if (fplog)
+        {
+            char buf[STEPSTRSIZE];
+            fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
+                    gmx_step_str(step, buf));
+        }
+        dvdl_dum = 0;
+        constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+                  ir, NULL, cr, step, -1, md,
+                  state->x, savex, NULL,
+                  fr->bMolPBC, state->box,
+                  state->lambda[efptBONDED], &dvdl_dum,
+                  state->v, NULL, nrnb, econqCoord,
+                  ir->epc == epcMTTK, state->veta, state->veta);
+
+        for (i = start; i < end; i++)
+        {
+            for (m = 0; m < DIM; m++)
+            {
+                /* Re-reverse the velocities */
+                state->v[i][m] = -state->v[i][m];
+            }
+        }
+    }
+    sfree(savex);
+}
+
+
+static void
+integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
+                double *enerout, double *virout)
+{
+    double enersum, virsum;
+    double invscale, invscale2, invscale3;
+    double r, ea, eb, ec, pa, pb, pc, pd;
+    double y0, f, g, h;
+    int    ri, offset, tabfactor;
+
+    invscale  = 1.0/scale;
+    invscale2 = invscale*invscale;
+    invscale3 = invscale*invscale2;
+
+    /* Following summation derived from cubic spline definition,
+     * Numerical Recipes in C, second edition, p. 113-116. Exact for
+     * the cubic spline. We first calculate the negative of the
+     * energy from rvdw to rvdw_switch, assuming that g(r)=1, and then
+     * add the more standard, abrupt cutoff correction to that result,
+     * yielding the long-range correction for a switched function. We
+     * perform both the pressure and energy loops at the same time for
+     * simplicity, as the computational cost is low. */
+
+    if (offstart == 0)
+    {
+        /* Since the dispersion table has been scaled down a factor
+         * 6.0 and the repulsion a factor 12.0 to compensate for the
+         * c6/c12 parameters inside nbfp[] being scaled up (to save
+         * flops in kernels), we need to correct for this.
+         */
+        tabfactor = 6.0;
+    }
+    else
+    {
+        tabfactor = 12.0;
+    }
+
+    enersum = 0.0;
+    virsum  = 0.0;
+    for (ri = rstart; ri < rend; ++ri)
+    {
+        r  = ri*invscale;
+        ea = invscale3;
+        eb = 2.0*invscale2*r;
+        ec = invscale*r*r;
+
+        pa = invscale3;
+        pb = 3.0*invscale2*r;
+        pc = 3.0*invscale*r*r;
+        pd = r*r*r;
+
+        /* this "8" is from the packing in the vdwtab array - perhaps
+           should be defined?
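+           The layout assumed in this sketch is the cubic-spline Y,F,G,H
+           format with both VdW interactions packed per table point:
+             vdwtab[8*ri + 0..3]  Y,F,G,H for dispersion (offstart == 0)
+             vdwtab[8*ri + 4..7]  Y,F,G,H for repulsion  (offstart == 4)
+           i.e. stride = formatsize (4) times ninteractions (2) for the
+           VdW-only table, which is what makes offset = 8*ri + offstart
+           correct below.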
*/ + + offset = 8*ri + offstart; + y0 = vdwtab[offset]; + f = vdwtab[offset+1]; + g = vdwtab[offset+2]; + h = vdwtab[offset+3]; + + enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4); + virsum += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3); + } + *enerout = 4.0*M_PI*enersum*tabfactor; + *virout = 4.0*M_PI*virsum*tabfactor; +} + +void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr) +{ - double eners[2], virs[2], enersum, virsum, y0, f, g, h; - double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd; - double invscale, invscale2, invscale3; - int ri0, ri1, ri, i, offstart, offset; - real scale, *vdwtab, tabfactor, tmp; ++ double eners[2], virs[2], enersum, virsum, y0, f, g, h; ++ double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd; ++ double invscale, invscale2, invscale3; ++ int ri0, ri1, ri, i, offstart, offset; ++ real scale, *vdwtab, tabfactor, tmp; + + fr->enershiftsix = 0; + fr->enershifttwelve = 0; + fr->enerdiffsix = 0; + fr->enerdifftwelve = 0; + fr->virdiffsix = 0; + fr->virdifftwelve = 0; + + if (eDispCorr != edispcNO) + { + for (i = 0; i < 2; i++) + { + eners[i] = 0; + virs[i] = 0; + } - if (fr->vdwtype == evdwSWITCH || fr->vdwtype == evdwSHIFT || - fr->vdw_modifier == eintmodPOTSWITCH || - fr->vdw_modifier == eintmodFORCESWITCH) ++ if ((fr->vdw_modifier == eintmodPOTSHIFT) || ++ (fr->vdw_modifier == eintmodPOTSWITCH) || ++ (fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdwtype == evdwSHIFT) || ++ (fr->vdwtype == evdwSWITCH)) + { - if (fr->rvdw_switch == 0) ++ if (((fr->vdw_modifier == eintmodPOTSWITCH) || ++ (fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdwtype == evdwSWITCH)) && fr->rvdw_switch == 0) + { + gmx_fatal(FARGS, + "With dispersion correction rvdw-switch can not be zero " + "for vdw-type = %s", evdw_names[fr->vdwtype]); + } + - scale = fr->nblists[0].table_elec_vdw.scale; ++ scale = fr->nblists[0].table_vdw.scale; + vdwtab = fr->nblists[0].table_vdw.data; + + /* Round the cut-offs to exact table values for precision */ + ri0 = floor(fr->rvdw_switch*scale); + ri1 = ceil(fr->rvdw*scale); ++ ++ /* The code below has some support for handling force-switching, i.e. ++ * when the force (instead of potential) is switched over a limited ++ * region. This leads to a constant shift in the potential inside the ++ * switching region, which we can handle by adding a constant energy ++ * term in the force-switch case just like when we do potential-shift. ++ * ++ * For now this is not enabled, but to keep the functionality in the ++ * code we check separately for switch and shift. When we do force-switch ++ * the shifting point is rvdw_switch, while it is the cutoff when we ++ * have a classical potential-shift. ++ * ++ * For a pure potential-shift the potential has a constant shift ++ * all the way out to the cutoff, and that is it. For other forms ++ * we need to calculate the constant shift up to the point where we ++ * start modifying the potential. ++ */ ++ ri0 = (fr->vdw_modifier == eintmodPOTSHIFT) ? ri1 : ri0; ++ + r0 = ri0/scale; + r1 = ri1/scale; + rc3 = r0*r0*r0; + rc9 = rc3*rc3*rc3; + - if (fr->vdwtype == evdwSHIFT || - fr->vdw_modifier == eintmodFORCESWITCH) ++ if ((fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdwtype == evdwSHIFT)) + { + /* Determine the constant energy shift below rvdw_switch. 
+             * Table has a scale factor since we have scaled it down to compensate
+             * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
+             */
+            fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
+            fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
+        }
++        else if (fr->vdw_modifier == eintmodPOTSHIFT)
++        {
++            fr->enershiftsix    = (real)(-1.0/(rc3*rc3));
++            fr->enershifttwelve = (real)( 1.0/(rc9*rc3));
++        }
++
+        /* Add the constant part from 0 to rvdw_switch.
+         * This integration from 0 to rvdw_switch overcounts the number
+         * of interactions by 1, as it also counts the self interaction.
+         * We will correct for this later.
+         */
+        eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
+        eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
++
++        /* Calculate the contribution in the range [r0,r1] where we
++         * modify the potential. For a pure potential-shift modifier we will
++         * have ri0==ri1, and there will not be any contribution here.
++         */
+        for (i = 0; i < 2; i++)
+        {
+            enersum = 0;
+            virsum  = 0;
+            integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
+            eners[i] -= enersum;
+            virs[i]  -= virsum;
+        }
+
-        /* now add the correction for rvdw_switch to infinity */
++        /* Alright: Above we compensated by REMOVING the parts outside r0
++         * corresponding to the ideal VdW 1/r6 and 1/r12 potentials.
++         *
++         * Regardless of whether r0 is the point where we start switching,
++         * or the cutoff where we calculated the constant shift, we include
++         * all the parts we are missing out to infinity from r0 by
++         * calculating the analytical dispersion correction.
++         */
+        eners[0] += -4.0*M_PI/(3.0*rc3);
+        eners[1] +=  4.0*M_PI/(9.0*rc9);
+        virs[0]  +=  8.0*M_PI/rc3;
+        virs[1]  += -16.0*M_PI/(3.0*rc9);
+    }
+    else if (fr->vdwtype == evdwCUT ||
+             EVDW_PME(fr->vdwtype) ||
+             fr->vdwtype == evdwUSER)
+    {
+        if (fr->vdwtype == evdwUSER && fplog)
+        {
+            fprintf(fplog,
+                    "WARNING: using dispersion correction with user tables\n");
+        }
+
+        /* Note that with LJ-PME, the dispersion correction is multiplied
+         * by the difference between the actual C6 and the value of C6
+         * that would produce the combination rule.
+         * This means the normal energy and virial difference formulas
+         * can be used here.
+         */
+
+        rc3 = fr->rvdw*fr->rvdw*fr->rvdw;
+        rc9 = rc3*rc3*rc3;
+        /* Contribution beyond the cut-off */
+        eners[0] += -4.0*M_PI/(3.0*rc3);
+        eners[1] +=  4.0*M_PI/(9.0*rc9);
+        if (fr->vdw_modifier == eintmodPOTSHIFT)
+        {
+            /* Contribution within the cut-off */
+            eners[0] += -4.0*M_PI/(3.0*rc3);
+            eners[1] +=  4.0*M_PI/(3.0*rc9);
+        }
+        /* Contribution beyond the cut-off */
+        virs[0]  +=  8.0*M_PI/rc3;
+        virs[1]  += -16.0*M_PI/(3.0*rc9);
+    }
+    else
+    {
+        gmx_fatal(FARGS,
+                  "Dispersion correction is not implemented for vdw-type = %s",
+                  evdw_names[fr->vdwtype]);
+    }
+
-    /* TODO: remove this code once we have group LJ-PME kernels
-     * that calculate the exact, full LJ param C6/r^6 within the cut-off,
-     * as the current nbnxn kernels do.
-     */
++    /* When we deprecate the group kernels the code below can go too */
+    if (fr->vdwtype == evdwPME && fr->cutoff_scheme == ecutsGROUP)
+    {
+        /* Calculate self-interaction coefficient (assuming that
+         * the reciprocal-space contribution is constant in the
+         * region that contributes to the self-interaction).
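+         *
+         * As a rough sketch of where the constants below come from (with
+         * beta = ewaldcoeff_lj): the LJ-PME long-range kernel tends to
+         * beta^6/6 for r -> 0 (see v_lj_ewald_lr() in tables.c), which is
+         * the self-interaction shift stored in enershiftsix, while
+         * integrating the grid contribution over all space yields the
+         * (sqrt(pi)*beta)^3/3 energy and (sqrt(pi)*beta)^3 virial terms.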
+         */
+        fr->enershiftsix = pow(fr->ewaldcoeff_lj, 6) / 6.0;
+
+        eners[0] += -pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3)/3.0;
+        virs[0]  +=  pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3);
+    }
+
+        fr->enerdiffsix    = eners[0];
+        fr->enerdifftwelve = eners[1];
+        /* The 0.5 is due to the Gromacs definition of the virial */
+        fr->virdiffsix     = 0.5*virs[0];
+        fr->virdifftwelve  = 0.5*virs[1];
+    }
+}
+
+void calc_dispcorr(FILE *fplog, t_inputrec *ir, t_forcerec *fr,
+                   gmx_int64_t step, int natoms,
+                   matrix box, real lambda, tensor pres, tensor virial,
+                   real *prescorr, real *enercorr, real *dvdlcorr)
+{
+    gmx_bool bCorrAll, bCorrPres;
+    real     dvdlambda, invvol, dens, ninter, avcsix, avctwelve, enerdiff, svir = 0, spres = 0;
+    int      m;
+
+    *prescorr = 0;
+    *enercorr = 0;
+    *dvdlcorr = 0;
+
+    clear_mat(virial);
+    clear_mat(pres);
+
+    if (ir->eDispCorr != edispcNO)
+    {
+        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
+                     ir->eDispCorr == edispcAllEnerPres);
+        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
+                     ir->eDispCorr == edispcAllEnerPres);
+
+        invvol = 1/det(box);
+        if (fr->n_tpi)
+        {
+            /* Only correct for the interactions with the inserted molecule */
+            dens   = (natoms - fr->n_tpi)*invvol;
+            ninter = fr->n_tpi;
+        }
+        else
+        {
+            dens   = natoms*invvol;
+            ninter = 0.5*natoms;
+        }
+
+        if (ir->efep == efepNO)
+        {
+            avcsix    = fr->avcsix[0];
+            avctwelve = fr->avctwelve[0];
+        }
+        else
+        {
+            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
+            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
+        }
+
+        enerdiff   = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
+        *enercorr += avcsix*enerdiff;
+        dvdlambda  = 0.0;
+        if (ir->efep != efepNO)
+        {
+            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
+        }
+        if (bCorrAll)
+        {
+            enerdiff   = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
+            *enercorr += avctwelve*enerdiff;
+            if (fr->efep != efepNO)
+            {
+                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
+            }
+        }
+
+        if (bCorrPres)
+        {
+            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
+            if (ir->eDispCorr == edispcAllEnerPres)
+            {
+                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
+            }
+            /* The factor 2 is because of the Gromacs virial definition */
+            spres = -2.0*invvol*svir*PRESFAC;
+
+            for (m = 0; m < DIM; m++)
+            {
+                virial[m][m] += svir;
+                pres[m][m]   += spres;
+            }
+            *prescorr += spres;
+        }
+
+        /* Can't currently control when it prints, for now, just print when debugging */
+        if (debug)
+        {
+            if (bCorrAll)
+            {
+                fprintf(debug, "Long Range LJ corr.: %10.4e, %10.4e\n",
+                        avcsix, avctwelve);
+            }
+            if (bCorrPres)
+            {
+                fprintf(debug,
+                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
+                        *enercorr, spres, svir);
+            }
+            else
+            {
+                fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
+            }
+        }
+
+        if (fr->bSepDVDL && do_per_step(step, ir->nstlog))
+        {
+            gmx_print_sepdvdl(fplog, "Dispersion correction", *enercorr, dvdlambda);
+        }
+        if (fr->efep != efepNO)
+        {
+            *dvdlcorr += dvdlambda;
+        }
+    }
+}
+
+void do_pbc_first(FILE *fplog, matrix box, t_forcerec *fr,
+                  t_graph *graph, rvec x[])
+{
+    if (fplog)
+    {
+        fprintf(fplog, "Removing pbc first time\n");
+    }
+    calc_shifts(box, fr->shift_vec);
+    if (graph)
+    {
+        mk_mshift(fplog, graph, fr->ePBC, box, x);
+        if (gmx_debug_at)
+        {
+            p_graph(debug, "do_pbc_first 1", graph);
+        }
+        shift_self(graph, box, x);
+        /* By doing an extra mk_mshift the molecules that are broken
+         * because they were e.g. imported from another software
+         * will be made whole again. Such are the healing powers
+         * of GROMACS.
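+         *
+         * A minimal example: with a 10 nm box and a bond between atoms
+         * at x = 0.1 and x = 9.9, the first mk_mshift() assigns the
+         * second atom a shift of -1 box vector, shift_self() places it
+         * at x = -0.1 so the molecule is whole, and the second
+         * mk_mshift() then recomputes consistent shifts from the whole
+         * configuration.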
+         */
+        mk_mshift(fplog, graph, fr->ePBC, box, x);
+        if (gmx_debug_at)
+        {
+            p_graph(debug, "do_pbc_first 2", graph);
+        }
+    }
+    if (fplog)
+    {
+        fprintf(fplog, "Done rmpbc\n");
+    }
+}
+
+static void low_do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
+                            gmx_mtop_t *mtop, rvec x[],
+                            gmx_bool bFirst)
+{
+    t_graph        *graph;
+    int             mb, as, mol;
+    gmx_molblock_t *molb;
+
+    if (bFirst && fplog)
+    {
+        fprintf(fplog, "Removing pbc first time\n");
+    }
+
+    snew(graph, 1);
+    as = 0;
+    for (mb = 0; mb < mtop->nmolblock; mb++)
+    {
+        molb = &mtop->molblock[mb];
+        if (molb->natoms_mol == 1 ||
+            (!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
+        {
+            /* Just one atom or charge group in the molecule, no PBC required */
+            as += molb->nmol*molb->natoms_mol;
+        }
+        else
+        {
+            /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
+            mk_graph_ilist(NULL, mtop->moltype[molb->type].ilist,
+                           0, molb->natoms_mol, FALSE, FALSE, graph);
+
+            for (mol = 0; mol < molb->nmol; mol++)
+            {
+                mk_mshift(fplog, graph, ePBC, box, x+as);
+
+                shift_self(graph, box, x+as);
+                /* The molecule is whole now.
+                 * We don't need the second mk_mshift call as in do_pbc_first,
+                 * since we no longer need this graph.
+                 */
+
+                as += molb->natoms_mol;
+            }
+            done_graph(graph);
+        }
+    }
+    sfree(graph);
+}
+
+void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
+                       gmx_mtop_t *mtop, rvec x[])
+{
+    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, TRUE);
+}
+
+void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
+                 gmx_mtop_t *mtop, rvec x[])
+{
+    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, FALSE);
+}
+
+void finish_run(FILE *fplog, t_commrec *cr,
+                t_inputrec *inputrec,
+                t_nrnb nrnb[], gmx_wallcycle_t wcycle,
+                gmx_walltime_accounting_t walltime_accounting,
+                wallclock_gpu_t *gputimes,
+                gmx_bool bWriteStat)
+{
+    int     i, j;
+    t_nrnb *nrnb_tot = NULL;
+    real    delta_t;
+    double  nbfs, mflop;
+    double  elapsed_time,
+            elapsed_time_over_all_ranks,
+            elapsed_time_over_all_threads,
+            elapsed_time_over_all_threads_over_all_ranks;
+    wallcycle_sum(cr, wcycle);
+
+    if (cr->nnodes > 1)
+    {
+        snew(nrnb_tot, 1);
+#ifdef GMX_MPI
+        MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+#endif
+    }
+    else
+    {
+        nrnb_tot = nrnb;
+    }
+
+    elapsed_time                                 = walltime_accounting_get_elapsed_time(walltime_accounting);
+    elapsed_time_over_all_ranks                  = elapsed_time;
+    elapsed_time_over_all_threads                = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
+    elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
+#ifdef GMX_MPI
+    if (cr->nnodes > 1)
+    {
+        /* reduce elapsed_time over all MPI ranks in the current simulation */
+        MPI_Allreduce(&elapsed_time,
+                      &elapsed_time_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+        elapsed_time_over_all_ranks /= cr->nnodes;
+        /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
+         * current simulation.
+         */
+        MPI_Allreduce(&elapsed_time_over_all_threads,
+                      &elapsed_time_over_all_threads_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+    }
+#endif
+
+    if (SIMMASTER(cr))
+    {
+        print_flop(fplog, nrnb_tot, &nbfs, &mflop);
+    }
+    if (cr->nnodes > 1)
+    {
+        sfree(nrnb_tot);
+    }
+
+    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
+    {
+        print_dd_statistics(cr, inputrec, fplog);
+    }
+
+    if (SIMMASTER(cr))
+    {
+        wallcycle_print(fplog, cr->nnodes, cr->npmenodes,
+                        elapsed_time_over_all_ranks,
+                        wcycle, gputimes);
+
+        if (EI_DYNAMICS(inputrec->eI))
+        {
+            delta_t = inputrec->delta_t;
+        }
+        else
+        {
+            delta_t = 0;
+        }
+
+        if (fplog)
+        {
+            print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+        if (bWriteStat)
+        {
+            print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+    }
+}
+
+extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, real *lambda, double *lam0)
+{
+    /* this function works, but could probably use a logic rewrite to keep all the different
+       types of efep straight. */
+
+    int       i;
+    t_lambda *fep = ir->fepvals;
+
+    if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
+    {
+        for (i = 0; i < efptNR; i++)
+        {
+            lambda[i] = 0.0;
+            if (lam0)
+            {
+                lam0[i] = 0.0;
+            }
+        }
+        return;
+    }
+    else
+    {
+        *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
+                                             if checkpoint is set -- a kludge is in for now
+                                             to prevent this.*/
+        for (i = 0; i < efptNR; i++)
+        {
+            /* overwrite lambda state with init_lambda for now for backwards compatibility */
+            if (fep->init_lambda >= 0) /* if it's -1, it was never initialized */
+            {
+                lambda[i] = fep->init_lambda;
+                if (lam0)
+                {
+                    lam0[i] = lambda[i];
+                }
+            }
+            else
+            {
+                lambda[i] = fep->all_lambda[i][*fep_state];
+                if (lam0)
+                {
+                    lam0[i] = lambda[i];
+                }
+            }
+        }
+        if (ir->bSimTemp)
+        {
+            /* need to rescale control temperatures to match current state */
+            for (i = 0; i < ir->opts.ngtc; i++)
+            {
+                if (ir->opts.ref_t[i] > 0)
+                {
+                    ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
+                }
+            }
+        }
+    }
+
+    /* Send to the log the information on the current lambdas */
+    if (fplog != NULL)
+    {
+        fprintf(fplog, "Initial vector of lambda components:[ ");
+        for (i = 0; i < efptNR; i++)
+        {
+            fprintf(fplog, "%10.4f ", lambda[i]);
+        }
+        fprintf(fplog, "]\n");
+    }
+    return;
+}
+
+
+void init_md(FILE *fplog,
+             t_commrec *cr, t_inputrec *ir, const output_env_t oenv,
+             double *t, double *t0,
+             real *lambda, int *fep_state, double *lam0,
+             t_nrnb *nrnb, gmx_mtop_t *mtop,
+             gmx_update_t *upd,
+             int nfile, const t_filenm fnm[],
+             gmx_mdoutf_t *outf, t_mdebin **mdebin,
+             tensor force_vir, tensor shake_vir, rvec mu_tot,
+             gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags)
+{
+    int  i, j, n;
+    real tmpt, mod;
+
+    /* Initial values */
+    *t = *t0 = ir->init_t;
+
+    *bSimAnn = FALSE;
+    for (i = 0; i < ir->opts.ngtc; i++)
+    {
+        /* set bSimAnn if any group is being annealed */
+        if (ir->opts.annealing[i] != eannNO)
+        {
+            *bSimAnn = TRUE;
+        }
+    }
+    if (*bSimAnn)
+    {
+        update_annealing_target_temp(&(ir->opts), ir->init_t);
+    }
+
+    /* Initialize lambda variables */
+    initialize_lambdas(fplog, ir, fep_state, lambda, lam0);
+
+    if (upd)
+    {
+        *upd = init_update(ir);
+    }
+
+
+    if (vcm != NULL)
+    {
+        *vcm = init_vcm(fplog, &mtop->groups, ir);
+    }
+
+    if (EI_DYNAMICS(ir->eI) && !(Flags &
MD_APPENDFILES)) + { + if (ir->etc == etcBERENDSEN) + { + please_cite(fplog, "Berendsen84a"); + } + if (ir->etc == etcVRESCALE) + { + please_cite(fplog, "Bussi2007a"); + } + } + + init_nrnb(nrnb); + + if (nfile != -1) + { + *outf = init_mdoutf(fplog, nfile, fnm, Flags, cr, ir, mtop, oenv); + + *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : mdoutf_get_fp_ene(*outf), + mtop, ir, mdoutf_get_fp_dhdl(*outf)); + } + + if (ir->bAdress) + { + please_cite(fplog, "Fritsch12"); + please_cite(fplog, "Junghans10"); + } + /* Initiate variables */ + clear_mat(force_vir); + clear_mat(shake_vir); + clear_rvec(mu_tot); + + debug_gmx(); +} diff --cc src/gromacs/mdlib/tables.c index ff73a90e89,0000000000..404516c467 mode 100644,000000..100644 --- a/src/gromacs/mdlib/tables.c +++ b/src/gromacs/mdlib/tables.c @@@ -1,1677 -1,0 +1,1777 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "gromacs/math/utilities.h"
+#include "typedefs.h"
+#include "names.h"
+#include "gromacs/utility/smalloc.h"
+#include "gmx_fatal.h"
+#include "gromacs/fileio/futil.h"
+#include "xvgr.h"
+#include "vec.h"
+#include "main.h"
+#include "network.h"
+#include "physics.h"
+#include "force.h"
+#include "gromacs/fileio/gmxfio.h"
+#include "macros.h"
+#include "tables.h"
+
+/* All the possible (implemented) table functions */
+enum {
+    etabLJ6,
+    etabLJ12,
+    etabLJ6Shift,
+    etabLJ12Shift,
+    etabShift,
+    etabRF,
+    etabRF_ZERO,
+    etabCOUL,
+    etabEwald,
+    etabEwaldSwitch,
+    etabEwaldUser,
+    etabEwaldUserSwitch,
+    etabLJ6Ewald,
+    etabLJ6Switch,
+    etabLJ12Switch,
+    etabCOULSwitch,
+    etabLJ6Encad,
+    etabLJ12Encad,
+    etabCOULEncad,
+    etabEXPMIN,
+    etabUSER,
+    etabNR
+};
+
+/** Evaluates to true if the table type contains user data. */
+#define ETAB_USER(e) ((e) == etabUSER || \
+                      (e) == etabEwaldUser || (e) == etabEwaldUserSwitch)
+
+typedef struct {
+    const char *name;
+    gmx_bool    bCoulomb;
+} t_tab_props;
+
+/* This structure holds name and a flag that tells whether
+   this is a Coulomb type function */
+static const t_tab_props tprops[etabNR] = {
+    { "LJ6",  FALSE },
+    { "LJ12", FALSE },
+    { "LJ6Shift", FALSE },
+    { "LJ12Shift", FALSE },
+    { "Shift", TRUE },
+    { "RF", TRUE },
+    { "RF-zero", TRUE },
+    { "COUL", TRUE },
+    { "Ewald", TRUE },
+    { "Ewald-Switch", TRUE },
+    { "Ewald-User", TRUE },
+    { "Ewald-User-Switch", TRUE },
+    { "LJ6Ewald", FALSE },
+    { "LJ6Switch", FALSE },
+    { "LJ12Switch", FALSE },
+    { "COULSwitch", TRUE },
+    { "LJ6-Encad shift", FALSE },
+    { "LJ12-Encad shift", FALSE },
+    { "COUL-Encad shift", TRUE },
+    { "EXPMIN", FALSE },
+    { "USER", FALSE },
+};
+
+/* Index in the table that says which function to use */
+enum {
+    etiCOUL, etiLJ6, etiLJ12, etiNR
+};
+
+typedef struct {
+    int     nx, nx0;
+    double  tabscale;
+    double *x, *v, *f;
+} t_tabledata;
+
+#define pow2(x) ((x)*(x))
+#define pow3(x) ((x)*(x)*(x))
+#define pow4(x) ((x)*(x)*(x)*(x))
+#define pow5(x) ((x)*(x)*(x)*(x)*(x))
+
+double v_q_ewald_lr(double beta, double r)
+{
+    if (r == 0)
+    {
+        return beta*2/sqrt(M_PI);
+    }
+    else
+    {
+        return gmx_erfd(beta*r)/r;
+    }
+}
+
+double v_lj_ewald_lr(double beta, double r)
+{
+    double br, br2, br4, r6, factor;
+    if (r == 0)
+    {
+        return pow(beta, 6)/6;
+    }
+    else
+    {
+        br     = beta*r;
+        br2    = br*br;
+        br4    = br2*br2;
+        r6     = pow(r, 6.0);
+        factor = (1.0 - exp(-br2)*(1 + br2 + 0.5*br4))/r6;
+        return factor;
+    }
+}
+
+void table_spline3_fill_ewald_lr(real                                 *table_f,
+                                 real                                 *table_v,
+                                 real                                 *table_fdv0,
+                                 int                                   ntab,
+                                 real                                  dx,
+                                 real                                  beta,
+                                 real_space_grid_contribution_computer v_lr)
+{
+    real     tab_max;
+    int      i, i_inrange;
+    double   dc, dc_new;
+    gmx_bool bOutOfRange;
+    double   v_r0, v_r1, v_inrange, vi, a0, a1, a2dx;
+    double   x_r0;
+
+    /* This function is called using either v_q_ewald_lr or v_lj_ewald_lr as a function argument
+     * depending on whether we should create electrostatic or Lennard-Jones Ewald tables.
+     */
+
+    if (ntab < 2)
+    {
+        gmx_fatal(FARGS, "Can not make a spline table with less than 2 points");
+    }
+
+    /* We need some margin to be able to divide table values by r
+     * in the kernel and also to do the integration arithmetics
+     * without going out of range. Furthermore, we divide by dx below.
+     */
+    tab_max = GMX_REAL_MAX*0.0001;
+
+    /* This function produces a table with:
+     * maximum energy error: V'''/(6*12*sqrt(3))*dx^3
+     * maximum force error:  V'''/(6*4)*dx^2
+     * The rms force error is the max error times 1/sqrt(5)=0.45.
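+     *
+     * ewald_spline3_table_scale() further down inverts these bounds to
+     * choose the spacing: requiring a force error ftol with |V'''|
+     * bounded by K gives dx <= sqrt(6*4*ftol/K), i.e. a table scale
+     * (1/dx) of sqrt(K/(6*4*ftol)), and the energy bound correspondingly
+     * gives a cube-root expression; see that function for the constants
+     * actually used.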
+ */ + + bOutOfRange = FALSE; + i_inrange = ntab; + v_inrange = 0; + dc = 0; + for (i = ntab-1; i >= 0; i--) + { + x_r0 = i*dx; + + v_r0 = (*v_lr)(beta, x_r0); + + if (!bOutOfRange) + { + i_inrange = i; + v_inrange = v_r0; + + vi = v_r0; + } + else + { + /* Linear continuation for the last point in range */ + vi = v_inrange - dc*(i - i_inrange)*dx; + } + + if (table_v != NULL) + { + table_v[i] = vi; + } + + if (i == 0) + { + continue; + } + + /* Get the potential at table point i-1 */ + v_r1 = (*v_lr)(beta, (i-1)*dx); + + if (v_r1 != v_r1 || v_r1 < -tab_max || v_r1 > tab_max) + { + bOutOfRange = TRUE; + } + + if (!bOutOfRange) + { + /* Calculate the average second derivative times dx over interval i-1 to i. + * Using the function values at the end points and in the middle. + */ + a2dx = (v_r0 + v_r1 - 2*(*v_lr)(beta, x_r0-0.5*dx))/(0.25*dx); + /* Set the derivative of the spline to match the difference in potential + * over the interval plus the average effect of the quadratic term. + * This is the essential step for minimizing the error in the force. + */ + dc = (v_r0 - v_r1)/dx + 0.5*a2dx; + } + + if (i == ntab - 1) + { + /* Fill the table with the force, minus the derivative of the spline */ + table_f[i] = -dc; + } + else + { + /* tab[i] will contain the average of the splines over the two intervals */ + table_f[i] += -0.5*dc; + } + + if (!bOutOfRange) + { + /* Make spline s(x) = a0 + a1*(x - xr) + 0.5*a2*(x - xr)^2 + * matching the potential at the two end points + * and the derivative dc at the end point xr. + */ + a0 = v_r0; + a1 = dc; + a2dx = (a1*dx + v_r1 - a0)*2/dx; + + /* Set dc to the derivative at the next point */ + dc_new = a1 - a2dx; + + if (dc_new != dc_new || dc_new < -tab_max || dc_new > tab_max) + { + bOutOfRange = TRUE; + } + else + { + dc = dc_new; + } + } + + table_f[(i-1)] = -0.5*dc; + } + /* Currently the last value only contains half the force: double it */ + table_f[0] *= 2; + + if (table_v != NULL && table_fdv0 != NULL) + { + /* Copy to FDV0 table too. Allocation occurs in forcerec.c, + * init_ewald_f_table(). + */ + for (i = 0; i < ntab-1; i++) + { + table_fdv0[4*i] = table_f[i]; + table_fdv0[4*i+1] = table_f[i+1]-table_f[i]; + table_fdv0[4*i+2] = table_v[i]; + table_fdv0[4*i+3] = 0.0; + } + table_fdv0[4*(ntab-1)] = table_f[(ntab-1)]; + table_fdv0[4*(ntab-1)+1] = -table_f[(ntab-1)]; + table_fdv0[4*(ntab-1)+2] = table_v[(ntab-1)]; + table_fdv0[4*(ntab-1)+3] = 0.0; + } +} + +/* The scale (1/spacing) for third order spline interpolation + * of the Ewald mesh contribution which needs to be subtracted + * from the non-bonded interactions. + */ +real ewald_spline3_table_scale(real ewaldcoeff, real rc) +{ + double erf_x_d3 = 1.0522; /* max of (erf(x)/x)''' */ + double ftol, etol; + double sc_f, sc_e; + + /* Force tolerance: single precision accuracy */ + ftol = GMX_FLOAT_EPS; + sc_f = sqrt(erf_x_d3/(6*4*ftol*ewaldcoeff))*ewaldcoeff; + + /* Energy tolerance: 10x more accurate than the cut-off jump */ + etol = 0.1*gmx_erfc(ewaldcoeff*rc); + etol = max(etol, GMX_REAL_EPS); + sc_e = pow(erf_x_d3/(6*12*sqrt(3)*etol), 1.0/3.0)*ewaldcoeff; + + return max(sc_f, sc_e); +} + +/* Calculate the potential and force for an r value + * in exactly the same way it is done in the inner loop. + * VFtab is a pointer to the table data, offset is + * the point where we should begin and stride is + * 4 if we have a buckingham table, 3 otherwise. + * If you want to evaluate table no N, set offset to 4*N. 
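+ *
+ * (Note: for the tables built in this file, stride is
+ * formatsize*ninteractions, i.e. 12 for the combined
+ * elec+vdwrep+vdwdisp tables and 4 for single-interaction tables.)
+ * With eps the fractional position inside a table interval, the
+ * Y,F,G,H data expand to a cubic in eps:
+ *
+ *   V(eps)  = Y + eps*F + eps^2*G + eps^3*H
+ *   V'(eps) = (F + 2*eps*G + 3*eps^2*H)*tabscale
+ *
+ * which is exactly what the Fp/Geps/Heps2 arithmetic below computes.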
+ * + * We use normal precision here, since that is what we + * will use in the inner loops. + */ +static void evaluate_table(real VFtab[], int offset, int stride, + real tabscale, real r, real *y, real *yp) +{ + int n; + real rt, eps, eps2; + real Y, F, Geps, Heps2, Fp; + + rt = r*tabscale; + n = (int)rt; + eps = rt - n; + eps2 = eps*eps; + n = offset+stride*n; + Y = VFtab[n]; + F = VFtab[n+1]; + Geps = eps*VFtab[n+2]; + Heps2 = eps2*VFtab[n+3]; + Fp = F+Geps+Heps2; + *y = Y+eps*Fp; + *yp = (Fp+Geps+2.0*Heps2)*tabscale; +} + +static void copy2table(int n, int offset, int stride, + double x[], double Vtab[], double Ftab[], real scalefactor, + real dest[]) +{ +/* Use double prec. for the intermediary variables + * and temporary x/vtab/vtab2 data to avoid unnecessary + * loss of precision. + */ + int i, nn0; + double F, G, H, h; + + h = 0; + for (i = 0; (i < n); i++) + { + if (i < n-1) + { + h = x[i+1] - x[i]; + F = -Ftab[i]*h; + G = 3*(Vtab[i+1] - Vtab[i]) + (Ftab[i+1] + 2*Ftab[i])*h; + H = -2*(Vtab[i+1] - Vtab[i]) - (Ftab[i+1] + Ftab[i])*h; + } + else + { + /* Fill the last entry with a linear potential, + * this is mainly for rounding issues with angle and dihedral potentials. + */ + F = -Ftab[i]*h; + G = 0; + H = 0; + } + nn0 = offset + i*stride; + dest[nn0] = scalefactor*Vtab[i]; + dest[nn0+1] = scalefactor*F; + dest[nn0+2] = scalefactor*G; + dest[nn0+3] = scalefactor*H; + } +} + +static void init_table(int n, int nx0, + double tabscale, t_tabledata *td, gmx_bool bAlloc) +{ + int i; + + td->nx = n; + td->nx0 = nx0; + td->tabscale = tabscale; + if (bAlloc) + { + snew(td->x, td->nx); + snew(td->v, td->nx); + snew(td->f, td->nx); + } + for (i = 0; (i < td->nx); i++) + { + td->x[i] = i/tabscale; + } +} + +static void spline_forces(int nx, double h, double v[], gmx_bool bS3, gmx_bool bE3, + double f[]) +{ + int start, end, i; + double v3, b_s, b_e, b; + double beta, *gamma; + + /* Formulas can be found in: + * H.J.C. Berendsen, Simulating the Physical World, Cambridge 2007 + */ + + if (nx < 4 && (bS3 || bE3)) + { + gmx_fatal(FARGS, "Can not generate splines with third derivative boundary conditions with less than 4 (%d) points", nx); + } + + /* To make life easy we initially set the spacing to 1 + * and correct for this at the end. + */ + beta = 2; + if (bS3) + { + /* Fit V''' at the start */ + v3 = v[3] - 3*v[2] + 3*v[1] - v[0]; + if (debug) + { + fprintf(debug, "The left third derivative is %g\n", v3/(h*h*h)); + } + b_s = 2*(v[1] - v[0]) + v3/6; + start = 0; + + if (FALSE) + { + /* Fit V'' at the start */ + real v2; + + v2 = -v[3] + 4*v[2] - 5*v[1] + 2*v[0]; + /* v2 = v[2] - 2*v[1] + v[0]; */ + if (debug) + { + fprintf(debug, "The left second derivative is %g\n", v2/(h*h)); + } + b_s = 3*(v[1] - v[0]) - v2/2; + start = 0; + } + } + else + { + b_s = 3*(v[2] - v[0]) + f[0]*h; + start = 1; + } + if (bE3) + { + /* Fit V''' at the end */ + v3 = v[nx-1] - 3*v[nx-2] + 3*v[nx-3] - v[nx-4]; + if (debug) + { + fprintf(debug, "The right third derivative is %g\n", v3/(h*h*h)); + } + b_e = 2*(v[nx-1] - v[nx-2]) + v3/6; + end = nx; + } + else + { + /* V'=0 at the end */ + b_e = 3*(v[nx-1] - v[nx-3]) + f[nx-1]*h; + end = nx - 1; + } + + snew(gamma, nx); + beta = (bS3 ? 1 : 4); + + /* For V'' fitting */ + /* beta = (bS3 ? 2 : 4); */ + + f[start] = b_s/beta; + for (i = start+1; i < end; i++) + { + gamma[i] = 1/beta; + beta = 4 - gamma[i]; + b = 3*(v[i+1] - v[i-1]); + f[i] = (b - f[i-1])/beta; + } + gamma[end-1] = 1/beta; + beta = (bE3 ? 
1 : 4) - gamma[end-1]; + f[end-1] = (b_e - f[end-2])/beta; + + for (i = end-2; i >= start; i--) + { + f[i] -= gamma[i+1]*f[i+1]; + } + sfree(gamma); + + /* Correct for the minus sign and the spacing */ + for (i = start; i < end; i++) + { + f[i] = -f[i]/h; + } +} + +static void set_forces(FILE *fp, int angle, + int nx, double h, double v[], double f[], + int table) +{ + int start, end; + + if (angle == 2) + { + gmx_fatal(FARGS, + "Force generation for dihedral tables is not (yet) implemented"); + } + + start = 0; + while (v[start] == 0) + { + start++; + } + + end = nx; + while (v[end-1] == 0) + { + end--; + } + if (end > nx - 2) + { + end = nx; + } + else + { + end++; + } + + if (fp) + { + fprintf(fp, "Generating forces for table %d, boundary conditions: V''' at %g, %s at %g\n", + table+1, start*h, end == nx ? "V'''" : "V'=0", (end-1)*h); + } + spline_forces(end-start, h, v+start, TRUE, end == nx, f+start); +} + +static void read_tables(FILE *fp, const char *fn, + int ntab, int angle, t_tabledata td[]) +{ + char *libfn; + char buf[STRLEN]; + double **yy = NULL, start, end, dx0, dx1, ssd, vm, vp, f, numf; + int k, i, nx, nx0 = 0, ny, nny, ns; + gmx_bool bAllZero, bZeroV, bZeroF; + double tabscale; + + nny = 2*ntab+1; + libfn = gmxlibfn(fn); + nx = read_xvg(libfn, &yy, &ny); + if (ny != nny) + { + gmx_fatal(FARGS, "Trying to read file %s, but nr columns = %d, should be %d", + libfn, ny, nny); + } + if (angle == 0) + { + if (yy[0][0] != 0.0) + { + gmx_fatal(FARGS, + "The first distance in file %s is %f nm instead of %f nm", + libfn, yy[0][0], 0.0); + } + } + else + { + if (angle == 1) + { + start = 0.0; + } + else + { + start = -180.0; + } + end = 180.0; + if (yy[0][0] != start || yy[0][nx-1] != end) + { + gmx_fatal(FARGS, "The angles in file %s should go from %f to %f instead of %f to %f\n", + libfn, start, end, yy[0][0], yy[0][nx-1]); + } + } + + tabscale = (nx-1)/(yy[0][nx-1] - yy[0][0]); + + if (fp) + { + fprintf(fp, "Read user tables from %s with %d data points.\n", libfn, nx); + if (angle == 0) + { + fprintf(fp, "Tabscale = %g points/nm\n", tabscale); + } + } + + bAllZero = TRUE; + for (k = 0; k < ntab; k++) + { + bZeroV = TRUE; + bZeroF = TRUE; + for (i = 0; (i < nx); i++) + { + if (i >= 2) + { + dx0 = yy[0][i-1] - yy[0][i-2]; + dx1 = yy[0][i] - yy[0][i-1]; + /* Check for 1% deviation in spacing */ + if (fabs(dx1 - dx0) >= 0.005*(fabs(dx0) + fabs(dx1))) + { + gmx_fatal(FARGS, "In table file '%s' the x values are not equally spaced: %f %f %f", fn, yy[0][i-2], yy[0][i-1], yy[0][i]); + } + } + if (yy[1+k*2][i] != 0) + { + bZeroV = FALSE; + if (bAllZero) + { + bAllZero = FALSE; + nx0 = i; + } + if (yy[1+k*2][i] > 0.01*GMX_REAL_MAX || + yy[1+k*2][i] < -0.01*GMX_REAL_MAX) + { + gmx_fatal(FARGS, "Out of range potential value %g in file '%s'", + yy[1+k*2][i], fn); + } + } + if (yy[1+k*2+1][i] != 0) + { + bZeroF = FALSE; + if (bAllZero) + { + bAllZero = FALSE; + nx0 = i; + } + if (yy[1+k*2+1][i] > 0.01*GMX_REAL_MAX || + yy[1+k*2+1][i] < -0.01*GMX_REAL_MAX) + { + gmx_fatal(FARGS, "Out of range force value %g in file '%s'", + yy[1+k*2+1][i], fn); + } + } + } + + if (!bZeroV && bZeroF) + { + set_forces(fp, angle, nx, 1/tabscale, yy[1+k*2], yy[1+k*2+1], k); + } + else + { + /* Check if the second column is close to minus the numerical + * derivative of the first column. 
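+             *
+             * Concretely, with h = 1/tabscale the loop below forms the
+             * centered difference numf = -(v[i+1] - v[i-1])/(2*h) at each
+             * interior point and averages the relative deviation
+             * 2*|f - numf|/|f + numf|; an average above 20% triggers the
+             * warning printed further down.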
+ */ + ssd = 0; + ns = 0; + for (i = 1; (i < nx-1); i++) + { + vm = yy[1+2*k][i-1]; + vp = yy[1+2*k][i+1]; + f = yy[1+2*k+1][i]; + if (vm != 0 && vp != 0 && f != 0) + { + /* Take the centered difference */ + numf = -(vp - vm)*0.5*tabscale; + ssd += fabs(2*(f - numf)/(f + numf)); + ns++; + } + } + if (ns > 0) + { + ssd /= ns; + sprintf(buf, "For the %d non-zero entries for table %d in %s the forces deviate on average %d%% from minus the numerical derivative of the potential\n", ns, k, libfn, (int)(100*ssd+0.5)); + if (debug) + { + fprintf(debug, "%s", buf); + } + if (ssd > 0.2) + { + if (fp) + { + fprintf(fp, "\nWARNING: %s\n", buf); + } + fprintf(stderr, "\nWARNING: %s\n", buf); + } + } + } + } + if (bAllZero && fp) + { + fprintf(fp, "\nNOTE: All elements in table %s are zero\n\n", libfn); + } + + for (k = 0; (k < ntab); k++) + { + init_table(nx, nx0, tabscale, &(td[k]), TRUE); + for (i = 0; (i < nx); i++) + { + td[k].x[i] = yy[0][i]; + td[k].v[i] = yy[2*k+1][i]; + td[k].f[i] = yy[2*k+2][i]; + } + } + for (i = 0; (i < ny); i++) + { + sfree(yy[i]); + } + sfree(yy); + sfree(libfn); +} + +static void done_tabledata(t_tabledata *td) +{ + int i; + + if (!td) + { + return; + } + + sfree(td->x); + sfree(td->v); + sfree(td->f); +} + - static void fill_table(t_tabledata *td, int tp, const t_forcerec *fr) ++static void fill_table(t_tabledata *td, int tp, const t_forcerec *fr, ++ gmx_bool b14only) +{ + /* Fill the table according to the formulas in the manual. + * In principle, we only need the potential and the second + * derivative, but then we would have to do lots of calculations + * in the inner loop. By precalculating some terms (see manual) + * we get better eventual performance, despite a larger table. + * + * Since some of these higher-order terms are very small, + * we always use double precision to calculate them here, in order + * to avoid unnecessary loss of precision. 
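+     *
+     * The "precalculated terms" are the spline coefficients stored by
+     * copy2table() (with h the point spacing, dV = V[i+1] - V[i], and
+     * Ftab the input force, i.e. -dV/dr):
+     *
+     *   Y = V[i],  F = -Ftab[i]*h,
+     *   G =  3*dV + (Ftab[i+1] + 2*Ftab[i])*h,
+     *   H = -2*dV - (Ftab[i+1] +   Ftab[i])*h,
+     *
+     * so the kernels only need the short cubic-in-eps evaluation shown
+     * in evaluate_table() above.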
+ */
+#ifdef DEBUG_SWITCH
+    FILE    *fp;
+#endif
+    int      i;
+    double   reppow, p;
+    double   r1, rc, r12, r13;
-    double   r, r2, r6, rc6;
++    double   r, r2, r6, rc2, rc6, rc12;
+    double   expr, Vtab, Ftab;
+    /* Parameters for David's function */
+    double   A = 0, B = 0, C = 0, A_3 = 0, B_4 = 0;
+    /* Parameters for the switching function */
+    double   ksw, swi, swi1;
+    /* Temporary parameters */
-    gmx_bool bSwitch, bShift;
++    gmx_bool bPotentialSwitch, bForceSwitch, bPotentialShift;
+    double   ewc   = fr->ewaldcoeff_q;
+    double   ewclj = fr->ewaldcoeff_lj;
++    double   Vcut  = 0;
+
-    bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
-               (tp == etabCOULSwitch) ||
-               (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
-
-    bShift  = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) ||
-               (tp == etabShift));
++    if (b14only)
++    {
++        bPotentialSwitch = FALSE;
++        bForceSwitch     = FALSE;
++        bPotentialShift  = FALSE;
++    }
++    else
++    {
++        bPotentialSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
++                            (tp == etabCOULSwitch) ||
++                            (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch) ||
++                            (tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodPOTSWITCH)) ||
++                            (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodPOTSWITCH)));
++        bForceSwitch     = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) ||
++                            (tp == etabShift) ||
++                            (tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodFORCESWITCH)) ||
++                            (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodFORCESWITCH)));
++        bPotentialShift  = ((tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodPOTSHIFT)) ||
++                            (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodPOTSHIFT)));
++    }
+
+    reppow = fr->reppow;
+
+    if (tprops[tp].bCoulomb)
+    {
+        r1 = fr->rcoulomb_switch;
+        rc = fr->rcoulomb;
+    }
+    else
+    {
+        r1 = fr->rvdw_switch;
+        rc = fr->rvdw;
+    }
-    if (bSwitch)
++    if (bPotentialSwitch)
+    {
+        ksw = 1.0/(pow5(rc-r1));
+    }
+    else
+    {
+        ksw = 0.0;
+    }
-    if (bShift)
++    if (bForceSwitch)
+    {
+        if (tp == etabShift)
+        {
+            p = 1;
+        }
+        else if (tp == etabLJ6Shift)
+        {
+            p = 6;
+        }
+        else
+        {
+            p = reppow;
+        }
+
+        A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc, p+2)*pow2(rc-r1));
+        B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc, p+2)*pow3(rc-r1));
+        C = 1.0/pow(rc, p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
+        if (tp == etabLJ6Shift)
+        {
+            A = -A;
+            B = -B;
+            C = -C;
+        }
+        A_3 = A/3.0;
+        B_4 = B/4.0;
+    }
+    if (debug)
+    {
+        fprintf(debug, "Setting up tables\n"); fflush(debug);
+    }
+
+#ifdef DEBUG_SWITCH
+    fp = xvgropen("switch.xvg", "switch", "r", "s");
+#endif
+
++    if (bPotentialShift)
++    {
++        rc2 = rc*rc;
++        rc6 = 1.0/(rc2*rc2*rc2);
++        if (gmx_within_tol(reppow, 12.0, 10*GMX_DOUBLE_EPS))
++        {
++            rc12 = rc6*rc6;
++        }
++        else
++        {
++            rc12 = pow(rc, -reppow);
++        }
++
++        switch (tp)
++        {
++            case etabLJ6:
++                /* Dispersion */
++                Vcut = -rc6;
++                break;
++            case etabLJ6Ewald:
++                Vcut = -rc6*exp(-ewclj*ewclj*rc2)*(1 + ewclj*ewclj*rc2 + pow4(ewclj)*rc2*rc2/2);
++                break;
++            case etabLJ12:
++                /* Repulsion */
++                Vcut = rc12;
++                break;
++            case etabCOUL:
++                Vcut = 1.0/rc;
++                break;
++            case etabEwald:
++            case etabEwaldSwitch:
++                Vcut = gmx_erfc(ewc*rc)/rc;
++                break;
++            case etabEwaldUser:
++                /* Only calculate minus the reciprocal space contribution */
++                Vcut = -gmx_erf(ewc*rc)/rc;
++                break;
++            case etabRF:
++            case etabRF_ZERO:
++                /* No need for preventing the usage of modifiers with RF */
++                Vcut = 0.0;
++                break;
++            case etabEXPMIN:
++                Vcut = exp(-rc);
++                break;
++            default:
++                gmx_fatal(FARGS, "Cannot apply new potential-shift modifier to interaction type '%s' yet.
(%s,%d)", ++ tprops[tp].name, __FILE__, __LINE__); ++ } ++ } ++ + for (i = td->nx0; (i < td->nx); i++) + { + r = td->x[i]; + r2 = r*r; + r6 = 1.0/(r2*r2*r2); + if (gmx_within_tol(reppow, 12.0, 10*GMX_DOUBLE_EPS)) + { + r12 = r6*r6; + } + else + { + r12 = pow(r, -reppow); + } + Vtab = 0.0; + Ftab = 0.0; - if (bSwitch) ++ if (bPotentialSwitch) + { + /* swi is function, swi1 1st derivative and swi2 2nd derivative */ + /* The switch function is 1 for rrc, and smooth for + * r1<=r<=rc. The 1st and 2nd derivatives are both zero at + * r1 and rc. + * ksw is just the constant 1/(rc-r1)^5, to save some calculations... + */ + if (r <= r1) + { + swi = 1.0; + swi1 = 0.0; + } + else if (r >= rc) + { + swi = 0.0; + swi1 = 0.0; + } + else + { + swi = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1) + + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw; + swi1 = -30*pow2(r-r1)*ksw*pow2(rc-r1) + + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw; + } + } + else /* not really needed, but avoids compiler warnings... */ + { + swi = 1.0; + swi1 = 0.0; + } +#ifdef DEBUG_SWITCH + fprintf(fp, "%10g %10g %10g %10g\n", r, swi, swi1, swi2); +#endif + + rc6 = rc*rc*rc; + rc6 = 1.0/(rc6*rc6); + + switch (tp) + { + case etabLJ6: + /* Dispersion */ + Vtab = -r6; + Ftab = 6.0*Vtab/r; + break; + case etabLJ6Switch: + case etabLJ6Shift: + /* Dispersion */ + if (r < rc) + { + Vtab = -r6; + Ftab = 6.0*Vtab/r; + break; + } + break; + case etabLJ12: + /* Repulsion */ + Vtab = r12; + Ftab = reppow*Vtab/r; + break; + case etabLJ12Switch: + case etabLJ12Shift: + /* Repulsion */ + if (r < rc) + { + Vtab = r12; + Ftab = reppow*Vtab/r; + } + break; + case etabLJ6Encad: + if (r < rc) + { + Vtab = -(r6-6.0*(rc-r)*rc6/rc-rc6); + Ftab = -(6.0*r6/r-6.0*rc6/rc); + } + else /* r>rc */ + { + Vtab = 0; + Ftab = 0; + } + break; + case etabLJ12Encad: + if (r < rc) + { + Vtab = -(r6-6.0*(rc-r)*rc6/rc-rc6); + Ftab = -(6.0*r6/r-6.0*rc6/rc); + } + else /* r>rc */ + { + Vtab = 0; + Ftab = 0; + } + break; + case etabCOUL: + Vtab = 1.0/r; + Ftab = 1.0/r2; + break; + case etabCOULSwitch: + case etabShift: + if (r < rc) + { + Vtab = 1.0/r; + Ftab = 1.0/r2; + } + break; + case etabEwald: + case etabEwaldSwitch: + Vtab = gmx_erfc(ewc*r)/r; + Ftab = gmx_erfc(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r; + break; + case etabEwaldUser: + case etabEwaldUserSwitch: + /* Only calculate the negative of the reciprocal space contribution */ + Vtab = -gmx_erf(ewc*r)/r; + Ftab = -gmx_erf(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r; + break; + case etabLJ6Ewald: + Vtab = -r6*exp(-ewclj*ewclj*r2)*(1 + ewclj*ewclj*r2 + pow4(ewclj)*r2*r2/2); + Ftab = 6.0*Vtab/r - r6*exp(-ewclj*ewclj*r2)*pow5(ewclj)*ewclj*r2*r2*r; + break; + case etabRF: + case etabRF_ZERO: + Vtab = 1.0/r + fr->k_rf*r2 - fr->c_rf; + Ftab = 1.0/r2 - 2*fr->k_rf*r; + if (tp == etabRF_ZERO && r >= rc) + { + Vtab = 0; + Ftab = 0; + } + break; + case etabEXPMIN: + expr = exp(-r); + Vtab = expr; + Ftab = expr; + break; + case etabCOULEncad: + if (r < rc) + { + Vtab = 1.0/r-(rc-r)/(rc*rc)-1.0/rc; + Ftab = 1.0/r2-1.0/(rc*rc); + } + else /* r>rc */ + { + Vtab = 0; + Ftab = 0; + } + break; + default: + gmx_fatal(FARGS, "Table type %d not implemented yet. 
(%s,%d)", + tp, __FILE__, __LINE__); + } - if (bShift) ++ if (bForceSwitch) + { + /* Normal coulomb with cut-off correction for potential */ + if (r < rc) + { + Vtab -= C; + /* If in Shifting range add something to it */ + if (r > r1) + { + r12 = (r-r1)*(r-r1); + r13 = (r-r1)*r12; + Vtab += -A_3*r13 - B_4*r12*r12; + Ftab += A*r12 + B*r13; + } + } ++ else ++ { ++ /* Make sure interactions are zero outside cutoff with modifiers */ ++ Vtab = 0; ++ Ftab = 0; ++ } ++ } ++ if (bPotentialShift) ++ { ++ if (r < rc) ++ { ++ Vtab -= Vcut; ++ } ++ else ++ { ++ /* Make sure interactions are zero outside cutoff with modifiers */ ++ Vtab = 0; ++ Ftab = 0; ++ } + } + + if (ETAB_USER(tp)) + { + Vtab += td->v[i]; + Ftab += td->f[i]; + } + - if ((r > r1) && bSwitch) ++ if (bPotentialSwitch) + { - Ftab = Ftab*swi - Vtab*swi1; - Vtab = Vtab*swi; ++ if (r >= rc) ++ { ++ /* Make sure interactions are zero outside cutoff with modifiers */ ++ Vtab = 0; ++ Ftab = 0; ++ } ++ else if (r > r1) ++ { ++ Ftab = Ftab*swi - Vtab*swi1; ++ Vtab = Vtab*swi; ++ } + } - + /* Convert to single precision when we store to mem */ + td->v[i] = Vtab; + td->f[i] = Ftab; + } + + /* Continue the table linearly from nx0 to 0. + * These values are only required for energy minimization with overlap or TPI. + */ + for (i = td->nx0-1; i >= 0; i--) + { + td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]); + td->f[i] = td->f[i+1]; + } + +#ifdef DEBUG_SWITCH + gmx_fio_fclose(fp); +#endif +} + +static void set_table_type(int tabsel[], const t_forcerec *fr, gmx_bool b14only) +{ + int eltype, vdwtype; + + /* Set the different table indices. + * Coulomb first. + */ + + + if (b14only) + { + switch (fr->eeltype) + { + case eelRF_NEC: + eltype = eelRF; + break; + case eelUSER: + case eelPMEUSER: + case eelPMEUSERSWITCH: + eltype = eelUSER; + break; + default: + eltype = eelCUT; + } + } + else + { + eltype = fr->eeltype; + } + + switch (eltype) + { + case eelCUT: + tabsel[etiCOUL] = etabCOUL; + break; + case eelPOISSON: + tabsel[etiCOUL] = etabShift; + break; + case eelSHIFT: + if (fr->rcoulomb > fr->rcoulomb_switch) + { + tabsel[etiCOUL] = etabShift; + } + else + { + tabsel[etiCOUL] = etabCOUL; + } + break; + case eelEWALD: + case eelPME: + case eelP3M_AD: + tabsel[etiCOUL] = etabEwald; + break; + case eelPMESWITCH: + tabsel[etiCOUL] = etabEwaldSwitch; + break; + case eelPMEUSER: + tabsel[etiCOUL] = etabEwaldUser; + break; + case eelPMEUSERSWITCH: + tabsel[etiCOUL] = etabEwaldUserSwitch; + break; + case eelRF: + case eelGRF: + case eelRF_NEC: + tabsel[etiCOUL] = etabRF; + break; + case eelRF_ZERO: + tabsel[etiCOUL] = etabRF_ZERO; + break; + case eelSWITCH: + tabsel[etiCOUL] = etabCOULSwitch; + break; + case eelUSER: + tabsel[etiCOUL] = etabUSER; + break; + case eelENCADSHIFT: + tabsel[etiCOUL] = etabCOULEncad; + break; + default: + gmx_fatal(FARGS, "Invalid eeltype %d", eltype); + } + + /* Van der Waals time */ + if (fr->bBHAM && !b14only) + { + tabsel[etiLJ6] = etabLJ6; + tabsel[etiLJ12] = etabEXPMIN; + } + else + { + if (b14only && fr->vdwtype != evdwUSER) + { + vdwtype = evdwCUT; + } + else + { + vdwtype = fr->vdwtype; + } + + switch (vdwtype) + { + case evdwSWITCH: + tabsel[etiLJ6] = etabLJ6Switch; + tabsel[etiLJ12] = etabLJ12Switch; + break; + case evdwSHIFT: + tabsel[etiLJ6] = etabLJ6Shift; + tabsel[etiLJ12] = etabLJ12Shift; + break; + case evdwUSER: + tabsel[etiLJ6] = etabUSER; + tabsel[etiLJ12] = etabUSER; + break; + case evdwCUT: + tabsel[etiLJ6] = etabLJ6; + tabsel[etiLJ12] = etabLJ12; + break; + case evdwENCADSHIFT: + 
tabsel[etiLJ6] = etabLJ6Encad; + tabsel[etiLJ12] = etabLJ12Encad; + break; + case evdwPME: + tabsel[etiLJ6] = etabLJ6Ewald; + tabsel[etiLJ12] = etabLJ12; + break; + default: + gmx_fatal(FARGS, "Invalid vdwtype %d in %s line %d", vdwtype, + __FILE__, __LINE__); + } + + if (!b14only && fr->vdw_modifier != eintmodNONE) + { + if (fr->vdw_modifier != eintmodPOTSHIFT && + fr->vdwtype != evdwCUT) + { + gmx_incons("Potential modifiers other than potential-shift are only implemented for LJ cut-off"); + } + - switch (fr->vdw_modifier) ++ /* LJ-PME and other (shift-only) modifiers are handled by applying the modifiers ++ * to the original interaction forms when we fill the table, so we only check cutoffs here. ++ */ ++ if (fr->vdwtype == evdwCUT) + { - case eintmodNONE: - case eintmodPOTSHIFT: - case eintmodEXACTCUTOFF: - /* No modification */ - break; - case eintmodPOTSWITCH: - tabsel[etiLJ6] = etabLJ6Switch; - tabsel[etiLJ12] = etabLJ12Switch; - break; - case eintmodFORCESWITCH: - tabsel[etiLJ6] = etabLJ6Shift; - tabsel[etiLJ12] = etabLJ12Shift; - break; - default: - gmx_incons("Unsupported vdw_modifier"); ++ switch (fr->vdw_modifier) ++ { ++ case eintmodNONE: ++ case eintmodPOTSHIFT: ++ case eintmodEXACTCUTOFF: ++ /* No modification */ ++ break; ++ case eintmodPOTSWITCH: ++ tabsel[etiLJ6] = etabLJ6Switch; ++ tabsel[etiLJ12] = etabLJ12Switch; ++ break; ++ case eintmodFORCESWITCH: ++ tabsel[etiLJ6] = etabLJ6Shift; ++ tabsel[etiLJ12] = etabLJ12Shift; ++ break; ++ default: ++ gmx_incons("Unsupported vdw_modifier"); ++ } + } + } + } +} + +t_forcetable make_tables(FILE *out, const output_env_t oenv, + const t_forcerec *fr, + gmx_bool bVerbose, const char *fn, + real rtab, int flags) +{ + const char *fns[3] = { "ctab.xvg", "dtab.xvg", "rtab.xvg" }; + const char *fns14[3] = { "ctab14.xvg", "dtab14.xvg", "rtab14.xvg" }; + FILE *fp; + t_tabledata *td; + gmx_bool b14only, bReadTab, bGenTab; + real x0, y0, yp; + int i, j, k, nx, nx0, tabsel[etiNR]; + real scalefactor; + + t_forcetable table; + + b14only = (flags & GMX_MAKETABLES_14ONLY); + + if (flags & GMX_MAKETABLES_FORCEUSER) + { + tabsel[etiCOUL] = etabUSER; + tabsel[etiLJ6] = etabUSER; + tabsel[etiLJ12] = etabUSER; + } + else + { + set_table_type(tabsel, fr, b14only); + } + snew(td, etiNR); + table.r = rtab; + table.scale = 0; + table.n = 0; + table.scale_exp = 0; + nx0 = 10; + nx = 0; + + table.interaction = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP; + table.format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH; + table.formatsize = 4; + table.ninteractions = 3; + table.stride = table.formatsize*table.ninteractions; + + /* Check whether we have to read or generate */ + bReadTab = FALSE; + bGenTab = FALSE; + for (i = 0; (i < etiNR); i++) + { + if (ETAB_USER(tabsel[i])) + { + bReadTab = TRUE; + } + if (tabsel[i] != etabUSER) + { + bGenTab = TRUE; + } + } + if (bReadTab) + { + read_tables(out, fn, etiNR, 0, td); + if (rtab == 0 || (flags & GMX_MAKETABLES_14ONLY)) + { + rtab = td[0].x[td[0].nx-1]; + table.n = td[0].nx; + nx = table.n; + } + else + { + if (td[0].x[td[0].nx-1] < rtab) + { + gmx_fatal(FARGS, "Tables in file %s not long enough for cut-off:\n" + "\tshould be at least %f nm\n", fn, rtab); + } + nx = table.n = (int)(rtab*td[0].tabscale + 0.5); + } + table.scale = td[0].tabscale; + nx0 = td[0].nx0; + } + if (bGenTab) + { + if (!bReadTab) + { +#ifdef GMX_DOUBLE + table.scale = 2000.0; +#else + table.scale = 500.0; +#endif + nx = table.n = rtab*table.scale; + } + } + if (fr->bBHAM) + { + if (fr->bham_b_max != 0) + { + table.scale_exp = 
table.scale/fr->bham_b_max; + } + else + { + table.scale_exp = table.scale; + } + } + + /* Each table type (e.g. coul,lj6,lj12) requires four + * numbers per nx+1 data points. For performance reasons we want + * the table data to be aligned to 16-byte. + */ + snew_aligned(table.data, 12*(nx+1)*sizeof(real), 32); + + for (k = 0; (k < etiNR); k++) + { + if (tabsel[k] != etabUSER) + { + init_table(nx, nx0, + (tabsel[k] == etabEXPMIN) ? table.scale_exp : table.scale, + &(td[k]), !bReadTab); - fill_table(&(td[k]), tabsel[k], fr); ++ fill_table(&(td[k]), tabsel[k], fr, b14only); + if (out) + { + fprintf(out, "%s table with %d data points for %s%s.\n" + "Tabscale = %g points/nm\n", + ETAB_USER(tabsel[k]) ? "Modified" : "Generated", + td[k].nx, b14only ? "1-4 " : "", tprops[tabsel[k]].name, + td[k].tabscale); + } + } + + /* Set scalefactor for c6/c12 tables. This is because we save flops in the non-table kernels + * by including the derivative constants (6.0 or 12.0) in the parameters, since + * we no longer calculate force in most steps. This means the c6/c12 parameters + * have been scaled up, so we need to scale down the table interactions too. + * It comes here since we need to scale user tables too. + */ + if (k == etiLJ6) + { + scalefactor = 1.0/6.0; + } + else if (k == etiLJ12 && tabsel[k] != etabEXPMIN) + { + scalefactor = 1.0/12.0; + } + else + { + scalefactor = 1.0; + } + + copy2table(table.n, k*4, 12, td[k].x, td[k].v, td[k].f, scalefactor, table.data); + + if (bDebugMode() && bVerbose) + { + if (b14only) + { + fp = xvgropen(fns14[k], fns14[k], "r", "V", oenv); + } + else + { + fp = xvgropen(fns[k], fns[k], "r", "V", oenv); + } + /* plot the output 5 times denser than the table data */ + for (i = 5*((nx0+1)/2); i < 5*table.n; i++) + { + x0 = i*table.r/(5*(table.n-1)); + evaluate_table(table.data, 4*k, 12, table.scale, x0, &y0, &yp); + fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp); + } + gmx_fio_fclose(fp); + } + done_tabledata(&(td[k])); + } + sfree(td); + + return table; +} + +t_forcetable make_gb_table(const output_env_t oenv, + const t_forcerec *fr) +{ + const char *fns[3] = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" }; + const char *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" }; + FILE *fp; + t_tabledata *td; + gmx_bool bReadTab, bGenTab; + real x0, y0, yp; + int i, j, k, nx, nx0, tabsel[etiNR]; + double r, r2, Vtab, Ftab, expterm; + + t_forcetable table; + + double abs_error_r, abs_error_r2; + double rel_error_r, rel_error_r2; + double rel_error_r_old = 0, rel_error_r2_old = 0; + double x0_r_error, x0_r2_error; + + + /* Only set a Coulomb table for GB */ + /* + tabsel[0]=etabGB; + tabsel[1]=-1; + tabsel[2]=-1; + */ + + /* Set the table dimensions for GB, not really necessary to + * use etiNR (since we only have one table, but ...) + */ + snew(td, 1); + table.interaction = GMX_TABLE_INTERACTION_ELEC; + table.format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH; + table.r = fr->gbtabr; + table.scale = fr->gbtabscale; + table.scale_exp = 0; + table.n = table.scale*table.r; + table.formatsize = 4; + table.ninteractions = 1; + table.stride = table.formatsize*table.ninteractions; + nx0 = 0; + nx = table.scale*table.r; + + /* Check whether we have to read or generate + * We will always generate a table, so remove the read code + * (Compare with original make_table function + */ + bReadTab = FALSE; + bGenTab = TRUE; + + /* Each table type (e.g. coul,lj6,lj12) requires four + * numbers per datapoint. For performance reasons we want + * the table data to be aligned to 16-byte. 
+
+t_forcetable make_gb_table(const output_env_t oenv,
+                           const t_forcerec *fr)
+{
+    const char  *fns[3]   = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" };
+    const char  *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" };
+    FILE        *fp;
+    t_tabledata *td;
+    gmx_bool     bReadTab, bGenTab;
+    real         x0, y0, yp;
+    int          i, j, k, nx, nx0, tabsel[etiNR];
+    double       r, r2, Vtab, Ftab, expterm;
+
+    t_forcetable table;
+
+    double       abs_error_r, abs_error_r2;
+    double       rel_error_r, rel_error_r2;
+    double       rel_error_r_old = 0, rel_error_r2_old = 0;
+    double       x0_r_error, x0_r2_error;
+
+
+    /* Only a Coulomb table is set up for GB */
+    /*
+       tabsel[0]=etabGB;
+       tabsel[1]=-1;
+       tabsel[2]=-1;
+     */
+
+    /* Set the table dimensions for GB; it is not really necessary to
+     * use etiNR here, since we only have one table, but we keep the
+     * same conventions as make_tables.
+     */
+    snew(td, 1);
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+    table.r             = fr->gbtabr;
+    table.scale         = fr->gbtabscale;
+    table.scale_exp     = 0;
+    table.n             = table.scale*table.r;
+    table.formatsize    = 4;
+    table.ninteractions = 1;
+    table.stride        = table.formatsize*table.ninteractions;
+    nx0                 = 0;
+    nx                  = table.scale*table.r;
+
+    /* Check whether we have to read or generate:
+     * we will always generate a table, so the read code has been removed
+     * (compare with the original make_tables function).
+     */
+    bReadTab = FALSE;
+    bGenTab  = TRUE;
+
+    /* Each table type (e.g. coul, lj6, lj12) requires four numbers per
+     * data point. For performance reasons we want the table data to be
+     * aligned on a 32-byte boundary, which snew_aligned guarantees; such
+     * a pointer must never be passed to a plain free(), but thankfully
+     * we never free it anyway :-)
+     */
+
+    snew_aligned(table.data, 4*nx, 32);
+
+    init_table(nx, nx0, table.scale, &(td[0]), !bReadTab);
+
+    /* This is a local implementation, so that we do not have to use the
+     * etabGB enum above; that would cause problems later when the other
+     * tables are made (right now, even though we are using GB, the normal
+     * Coulomb tables will be created, and fr->eeltype==etabGB is not
+     * handled in fill_table and set_table_type).
+     */
+
+    for (i = nx0; i < nx; i++)
+    {
+        r       = td->x[i];
+        r2      = r*r;
+        expterm = exp(-0.25*r2);
+
+        Vtab = 1/sqrt(r2+expterm);
+        Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
+
+        /* Convert to single precision when we store to mem */
+        td->v[i] = Vtab;
+        td->f[i] = Ftab;
+    }
+
+    copy2table(table.n, 0, 4, td[0].x, td[0].v, td[0].f, 1.0, table.data);
+
+    if (bDebugMode())
+    {
+        fp = xvgropen(fns[0], fns[0], "r", "V", oenv);
+        /* plot the output at the table point density */
+        for (i = nx0; i < table.n; i++)
+        {
+            x0 = i*table.r/table.n;
+            evaluate_table(table.data, 0, 4, table.scale, x0, &y0, &yp);
+            fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp);
+        }
+        gmx_fio_fclose(fp);
+    }
+
+    /*
+       for(i=100*nx0;i<99.81*table.n;i++)
+       {
+         r       = i*table.r/(100*table.n);
+         r2      = r*r;
+         expterm = exp(-0.25*r2);
+
+         Vtab = 1/sqrt(r2+expterm);
+         Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
+
+         evaluate_table(table.data,0,4,table.scale,r,&y0,&yp);
+         printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
+
+         abs_error_r=fabs(y0-Vtab);
+         abs_error_r2=fabs(yp-(-1)*Ftab);
+
+         rel_error_r=abs_error_r/y0;
+         rel_error_r2=fabs(abs_error_r2/yp);
+
+         if(rel_error_r>rel_error_r_old)
+         {
+           rel_error_r_old=rel_error_r;
+           x0_r_error=x0;
+         }
+
+         if(rel_error_r2>rel_error_r2_old)
+         {
+           rel_error_r2_old=rel_error_r2;
+           x0_r2_error=x0;
+         }
+       }
+
+       printf("gb: MAX REL ERROR IN R=%15.15f, MAX REL ERROR IN R2=%15.15f\n",rel_error_r_old, rel_error_r2_old);
+       printf("gb: XO_R=%g, X0_R2=%g\n",x0_r_error, x0_r2_error);
+
+       exit(1); */
+
+    done_tabledata(&(td[0]));
+    sfree(td);
+
+    return table;
+}
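
The loop above tabulates the Still-style Generalized Born pair interaction
V(r) = 1/sqrt(r^2 + exp(-r^2/4)) together with its analytic force. A quick
standalone check (illustrative only, not part of this patch) that the Ftab
expression really equals -dV/dr:

    #include <math.h>
    #include <stdio.h>

    static double gb_v(double r)
    {
        double e = exp(-0.25*r*r);

        return 1.0/sqrt(r*r + e);
    }

    int main(void)
    {
        double r, h = 1e-6;

        for (r = 0.5; r <= 2.0; r += 0.5)
        {
            double e    = exp(-0.25*r*r);
            /* same expression as Ftab in make_gb_table */
            double ftab = (r - 0.25*r*e)/((r*r + e)*sqrt(r*r + e));
            /* central finite difference of -dV/dr */
            double fnum = -(gb_v(r + h) - gb_v(r - h))/(2.0*h);

            printf("r=%4.2f analytic=%.8f numeric=%.8f\n", r, ftab, fnum);
        }
        return 0;
    }

The two columns should agree to the accuracy of the finite difference,
confirming the sign and the 0.25 factor in the force expression.
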
+
+t_forcetable make_atf_table(FILE *out, const output_env_t oenv,
+                            const t_forcerec *fr,
+                            const char *fn,
+                            matrix box)
+{
+    const char  *fns[3] = { "tf_tab.xvg", "atfdtab.xvg", "atfrtab.xvg" };
+    FILE        *fp;
+    t_tabledata *td;
+    real         x0, y0, yp, rtab;
+    int          i, nx, nx0;
+    real         rx, ry, rz, box_r;
+
+    t_forcetable table;
+
+
+    /* Set the table dimensions for ATF; it is not really necessary to
+     * use etiNR here, since we only have one table, but we keep the
+     * same conventions as make_tables.
+     */
+    snew(td, 1);
+
+    if (fr->adress_type == eAdressSphere)
+    {
+        /* take half the box diagonal as the table range */
+        rx    = 0.5*box[0][0]+0.5*box[1][0]+0.5*box[2][0];
+        ry    = 0.5*box[0][1]+0.5*box[1][1]+0.5*box[2][1];
+        rz    = 0.5*box[0][2]+0.5*box[1][2]+0.5*box[2][2];
+        box_r = sqrt(rx*rx+ry*ry+rz*rz);
+
+    }
+    else
+    {
+        /* xsplit: take half the box x-direction as the table range */
+        box_r = box[0][0]/2;
+    }
+    table.r         = box_r;
+    table.scale     = 0;
+    table.n         = 0;
+    table.scale_exp = 0;
+    nx0             = 10;
+    nx              = 0;
+
+    read_tables(out, fn, 1, 0, td);
+    rtab = td[0].x[td[0].nx-1];
+
+    if (fr->adress_type == eAdressXSplit && (rtab < box[0][0]/2))
+    {
+        gmx_fatal(FARGS, "AdResS full box therm force table in file %s extends to %f:\n"
+                  "\tit should extend to at least half the box length in the x-direction, %f\n",
+                  fn, rtab, box[0][0]/2);
+    }
+    if (rtab < box_r)
+    {
+        gmx_fatal(FARGS, "AdResS full box therm force table in file %s extends to %f:\n"
+                  "\tfor spherical AdResS it should extend to at least %f\n"
+                  "\t(the distance from the center to the furthest point of the box)\n",
+                  fn, rtab, box_r);
+    }
+
+
+    table.n     = td[0].nx;
+    nx          = table.n;
+    table.scale = td[0].tabscale;
+    nx0         = td[0].nx0;
+
+    /* Each table type (e.g. coul, lj6, lj12) requires four numbers per
+     * data point. For performance reasons we want the table data to be
+     * aligned on a 32-byte boundary, which snew_aligned guarantees; such
+     * a pointer must never be passed to a plain free(), but thankfully
+     * we never free it anyway :-)
+     */
+
+    snew_aligned(table.data, 4*nx, 32);
+
+    copy2table(table.n, 0, 4, td[0].x, td[0].v, td[0].f, 1.0, table.data);
+
+    if (bDebugMode())
+    {
+        fp = xvgropen(fns[0], fns[0], "r", "V", oenv);
+        /* plot the output 5 times denser than the table data */
+        for (i = 5*((nx0+1)/2); i < 5*table.n; i++)
+        {
+            x0 = i*table.r/(5*(table.n-1));
+            evaluate_table(table.data, 0, 4, table.scale, x0, &y0, &yp);
+            fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp);
+        }
+        gmx_ffclose(fp);
+    }
+
+    done_tabledata(&(td[0]));
+    sfree(td);
+
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+    table.formatsize    = 4;
+    table.ninteractions = 3;
+    table.stride        = table.formatsize*table.ninteractions;
+
+
+    return table;
+}
+
+bondedtable_t make_bonded_table(FILE *fplog, char *fn, int angle)
+{
+    t_tabledata   td;
+    double        start;
+    int           i;
+    bondedtable_t tab;
+
+    if (angle < 2)
+    {
+        start = 0;
+    }
+    else
+    {
+        start = -180.0;
+    }
+    read_tables(fplog, fn, 1, angle, &td);
+    if (angle > 0)
+    {
+        /* Convert the table from degrees to radians */
+        for (i = 0; i < td.nx; i++)
+        {
+            td.x[i]     *= DEG2RAD;
+            td.f[i]     *= RAD2DEG;
+        }
+        td.tabscale *= RAD2DEG;
+    }
+    tab.n     = td.nx;
+    tab.scale = td.tabscale;
+    snew(tab.data, tab.n*4);
+    copy2table(tab.n, 0, 4, td.x, td.v, td.f, 1.0, tab.data);
+    done_tabledata(&td);
+
+    return tab;
+}
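
The degree-to-radian conversion in make_bonded_table has to rescale three
quantities consistently: the abscissa shrinks by DEG2RAD, so both the force
(a derivative with respect to the abscissa) and tabscale (points per unit of
abscissa) grow by the inverse factor RAD2DEG. A toy illustration with
made-up numbers (a standalone sketch, not a GROMACS API):

    #include <stdio.h>

    #define DEG2RAD (3.14159265358979323846/180.0)
    #define RAD2DEG (180.0/3.14159265358979323846)

    int main(void)
    {
        /* hypothetical sample point from an angle table, in degree units */
        double x_deg = 120.0, f_per_deg = 0.5, tabscale_deg = 10.0;

        double x_rad        = x_deg*DEG2RAD;        /* abscissa shrinks  */
        double f_per_rad    = f_per_deg*RAD2DEG;    /* dV/dx grows       */
        double tabscale_rad = tabscale_deg*RAD2DEG; /* points per radian */

        printf("x=%g rad, f=%g per rad, tabscale=%g points/rad\n",
               x_rad, f_per_rad, tabscale_rad);
        return 0;
    }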