From: Roland Schulz Date: Tue, 24 Jun 2014 01:52:58 +0000 (-0400) Subject: Merge release-4-6 into release-5-0 X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=b2b95f071d3522005949a60e77aa896b45cfc981;p=alexxy%2Fgromacs.git Merge release-4-6 into release-5-0 This merges commit dced970, which changes many free-energy, modifier and table-generation code paths, together with its fix 349d8056. Patch 349d8056 contains fixes to potential-shift and potential-switch, as well as to the shift/switch interactions in combination with free energy. Since 5.0 has undergone changes in the same areas (nbnxn free energy, LJ-PME and force-switch), this merge is a likely place for bugs to have been introduced, so we keep it as a separate commit. Uncrustified the result of the merge. Conflicts: src/gmxlib/nonbonded/nb_free_energy.c Resolved in favour of whichever branch seemed most right; changes from dced970, 99aa704d and 5f59569a8 were all relevant here. We have introduced some new LJ-PME-related variables so that the code path is reasonably similar to the coulomb path. We have also fixed a small bug where the LJ-PME self-energy (i==j for Verlet kernels) was not multiplied by 0.5. src/gromacs/gmxlib/nonbonded/nonbonded.c Resolved as for dced970 src/gromacs/gmxpreprocess/readir.c Resolved as for dced970 src/gromacs/mdlib/forcerec.c Resolved from both branches src/gromacs/mdlib/sim_util.c Resolved from both branches, and from 349d8056 src/gromacs/mdlib/tables.c Resolved from both branches, and added a few lines of code to make LJ-PME work with shift modifiers. As noted above, to avoid breaking the 5.0 branch, we have manually added the changes corresponding to 349d8056 to make sure force-switch (the same as vdwtype=shift) results in a correct dispersion correction, and we have added a fix for the sign of the LJ-PME grid c6 term in the generic nonbonded kernels. This means 349d8056 should not be merged into 5.0 again later. Change-Id: Ida29b143a1bcb727ff38f9c63bf133bf749477b1 --- b2b95f071d3522005949a60e77aa896b45cfc981 diff --cc src/gromacs/gmxlib/nonbonded/nb_free_energy.c index a2e145153b,0000000000..058b2a87eb mode 100644,000000..100644 --- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.c +++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.c @@@ -1,989 -1,0 +1,1066 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> + +#include "vec.h" +#include "typedefs.h" +#include "nonbonded.h" +#include "nb_kernel.h" +#include "nrnb.h" +#include "macros.h" +#include "nb_free_energy.h" + +#include "gmx_fatal.h" + +void +gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, + rvec * gmx_restrict xx, + rvec * gmx_restrict ff, + t_forcerec * gmx_restrict fr, + const t_mdatoms * gmx_restrict mdatoms, + nb_kernel_data_t * gmx_restrict kernel_data, + t_nrnb * gmx_restrict nrnb) +{ + +#define STATE_A 0 +#define STATE_B 1 +#define NSTATES 2 + int i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid; + real shX, shY, shZ; + real Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz; + real Vcoul[NSTATES], Vvdw[NSTATES]; + real rinv6, r, rt, rtC, rtV; + real iqA, iqB; + real qq[NSTATES], vctot, krsq; + int ntiA, ntiB, tj[NSTATES]; + real Vvdw6, Vvdw12, vvtot; + real ix, iy, iz, fix, fiy, fiz; + real dx, dy, dz, rsq, rinv; - real c6[NSTATES], c12[NSTATES], c6grid[NSTATES]; ++ real c6[NSTATES], c12[NSTATES], c6grid; + real LFC[NSTATES], LFV[NSTATES], DLF[NSTATES]; + double dvdl_coul, dvdl_vdw; + real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES]; + real sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min; + real rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; + real sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2; + int do_tab, tab_elemsize; + int n0, n1C, n1V, nnn; + real Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF; + int icoul, ivdw; + int nri; + const int * iinr; + const int * jindex; + const int * jjnr; + const int * shift; + const int * gid; + const int * typeA; + const int * typeB; + int ntype; + const real * shiftvec; + real dvdl_part; + real * fshift; + real tabscale = 0; + const real * VFtab = NULL; + const real * x; + real * f; + real facel, krf, crf; + const real * chargeA; + const real * chargeB; + real sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power; + real alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj; + const real * nbfp, *nbfp_grid; + real * dvdl; + real * Vv; + real * Vc; + gmx_bool bDoForces, bDoShiftForces, bDoPotential; - real rcoulomb, sh_ewald; - real rvdw, sh_invrc6; - gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald; ++ real rcoulomb, rvdw, sh_invrc6; ++ gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll; ++ gmx_bool bEwald, bEwaldLJ; + real rcutoff_max2; - real rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr; - const real * tab_ewald_F; - const real * tab_ewald_V; + const real * tab_ewald_F_lj; + const real * tab_ewald_V_lj; - real tab_ewald_scale, tab_ewald_halfsp; ++ real d, d2, sw, dsw, rinvcorr; ++ real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4; ++ real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4; ++ gmx_bool bConvertEwaldToCoulomb, 
bConvertLJEwaldToLJ6; ++ gmx_bool bComputeVdwInteraction, bComputeElecInteraction; ++ const real * ewtab; ++ int ewitab; ++ real ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald; ++ ++ sh_ewald = fr->ic->sh_ewald; ++ ewtab = fr->ic->tabq_coul_FDV0; ++ ewtabscale = fr->ic->tabq_scale; ++ ewtabhalfspace = 0.5/ewtabscale; ++ tab_ewald_F_lj = fr->ic->tabq_vdw_F; ++ tab_ewald_V_lj = fr->ic->tabq_vdw_V; + + x = xx[0]; + f = ff[0]; + + fshift = fr->fshift[0]; + + nri = nlist->nri; + iinr = nlist->iinr; + jindex = nlist->jindex; + jjnr = nlist->jjnr; + icoul = nlist->ielec; + ivdw = nlist->ivdw; + shift = nlist->shift; + gid = nlist->gid; + + shiftvec = fr->shift_vec[0]; + chargeA = mdatoms->chargeA; + chargeB = mdatoms->chargeB; + facel = fr->epsfac; + krf = fr->k_rf; + crf = fr->c_rf; + ewc_lj = fr->ewaldcoeff_lj; + Vc = kernel_data->energygrp_elec; + typeA = mdatoms->typeA; + typeB = mdatoms->typeB; + ntype = fr->ntype; + nbfp = fr->nbfp; + nbfp_grid = fr->ljpme_c6grid; + Vv = kernel_data->energygrp_vdw; + lambda_coul = kernel_data->lambda[efptCOUL]; + lambda_vdw = kernel_data->lambda[efptVDW]; + dvdl = kernel_data->dvdl; + alpha_coul = fr->sc_alphacoul; + alpha_vdw = fr->sc_alphavdw; + lam_power = fr->sc_power; + sc_r_power = fr->sc_r_power; + sigma6_def = fr->sc_sigma6_def; + sigma6_min = fr->sc_sigma6_min; + bDoForces = kernel_data->flags & GMX_NONBONDED_DO_FORCE; + bDoShiftForces = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE; + bDoPotential = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL; + + rcoulomb = fr->rcoulomb; + sh_ewald = fr->ic->sh_ewald; + rvdw = fr->rvdw; + sh_invrc6 = fr->ic->sh_invrc6; + - /* Ewald (PME) reciprocal force and energy quadratic spline tables */ - tab_ewald_F = fr->ic->tabq_coul_F; - tab_ewald_V = fr->ic->tabq_coul_V; - tab_ewald_scale = fr->ic->tabq_scale; - tab_ewald_F_lj = fr->ic->tabq_vdw_F; - tab_ewald_V_lj = fr->ic->tabq_vdw_V; - tab_ewald_halfsp = 0.5/tab_ewald_scale; ++ if (fr->coulomb_modifier == eintmodPOTSWITCH) ++ { ++ d = fr->rcoulomb-fr->rcoulomb_switch; ++ elec_swV3 = -10.0/(d*d*d); ++ elec_swV4 = 15.0/(d*d*d*d); ++ elec_swV5 = -6.0/(d*d*d*d*d); ++ elec_swF2 = -30.0/(d*d*d); ++ elec_swF3 = 60.0/(d*d*d*d); ++ elec_swF4 = -30.0/(d*d*d*d*d); ++ } ++ else ++ { ++ /* Avoid warnings from stupid compilers (looking at you, Clang!) */ ++ elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0; ++ } + - if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH) ++ if (fr->vdw_modifier == eintmodPOTSWITCH) + { - rcutoff = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw; - rcutoff2 = rcutoff*rcutoff; - rswitch = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch; - d = rcutoff-rswitch; - swV3 = -10.0/(d*d*d); - swV4 = 15.0/(d*d*d*d); - swV5 = -6.0/(d*d*d*d*d); - swF2 = -30.0/(d*d*d); - swF3 = 60.0/(d*d*d*d); - swF4 = -30.0/(d*d*d*d*d); ++ d = fr->rvdw-fr->rvdw_switch; ++ vdw_swV3 = -10.0/(d*d*d); ++ vdw_swV4 = 15.0/(d*d*d*d); ++ vdw_swV5 = -6.0/(d*d*d*d*d); ++ vdw_swF2 = -30.0/(d*d*d); ++ vdw_swF3 = 60.0/(d*d*d*d); ++ vdw_swF4 = -30.0/(d*d*d*d*d); + } + else + { - /* Stupid compilers dont realize these variables will not be used */ - rswitch = 0.0; - swV3 = 0.0; - swV4 = 0.0; - swV5 = 0.0; - swF2 = 0.0; - swF3 = 0.0; - swF4 = 0.0; ++ /* Avoid warnings from stupid compilers (looking at you, Clang!) 
*/ ++ vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0; + } + + if (fr->cutoff_scheme == ecutsVERLET) + { + const interaction_const_t *ic; + + ic = fr->ic; + if (EVDW_PME(ic->vdwtype)) + { + ivdw = GMX_NBKERNEL_VDW_LJEWALD; + } + else + { + ivdw = GMX_NBKERNEL_VDW_LENNARDJONES; + } + + if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype)) + { + icoul = GMX_NBKERNEL_ELEC_REACTIONFIELD; + } + else if (EEL_PME_EWALD(ic->eeltype)) + { + icoul = GMX_NBKERNEL_ELEC_EWALD; + } + else + { + gmx_incons("Unsupported eeltype with Verlet and free-energy"); + } + + bExactElecCutoff = TRUE; + bExactVdwCutoff = TRUE; + } + else + { + bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO; + bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE); + } + + bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff); + rcutoff_max2 = max(fr->rcoulomb, fr->rvdw); + rcutoff_max2 = rcutoff_max2*rcutoff_max2; + + bEwald = (icoul == GMX_NBKERNEL_ELEC_EWALD); ++ bEwaldLJ = (ivdw == GMX_NBKERNEL_VDW_LJEWALD); ++ ++ /* For Ewald/PME interactions we cannot easily apply the soft-core component to ++ * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we ++ * can apply the small trick of subtracting the _reciprocal_ space contribution ++ * in this kernel, and instead apply the free energy interaction to the 1/r ++ * (standard coulomb) interaction. ++ * ++ * However, we cannot use this approach for switch-modified since we would then ++ * effectively end up evaluating a significantly different interaction here compared to the ++ * normal (non-free-energy) kernels, either by applying a cutoff at a different ++ * position than what the user requested, or by switching different ++ * things (1/r rather than short-range Ewald). For these settings, we just ++ * use the traditional short-range Ewald interaction in that case. ++ */ ++ bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH)); ++ /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0), ++ * but writing it this way means we stay in sync with coulomb, and it avoids future bugs. ++ */ ++ bConvertLJEwaldToLJ6 = (bEwaldLJ && (fr->vdw_modifier != eintmodPOTSWITCH)); + + /* fix compiler warnings */ + nj1 = 0; + n1C = n1V = 0; + epsC = epsV = 0; + eps2C = eps2V = 0; + + dvdl_coul = 0; + dvdl_vdw = 0; + + /* Lambda factor for state A, 1-lambda*/ + LFC[STATE_A] = 1.0 - lambda_coul; + LFV[STATE_A] = 1.0 - lambda_vdw; + + /* Lambda factor for state B, lambda*/ + LFC[STATE_B] = lambda_coul; + LFV[STATE_B] = lambda_vdw; + + /*derivative of the lambda factor for state A and B */ + DLF[STATE_A] = -1; + DLF[STATE_B] = 1; + + for (i = 0; i < NSTATES; i++) + { + lfac_coul[i] = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i])); + dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1); + lfac_vdw[i] = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i])); + dlfac_vdw[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? 
(1-LFV[i]) : 1); + } + /* precalculate */ + sigma2_def = pow(sigma6_def, 1.0/3.0); + sigma2_min = pow(sigma6_min, 1.0/3.0); + + /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */ + + do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || + ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE); + if (do_tab) + { + tabscale = kernel_data->table_elec_vdw->scale; + VFtab = kernel_data->table_elec_vdw->data; + /* we always use the combined table here */ + tab_elemsize = 12; + } + + for (n = 0; (n < nri); n++) + { + int npair_within_cutoff; + + npair_within_cutoff = 0; + + is3 = 3*shift[n]; + shX = shiftvec[is3]; + shY = shiftvec[is3+1]; + shZ = shiftvec[is3+2]; + nj0 = jindex[n]; + nj1 = jindex[n+1]; + ii = iinr[n]; + ii3 = 3*ii; + ix = shX + x[ii3+0]; + iy = shY + x[ii3+1]; + iz = shZ + x[ii3+2]; + iqA = facel*chargeA[ii]; + iqB = facel*chargeB[ii]; + ntiA = 2*ntype*typeA[ii]; + ntiB = 2*ntype*typeB[ii]; + vctot = 0; + vvtot = 0; + fix = 0; + fiy = 0; + fiz = 0; + + for (k = nj0; (k < nj1); k++) + { + jnr = jjnr[k]; + j3 = 3*jnr; + dx = ix - x[j3]; + dy = iy - x[j3+1]; + dz = iz - x[j3+2]; + rsq = dx*dx + dy*dy + dz*dz; + + if (bExactCutoffAll && rsq >= rcutoff_max2) + { + /* We save significant time by skipping all code below. + * Note that with soft-core interactions, the actual cut-off + * check might be different. But since the soft-core distance + * is always larger than r, checking on r here is safe. + */ + continue; + } + npair_within_cutoff++; + + if (rsq > 0) + { + rinv = gmx_invsqrt(rsq); + r = rsq*rinv; + } + else + { + /* The force at r=0 is zero, because of symmetry. + * But note that the potential is in general non-zero, + * since the soft-cored r will be non-zero. + */ + rinv = 0; + r = 0; + } + + if (sc_r_power == 6.0) + { + rpm2 = rsq*rsq; /* r4 */ + rp = rpm2*rsq; /* r6 */ + } + else if (sc_r_power == 48.0) + { + rp = rsq*rsq*rsq; /* r6 */ + rp = rp*rp; /* r12 */ + rp = rp*rp; /* r24 */ + rp = rp*rp; /* r48 */ + rpm2 = rp/rsq; /* r46 */ + } + else + { + rp = pow(r, sc_r_power); /* not currently supported as input, but can handle it */ + rpm2 = rp/rsq; + } + + Fscal = 0; + + qq[STATE_A] = iqA*chargeA[jnr]; + qq[STATE_B] = iqB*chargeB[jnr]; + + tj[STATE_A] = ntiA+2*typeA[jnr]; + tj[STATE_B] = ntiB+2*typeB[jnr]; + - if (ivdw == GMX_NBKERNEL_VDW_LJEWALD) - { - c6grid[STATE_A] = nbfp_grid[tj[STATE_A]]; - c6grid[STATE_B] = nbfp_grid[tj[STATE_B]]; - } - + if (nlist->excl_fep == NULL || nlist->excl_fep[k]) + { + c6[STATE_A] = nbfp[tj[STATE_A]]; + c6[STATE_B] = nbfp[tj[STATE_B]]; + + for (i = 0; i < NSTATES; i++) + { + c12[i] = nbfp[tj[i]+1]; + if ((c6[i] > 0) && (c12[i] > 0)) + { + /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */ + sigma6[i] = 0.5*c12[i]/c6[i]; + sigma2[i] = pow(sigma6[i], 1.0/3.0); + /* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on + what data to store externally. 
Can't be fixed without larger scale changes, so not 4.6 */ + if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */ + { + sigma6[i] = sigma6_min; + sigma2[i] = sigma2_min; + } + } + else + { + sigma6[i] = sigma6_def; + sigma2[i] = sigma2_def; + } + if (sc_r_power == 6.0) + { + sigma_pow[i] = sigma6[i]; + sigma_powm2[i] = sigma6[i]/sigma2[i]; + } + else if (sc_r_power == 48.0) + { + sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */ + sigma_powm2[i] = sigma_pow[i]/sigma2[i]; + } + else + { /* not really supported as input, but in here for testing the general case*/ + sigma_pow[i] = pow(sigma2[i], sc_r_power/2); + sigma_powm2[i] = sigma_pow[i]/(sigma2[i]); + } + } + + /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/ + if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0)) + { + alpha_vdw_eff = 0; + alpha_coul_eff = 0; + } + else + { + alpha_vdw_eff = alpha_vdw; + alpha_coul_eff = alpha_coul; + } + + for (i = 0; i < NSTATES; i++) + { + FscalC[i] = 0; + FscalV[i] = 0; + Vcoul[i] = 0; + Vvdw[i] = 0; + + /* Only spend time on A or B state if it is non-zero */ + if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) ) + { + /* this section has to be inside the loop because of the dependence on sigma_pow */ + rpinvC = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp); + rinvC = pow(rpinvC, 1.0/sc_r_power); + rC = 1.0/rinvC; + + rpinvV = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp); + rinvV = pow(rpinvV, 1.0/sc_r_power); + rV = 1.0/rinvV; + + if (do_tab) + { + rtC = rC*tabscale; + n0 = rtC; + epsC = rtC-n0; + eps2C = epsC*epsC; + n1C = tab_elemsize*n0; + + rtV = rV*tabscale; + n0 = rtV; + epsV = rtV-n0; + eps2V = epsV*epsV; + n1V = tab_elemsize*n0; + } + - /* With Ewald and soft-core we should put the cut-off on r, - * not on the soft-cored rC, as the real-space and - * reciprocal space contributions should (almost) cancel. ++ /* Only process the coulomb interactions if we have charges, ++ * and if we either include all entries in the list (no cutoff ++ * used in the kernel), or if we are within the cutoff. 
+ */ - if (qq[i] != 0 && - !(bExactElecCutoff && - ((!bEwald && rC >= rcoulomb) || - (bEwald && r >= rcoulomb)))) ++ bComputeElecInteraction = !bExactElecCutoff || ++ ( bConvertEwaldToCoulomb && r < rcoulomb) || ++ (!bConvertEwaldToCoulomb && rC < rcoulomb); ++ ++ if ( (qq[i] != 0) && bComputeElecInteraction) + { + switch (icoul) + { + case GMX_NBKERNEL_ELEC_COULOMB: + /* simple cutoff */ + Vcoul[i] = qq[i]*rinvC; + FscalC[i] = Vcoul[i]; - break; - - case GMX_NBKERNEL_ELEC_EWALD: - /* Ewald FEP is done only on the 1/r part */ - Vcoul[i] = qq[i]*(rinvC - sh_ewald); - FscalC[i] = Vcoul[i]; ++ /* The shift for the Coulomb potential is stored in ++ * the RF parameter c_rf, which is 0 without shift ++ */ ++ Vcoul[i] -= qq[i]*fr->ic->c_rf; + break; + + case GMX_NBKERNEL_ELEC_REACTIONFIELD: + /* reaction-field */ + Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf); + FscalC[i] = qq[i]*(rinvC - 2.0*krf*rC*rC); + break; + + case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE: + /* non-Ewald tabulated coulomb */ + nnn = n1C; + Y = VFtab[nnn]; + F = VFtab[nnn+1]; + Geps = epsC*VFtab[nnn+2]; + Heps2 = eps2C*VFtab[nnn+3]; + Fp = F+Geps+Heps2; + VV = Y+epsC*Fp; + FF = Fp+Geps+2.0*Heps2; + Vcoul[i] = qq[i]*VV; + FscalC[i] = -qq[i]*tabscale*FF*rC; + break; + + case GMX_NBKERNEL_ELEC_GENERALIZEDBORN: + gmx_fatal(FARGS, "Free energy and GB not implemented.\n"); + break; + ++ case GMX_NBKERNEL_ELEC_EWALD: ++ if (bConvertEwaldToCoulomb) ++ { ++ /* Ewald FEP is done only on the 1/r part */ ++ Vcoul[i] = qq[i]*(rinvC-sh_ewald); ++ FscalC[i] = qq[i]*rinvC; ++ } ++ else ++ { ++ ewrt = rC*ewtabscale; ++ ewitab = (int) ewrt; ++ eweps = ewrt-ewitab; ++ ewitab = 4*ewitab; ++ FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1]; ++ rinvcorr = rinvC-sh_ewald; ++ Vcoul[i] = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i]))); ++ FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]); ++ } ++ break; ++ + case GMX_NBKERNEL_ELEC_NONE: + FscalC[i] = 0.0; + Vcoul[i] = 0.0; + break; + + default: + gmx_incons("Invalid icoul in free energy kernel"); + break; + } + + if (fr->coulomb_modifier == eintmodPOTSWITCH) + { - d = rC-rswitch; ++ d = rC-fr->rcoulomb_switch; + d = (d > 0.0) ? d : 0.0; + d2 = d*d; - sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5)); - dsw = d2*(swF2+d*(swF3+d*swF4)); ++ sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5)); ++ dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4)); ++ ++ FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw; ++ Vcoul[i] *= sw; + - Vcoul[i] *= sw; - FscalC[i] = FscalC[i]*sw + Vcoul[i]*dsw; ++ FscalC[i] = (rC < rcoulomb) ? FscalC[i] : 0.0; ++ Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : 0.0; + } + } + - if ((c6[i] != 0 || c12[i] != 0) && - !(bExactVdwCutoff && - ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) || - (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw)))) ++ /* Only process the VDW interactions if we have ++ * some non-zero parameters, and if we either ++ * include all entries in the list (no cutoff used ++ * in the kernel), or if we are within the cutoff. 
++ */ ++ bComputeVdwInteraction = !bExactVdwCutoff || ++ ( bConvertLJEwaldToLJ6 && r < rvdw) || ++ (!bConvertLJEwaldToLJ6 && rV < rvdw); ++ if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction) + { + switch (ivdw) + { + case GMX_NBKERNEL_VDW_LENNARDJONES: + case GMX_NBKERNEL_VDW_LJEWALD: + /* cutoff LJ */ + if (sc_r_power == 6.0) + { + rinv6 = rpinvV; + } + else + { + rinv6 = pow(rinvV, 6.0); + } + Vvdw6 = c6[i]*rinv6; + Vvdw12 = c12[i]*rinv6*rinv6; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { + Vvdw[i] = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0) + -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0)); + } + else + { + Vvdw[i] = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0); + } + FscalV[i] = Vvdw12 - Vvdw6; + break; + + case GMX_NBKERNEL_VDW_BUCKINGHAM: + gmx_fatal(FARGS, "Buckingham free energy not supported."); + break; + + case GMX_NBKERNEL_VDW_CUBICSPLINETABLE: + /* Table LJ */ + nnn = n1V+4; + /* dispersion */ + Y = VFtab[nnn]; + F = VFtab[nnn+1]; + Geps = epsV*VFtab[nnn+2]; + Heps2 = eps2V*VFtab[nnn+3]; + Fp = F+Geps+Heps2; + VV = Y+epsV*Fp; + FF = Fp+Geps+2.0*Heps2; + Vvdw[i] += c6[i]*VV; + FscalV[i] -= c6[i]*tabscale*FF*rV; + + /* repulsion */ + Y = VFtab[nnn+4]; + F = VFtab[nnn+5]; + Geps = epsV*VFtab[nnn+6]; + Heps2 = eps2V*VFtab[nnn+7]; + Fp = F+Geps+Heps2; + VV = Y+epsV*Fp; + FF = Fp+Geps+2.0*Heps2; + Vvdw[i] += c12[i]*VV; + FscalV[i] -= c12[i]*tabscale*FF*rV; + break; + + case GMX_NBKERNEL_VDW_NONE: + Vvdw[i] = 0.0; + FscalV[i] = 0.0; + break; + + default: + gmx_incons("Invalid ivdw in free energy kernel"); + break; + } + + if (fr->vdw_modifier == eintmodPOTSWITCH) + { - d = rV-rswitch; - d = (d > 0.0) ? d : 0.0; - d2 = d*d; - sw = 1.0+d2*d*(swV3+d*(swV4+d*swV5)); - dsw = d2*(swF2+d*(swF3+d*swF4)); ++ d = rV-fr->rvdw_switch; ++ d = (d > 0.0) ? d : 0.0; ++ d2 = d*d; ++ sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5)); ++ dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4)); + - Vvdw[i] *= sw; - FscalV[i] = FscalV[i]*sw + Vvdw[i]*dsw; ++ FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw; ++ Vvdw[i] *= sw; + + FscalV[i] = (rV < rvdw) ? FscalV[i] : 0.0; + Vvdw[i] = (rV < rvdw) ? Vvdw[i] : 0.0; + } + } + + /* FscalC (and FscalV) now contain: dV/drC * rC + * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p + * Further down we first multiply by r^p-2 and then by + * the vector r, which in total gives: dV/drC * (r/rC)^1-p + */ + FscalC[i] *= rpinvC; + FscalV[i] *= rpinvV; + } + } + + /* Assemble A and B states */ + for (i = 0; i < NSTATES; i++) + { + vctot += LFC[i]*Vcoul[i]; + vvtot += LFV[i]*Vvdw[i]; + + Fscal += LFC[i]*FscalC[i]*rpm2; + Fscal += LFV[i]*FscalV[i]*rpm2; + + dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i]; + dvdl_vdw += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i]; + } + } + else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD) + { + /* For excluded pairs, which are only in this pair list when + * using the Verlet scheme, we don't use soft-core. + * The group scheme also doesn't soft-core for these. + * As there is no singularity, there is no need for soft-core. 
+ */ + VV = krf*rsq - crf; + FF = -2.0*krf; + + if (ii == jnr) + { + VV *= 0.5; + } + + for (i = 0; i < NSTATES; i++) + { + vctot += LFC[i]*qq[i]*VV; + Fscal += LFC[i]*qq[i]*FF; + dvdl_coul += DLF[i]*qq[i]*VV; + } + } + - if (icoul == GMX_NBKERNEL_ELEC_EWALD && - !(bExactElecCutoff && r >= rcoulomb)) ++ if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) ) + { - /* Because we compute the soft-core normally, - * we have to remove the Ewald short range portion. - * Done outside of the states loop because this part - * doesn't depend on the scaled R. ++ /* See comment in the preamble. When using Ewald interactions ++ * (unless we use a switch modifier) we subtract the reciprocal-space ++ * Ewald component here which made it possible to apply the free ++ * energy interaction to 1/r (vanilla coulomb short-range part) ++ * above. This gets us closer to the ideal case of applying ++ * the softcore to the entire electrostatic interaction, ++ * including the reciprocal-space component. + */ - real rs, frac, f_lr; - int ri; ++ real v_lr, f_lr; + - rs = rsq*rinv*tab_ewald_scale; - ri = (int)rs; - frac = rs - ri; - f_lr = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1]; - FF = f_lr*rinv; - VV = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr); ++ ewrt = r*ewtabscale; ++ ewitab = (int) ewrt; ++ eweps = ewrt-ewitab; ++ ewitab = 4*ewitab; ++ f_lr = ewtab[ewitab]+eweps*ewtab[ewitab+1]; ++ v_lr = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr)); ++ f_lr *= rinv; + + if (ii == jnr) + { - VV *= 0.5; ++ /* If we get here, the i particle (ii) has itself (jnr) ++ * in its neighborlist. This can only happen with the Verlet ++ * scheme, and corresponds to a self-interaction that will ++ * occur twice. Scale it down by 50% to only include it once. ++ */ ++ v_lr *= 0.5; + } + + for (i = 0; i < NSTATES; i++) + { - vctot -= LFC[i]*qq[i]*VV; - Fscal -= LFC[i]*qq[i]*FF; - dvdl_coul -= (DLF[i]*qq[i])*VV; ++ vctot -= LFC[i]*qq[i]*v_lr; ++ Fscal -= LFC[i]*qq[i]*f_lr; ++ dvdl_coul -= (DLF[i]*qq[i])*v_lr; + } + } + - if (ivdw == GMX_NBKERNEL_VDW_LJEWALD && - !(bExactVdwCutoff && r >= rvdw)) ++ if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw)) + { ++ /* See comment in the preamble. When using LJ-Ewald interactions ++ * (unless we use a switch modifier) we subtract the reciprocal-space ++ * Ewald component here which made it possible to apply the free ++ * energy interaction to r^-6 (vanilla LJ6 short-range part) ++ * above. This gets us closer to the ideal case of applying ++ * the softcore to the entire VdW interaction, ++ * including the reciprocal-space component. ++ */ + real rs, frac, f_lr; + int ri; + - rs = rsq*rinv*tab_ewald_scale; ++ rs = rsq*rinv*ewtabscale; + ri = (int)rs; + frac = rs - ri; + f_lr = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1]; + FF = f_lr*rinv; - VV = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr); ++ VV = tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr); ++ ++ if (ii == jnr) ++ { ++ /* If we get here, the i particle (ii) has itself (jnr) ++ * in its neighborlist. This can only happen with the Verlet ++ * scheme, and corresponds to a self-interaction that will ++ * occur twice. Scale it down by 50% to only include it once. 
++ */ ++ VV *= 0.5; ++ } ++ + for (i = 0; i < NSTATES; i++) + { - vvtot += LFV[i]*c6grid[i]*VV*(1.0/6.0); - Fscal += LFV[i]*c6grid[i]*FF*(1.0/6.0); - dvdl_vdw += (DLF[i]*c6grid[i])*VV*(1.0/6.0); ++ c6grid = nbfp_grid[tj[i]]; ++ vvtot += LFV[i]*c6grid*VV*(1.0/6.0); ++ Fscal += LFV[i]*c6grid*FF*(1.0/6.0); ++ dvdl_vdw += (DLF[i]*c6grid)*VV*(1.0/6.0); + } + + } + + if (bDoForces) + { + tx = Fscal*dx; + ty = Fscal*dy; + tz = Fscal*dz; + fix = fix + tx; + fiy = fiy + ty; + fiz = fiz + tz; + /* OpenMP atomics are expensive, but this kernels is also + * expensive, so we can take this hit, instead of using + * thread-local output buffers and extra reduction. + */ +#pragma omp atomic + f[j3] -= tx; +#pragma omp atomic + f[j3+1] -= ty; +#pragma omp atomic + f[j3+2] -= tz; + } + } + + /* The atomics below are expensive with many OpenMP threads. + * Here unperturbed i-particles will usually only have a few + * (perturbed) j-particles in the list. Thus with a buffered list + * we can skip a significant number of i-reductions with a check. + */ + if (npair_within_cutoff > 0) + { + if (bDoForces) + { +#pragma omp atomic + f[ii3] += fix; +#pragma omp atomic + f[ii3+1] += fiy; +#pragma omp atomic + f[ii3+2] += fiz; + } + if (bDoShiftForces) + { +#pragma omp atomic + fshift[is3] += fix; +#pragma omp atomic + fshift[is3+1] += fiy; +#pragma omp atomic + fshift[is3+2] += fiz; + } + if (bDoPotential) + { + ggid = gid[n]; +#pragma omp atomic + Vc[ggid] += vctot; +#pragma omp atomic + Vv[ggid] += vvtot; + } + } + } + +#pragma omp atomic + dvdl[efptCOUL] += dvdl_coul; + #pragma omp atomic + dvdl[efptVDW] += dvdl_vdw; + + /* Estimate flops, average for free energy stuff: + * 12 flops per outer iteration + * 150 flops per inner iteration + */ +#pragma omp atomic + inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150); +} + +real +nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw, + real tabscale, real *vftab, + real qqA, real c6A, real c12A, real qqB, real c6B, real c12B, + real LFC[2], real LFV[2], real DLF[2], + real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2], + real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min, + real *velectot, real *vvdwtot, real *dvdl) +{ + real r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal; + real qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2]; + real alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw; + real rpinv, r_coul, r_vdw, velecsum, vvdwsum; + real fscal_vdw[2], fscal_elec[2]; + real velec[2], vvdw[2]; + int i, ntab; + + qq[0] = qqA; + qq[1] = qqB; + c6[0] = c6A; + c6[1] = c6B; + c12[0] = c12A; + c12[1] = c12B; + + if (sc_r_power == 6.0) + { + rpm2 = r2*r2; /* r4 */ + rp = rpm2*r2; /* r6 */ + } + else if (sc_r_power == 48.0) + { + rp = r2*r2*r2; /* r6 */ + rp = rp*rp; /* r12 */ + rp = rp*rp; /* r24 */ + rp = rp*rp; /* r48 */ + rpm2 = rp/r2; /* r46 */ + } + else + { + rp = pow(r2, 0.5*sc_r_power); /* not currently supported as input, but can handle it */ + rpm2 = rp/r2; + } + + /* Loop over state A(0) and B(1) */ + for (i = 0; i < 2; i++) + { + if ((c6[i] > 0) && (c12[i] > 0)) + { + /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively. + * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5. + */ + sigma6[i] = 0.5*c12[i]/c6[i]; + sigma2[i] = pow(0.5*c12[i]/c6[i], 1.0/3.0); + /* should be able to get rid of this ^^^ internal pow call eventually. 
Will require agreement on + what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */ + if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */ + { + sigma6[i] = sigma6_min; + sigma2[i] = sigma2_min; + } + } + else + { + sigma6[i] = sigma6_def; + sigma2[i] = sigma2_def; + } + if (sc_r_power == 6.0) + { + sigma_pow[i] = sigma6[i]; + sigma_powm2[i] = sigma6[i]/sigma2[i]; + } + else if (sc_r_power == 48.0) + { + sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */ + sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */ + sigma_powm2[i] = sigma_pow[i]/sigma2[i]; + } + else + { /* not really supported as input, but in here for testing the general case*/ + sigma_pow[i] = pow(sigma2[i], sc_r_power/2); + sigma_powm2[i] = sigma_pow[i]/(sigma2[i]); + } + } + + /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/ + if ((c12[0] > 0) && (c12[1] > 0)) + { + alpha_vdw_eff = 0; + alpha_coul_eff = 0; + } + else + { + alpha_vdw_eff = alpha_vdw; + alpha_coul_eff = alpha_coul; + } + + /* Loop over A and B states again */ + for (i = 0; i < 2; i++) + { + fscal_elec[i] = 0; + fscal_vdw[i] = 0; + velec[i] = 0; + vvdw[i] = 0; + + /* Only spend time on A or B state if it is non-zero */ + if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) ) + { + /* Coulomb */ + rpinv = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp); + r_coul = pow(rpinv, -1.0/sc_r_power); + + /* Electrostatics table lookup data */ + rtab = r_coul*tabscale; + ntab = rtab; + eps = rtab-ntab; + eps2 = eps*eps; + ntab = 12*ntab; + /* Electrostatics */ + Y = vftab[ntab]; + F = vftab[ntab+1]; + Geps = eps*vftab[ntab+2]; + Heps2 = eps2*vftab[ntab+3]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + velec[i] = qq[i]*VV; + fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale; + + /* Vdw */ + rpinv = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp); + r_vdw = pow(rpinv, -1.0/sc_r_power); + /* Vdw table lookup data */ + rtab = r_vdw*tabscale; + ntab = rtab; + eps = rtab-ntab; + eps2 = eps*eps; + ntab = 12*ntab; + /* Dispersion */ + Y = vftab[ntab+4]; + F = vftab[ntab+5]; + Geps = eps*vftab[ntab+6]; + Heps2 = eps2*vftab[ntab+7]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw[i] = c6[i]*VV; + fscal_vdw[i] = -c6[i]*FF; + + /* Repulsion */ + Y = vftab[ntab+8]; + F = vftab[ntab+9]; + Geps = eps*vftab[ntab+10]; + Heps2 = eps2*vftab[ntab+11]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw[i] += c12[i]*VV; + fscal_vdw[i] -= c12[i]*FF; + fscal_vdw[i] *= r_vdw*rpinv*tabscale; + } + } + /* Now we have velec[i], vvdw[i], and fscal[i] for both states */ + /* Assemble A and B states */ + velecsum = 0; + vvdwsum = 0; + dvdl_coul = 0; + dvdl_vdw = 0; + fscal = 0; + for (i = 0; i < 2; i++) + { + velecsum += LFC[i]*velec[i]; + vvdwsum += LFV[i]*vvdw[i]; + + fscal += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2; + + dvdl_coul += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i]; + dvdl_vdw += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i]; + } + + dvdl[efptCOUL] += dvdl_coul; + dvdl[efptVDW] += dvdl_vdw; + + *velectot = velecsum; + *vvdwtot = vvdwsum; + + return fscal; +} diff --cc src/gromacs/gmxlib/nonbonded/nb_generic.c index 4f29311dea,0000000000..019ba8b341 mode 100644,000000..100644 --- a/src/gromacs/gmxlib/nonbonded/nb_generic.c +++ 
b/src/gromacs/gmxlib/nonbonded/nb_generic.c @@@ -1,480 -1,0 +1,480 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2012,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> + +#include "types/simple.h" +#include "vec.h" +#include "typedefs.h" +#include "nb_generic.h" +#include "nrnb.h" + +#include "gmx_fatal.h" + +#include "nonbonded.h" +#include "nb_kernel.h" + +void +gmx_nb_generic_kernel(t_nblist * nlist, + rvec * xx, + rvec * ff, + t_forcerec * fr, + t_mdatoms * mdatoms, + nb_kernel_data_t * kernel_data, + t_nrnb * nrnb) +{ + int nri, ntype, table_nelements, ielec, ivdw; + real facel, gbtabscale; + int n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid, nnn, n0; + real shX, shY, shZ; + real fscal, felec, fvdw, velec, vvdw, tx, ty, tz; + real rinvsq; + real iq; + real qq, vctot; + int nti, nvdwparam; + int tj; + real rt, r, eps, eps2, Y, F, Geps, Heps2, VV, FF, Fp, fijD, fijR; + real rinvsix; + real vvdwtot; + real vvdw_rep, vvdw_disp; + real ix, iy, iz, fix, fiy, fiz; + real jx, jy, jz; + real dx, dy, dz, rsq, rinv; + real c6, c12, c6grid, cexp1, cexp2, br; + real * charge; + real * shiftvec; + real * vdwparam, *vdwgridparam; + int * shift; + int * type; + real * fshift; + real * velecgrp; + real * vvdwgrp; + real tabscale; + real * VFtab; + real * x; + real * f; + int ewitab; + real ewtabscale, eweps, sh_ewald, ewrt, ewtabhalfspace; + real * ewtab; + real rcoulomb2, rvdw, rvdw2, sh_dispersion, sh_repulsion; + real rcutoff, rcutoff2; + real rswitch_elec, rswitch_vdw, d, d2, sw, dsw, rinvcorr; + real elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4; + real vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4; + real ewclj, ewclj2, ewclj6, ewcljrsq, poly, exponent, sh_lj_ewald; + gmx_bool bExactElecCutoff, bExactVdwCutoff, bExactCutoff; + + x = xx[0]; + f = ff[0]; + ielec = nlist->ielec; + ivdw = nlist->ivdw; + + fshift = fr->fshift[0]; + velecgrp = kernel_data->energygrp_elec; + vvdwgrp = kernel_data->energygrp_vdw; + tabscale = kernel_data->table_elec_vdw->scale; + VFtab = kernel_data->table_elec_vdw->data; + + sh_ewald = fr->ic->sh_ewald; + ewtab = fr->ic->tabq_coul_FDV0; + ewtabscale = fr->ic->tabq_scale; + ewtabhalfspace = 0.5/ewtabscale; + + rcoulomb2 = fr->rcoulomb*fr->rcoulomb; + rvdw = fr->rvdw; + rvdw2 = rvdw*rvdw; + sh_dispersion = fr->ic->dispersion_shift.cpot; + sh_repulsion = fr->ic->repulsion_shift.cpot; + sh_lj_ewald = fr->ic->sh_lj_ewald; + + ewclj = fr->ewaldcoeff_lj; + ewclj2 = ewclj*ewclj; + ewclj6 = ewclj2*ewclj2*ewclj2; + + if (fr->coulomb_modifier == eintmodPOTSWITCH) + { + d = fr->rcoulomb-fr->rcoulomb_switch; + elec_swV3 = -10.0/(d*d*d); + elec_swV4 = 15.0/(d*d*d*d); + elec_swV5 = -6.0/(d*d*d*d*d); + elec_swF2 = -30.0/(d*d*d); + elec_swF3 = 60.0/(d*d*d*d); + elec_swF4 = -30.0/(d*d*d*d*d); + } + else + { + /* Avoid warnings from stupid compilers (looking at you, Clang!) */ + elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0; + } + if (fr->vdw_modifier == eintmodPOTSWITCH) + { + d = fr->rvdw-fr->rvdw_switch; + vdw_swV3 = -10.0/(d*d*d); + vdw_swV4 = 15.0/(d*d*d*d); + vdw_swV5 = -6.0/(d*d*d*d*d); + vdw_swF2 = -30.0/(d*d*d); + vdw_swF3 = 60.0/(d*d*d*d); + vdw_swF4 = -30.0/(d*d*d*d*d); + } + else + { + /* Avoid warnings from stupid compilers (looking at you, Clang!) */ + vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0; + } + + bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO; + bExactVdwCutoff = (fr->vdw_modifier != eintmodNONE); + bExactCutoff = bExactElecCutoff && bExactVdwCutoff; + + if (bExactCutoff) + { + rcutoff = ( fr->rcoulomb > fr->rvdw ) ? 
fr->rcoulomb : fr->rvdw; + rcutoff2 = rcutoff*rcutoff; + } + else + { + /* Fix warnings for stupid compilers */ + rcutoff = rcutoff2 = 1e30; + } + + /* avoid compiler warnings for cases that cannot happen */ + nnn = 0; + eps = 0.0; + eps2 = 0.0; + + /* 3 VdW parameters for Buckingham, otherwise 2 */ + nvdwparam = (ivdw == GMX_NBKERNEL_VDW_BUCKINGHAM) ? 3 : 2; + table_nelements = 12; + + charge = mdatoms->chargeA; + type = mdatoms->typeA; + facel = fr->epsfac; + shiftvec = fr->shift_vec[0]; + vdwparam = fr->nbfp; + ntype = fr->ntype; + vdwgridparam = fr->ljpme_c6grid; + + for (n = 0; (n < nlist->nri); n++) + { + is3 = 3*nlist->shift[n]; + shX = shiftvec[is3]; + shY = shiftvec[is3+1]; + shZ = shiftvec[is3+2]; + nj0 = nlist->jindex[n]; + nj1 = nlist->jindex[n+1]; + ii = nlist->iinr[n]; + ii3 = 3*ii; + ix = shX + x[ii3+0]; + iy = shY + x[ii3+1]; + iz = shZ + x[ii3+2]; + iq = facel*charge[ii]; + nti = nvdwparam*ntype*type[ii]; + vctot = 0; + vvdwtot = 0; + fix = 0; + fiy = 0; + fiz = 0; + + for (k = nj0; (k < nj1); k++) + { + jnr = nlist->jjnr[k]; + j3 = 3*jnr; + jx = x[j3+0]; + jy = x[j3+1]; + jz = x[j3+2]; + dx = ix - jx; + dy = iy - jy; + dz = iz - jz; + rsq = dx*dx+dy*dy+dz*dz; + rinv = gmx_invsqrt(rsq); + rinvsq = rinv*rinv; + felec = 0; + fvdw = 0; + velec = 0; + vvdw = 0; + + if (bExactCutoff && rsq >= rcutoff2) + { + continue; + } + + if (ielec == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE) + { + r = rsq*rinv; + rt = r*tabscale; + n0 = rt; + eps = rt-n0; + eps2 = eps*eps; + nnn = table_nelements*n0; + } + + /* Coulomb interaction. ielec==0 means no interaction */ + if (ielec != GMX_NBKERNEL_ELEC_NONE) + { + qq = iq*charge[jnr]; + + switch (ielec) + { + case GMX_NBKERNEL_ELEC_NONE: + break; + + case GMX_NBKERNEL_ELEC_COULOMB: + /* Vanilla cutoff coulomb */ + velec = qq*rinv; + felec = velec*rinvsq; + /* The shift for the Coulomb potential is stored in + * the RF parameter c_rf, which is 0 without shift + */ + velec -= qq*fr->ic->c_rf; + break; + + case GMX_NBKERNEL_ELEC_REACTIONFIELD: + /* Reaction-field */ + velec = qq*(rinv+fr->k_rf*rsq-fr->c_rf); + felec = qq*(rinv*rinvsq-2.0*fr->k_rf); + break; + + case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE: + /* Tabulated coulomb */ + Y = VFtab[nnn]; + F = VFtab[nnn+1]; + Geps = eps*VFtab[nnn+2]; + Heps2 = eps2*VFtab[nnn+3]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + velec = qq*VV; + felec = -qq*FF*tabscale*rinv; + break; + + case GMX_NBKERNEL_ELEC_GENERALIZEDBORN: + /* GB */ + gmx_fatal(FARGS, "Death & horror! GB generic interaction not implemented.\n"); + break; + + case GMX_NBKERNEL_ELEC_EWALD: + ewrt = rsq*rinv*ewtabscale; + ewitab = ewrt; + eweps = ewrt-ewitab; + ewitab = 4*ewitab; + felec = ewtab[ewitab]+eweps*ewtab[ewitab+1]; + rinvcorr = (fr->coulomb_modifier == eintmodPOTSHIFT) ? rinv-fr->ic->sh_ewald : rinv; + velec = qq*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec))); + felec = qq*rinv*(rinvsq-felec); + break; + + default: + gmx_fatal(FARGS, "Death & horror! No generic coulomb interaction for ielec=%d.\n", ielec); + break; + } + if (fr->coulomb_modifier == eintmodPOTSWITCH) + { + d = rsq*rinv-fr->rcoulomb_switch; + d = (d > 0.0) ? d : 0.0; + d2 = d*d; + sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5)); + dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4)); + /* Apply switch function. Note that felec=f/r since it will be multiplied + * by the i-j displacement vector. 
This means felec'=f'/r=-(v*sw)'/r= + * -(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=felec*sw-v*dsw/r + */ + felec = felec*sw - rinv*velec*dsw; + /* Once we have used velec to update felec we can modify velec too */ + velec *= sw; + } + if (bExactElecCutoff) + { + felec = (rsq < rcoulomb2) ? felec : 0.0; + velec = (rsq < rcoulomb2) ? velec : 0.0; + } + vctot += velec; + } /* End of coulomb interactions */ + + + /* VdW interaction. ivdw==0 means no interaction */ + if (ivdw != GMX_NBKERNEL_VDW_NONE) + { + tj = nti+nvdwparam*type[jnr]; + + switch (ivdw) + { + case GMX_NBKERNEL_VDW_NONE: + break; + + case GMX_NBKERNEL_VDW_LENNARDJONES: + /* Vanilla Lennard-Jones cutoff */ + c6 = vdwparam[tj]; + c12 = vdwparam[tj+1]; + rinvsix = rinvsq*rinvsq*rinvsq; + vvdw_disp = c6*rinvsix; + vvdw_rep = c12*rinvsix*rinvsix; + fvdw = (vvdw_rep-vvdw_disp)*rinvsq; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { + vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion)/6.0; + } + else + { + vvdw = vvdw_rep/12.0-vvdw_disp/6.0; + } + break; + + case GMX_NBKERNEL_VDW_BUCKINGHAM: + /* Buckingham */ + c6 = vdwparam[tj]; + cexp1 = vdwparam[tj+1]; + cexp2 = vdwparam[tj+2]; + + rinvsix = rinvsq*rinvsq*rinvsq; + vvdw_disp = c6*rinvsix; + br = cexp2*rsq*rinv; + vvdw_rep = cexp1*exp(-br); + fvdw = (br*vvdw_rep-vvdw_disp)*rinvsq; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { + vvdw = (vvdw_rep-cexp1*exp(-cexp2*rvdw))-(vvdw_disp + c6*sh_dispersion)/6.0; + } + else + { + vvdw = vvdw_rep-vvdw_disp/6.0; + } + break; + + case GMX_NBKERNEL_VDW_CUBICSPLINETABLE: + /* Tabulated VdW */ + c6 = vdwparam[tj]; + c12 = vdwparam[tj+1]; + Y = VFtab[nnn+4]; + F = VFtab[nnn+5]; + Geps = eps*VFtab[nnn+6]; + Heps2 = eps2*VFtab[nnn+7]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw_disp = c6*VV; + fijD = c6*FF; + Y = VFtab[nnn+8]; + F = VFtab[nnn+9]; + Geps = eps*VFtab[nnn+10]; + Heps2 = eps2*VFtab[nnn+11]; + Fp = F+Geps+Heps2; + VV = Y+eps*Fp; + FF = Fp+Geps+2.0*Heps2; + vvdw_rep = c12*VV; + fijR = c12*FF; + fvdw = -(fijD+fijR)*tabscale*rinv; + vvdw = vvdw_disp + vvdw_rep; + break; + + + case GMX_NBKERNEL_VDW_LJEWALD: + /* LJ-PME */ + rinvsix = rinvsq*rinvsq*rinvsq; + ewcljrsq = ewclj2*rsq; + exponent = exp(-ewcljrsq); + poly = exponent*(1.0 + ewcljrsq + ewcljrsq*ewcljrsq*0.5); + c6 = vdwparam[tj]; + c12 = vdwparam[tj+1]; + c6grid = vdwgridparam[tj]; + vvdw_disp = (c6-c6grid*(1.0-poly))*rinvsix; + vvdw_rep = c12*rinvsix*rinvsix; + fvdw = (vvdw_rep - vvdw_disp - c6grid*(1.0/6.0)*exponent*ewclj6)*rinvsq; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { - vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion + c6grid*sh_lj_ewald)/6.0; ++ vvdw = (vvdw_rep + c12*sh_repulsion)/12.0 - (vvdw_disp + c6*sh_dispersion - c6grid*sh_lj_ewald)/6.0; + } + else + { + vvdw = vvdw_rep/12.0-vvdw_disp/6.0; + } + break; + + default: + gmx_fatal(FARGS, "Death & horror! No generic VdW interaction for ivdw=%d.\n", ivdw); + break; + } + if (fr->vdw_modifier == eintmodPOTSWITCH) + { + d = rsq*rinv-fr->rvdw_switch; + d = (d > 0.0) ? d : 0.0; + d2 = d*d; + sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5)); + dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4)); + /* See coulomb interaction for the force-switch formula */ + fvdw = fvdw*sw - rinv*vvdw*dsw; + vvdw *= sw; + } + if (bExactVdwCutoff) + { + fvdw = (rsq < rvdw2) ? fvdw : 0.0; + vvdw = (rsq < rvdw2) ? 
vvdw : 0.0; + } + vvdwtot += vvdw; + } /* end VdW interactions */ + + fscal = felec+fvdw; + + tx = fscal*dx; + ty = fscal*dy; + tz = fscal*dz; + fix = fix + tx; + fiy = fiy + ty; + fiz = fiz + tz; + f[j3+0] = f[j3+0] - tx; + f[j3+1] = f[j3+1] - ty; + f[j3+2] = f[j3+2] - tz; + } + + f[ii3+0] = f[ii3+0] + fix; + f[ii3+1] = f[ii3+1] + fiy; + f[ii3+2] = f[ii3+2] + fiz; + fshift[is3] = fshift[is3]+fix; + fshift[is3+1] = fshift[is3+1]+fiy; + fshift[is3+2] = fshift[is3+2]+fiz; + ggid = nlist->gid[n]; + velecgrp[ggid] += vctot; + vvdwgrp[ggid] += vvdwtot; + } + /* Estimate flops, average for generic kernel: + * 12 flops per outer iteration + * 50 flops per inner iteration + */ + inc_nrnb(nrnb, eNR_NBKERNEL_GENERIC, nlist->nri*12 + nlist->jindex[n]*50); +} diff --cc src/gromacs/gmxlib/nonbonded/nonbonded.c index 95cc2d5720,0000000000..ab68c47db0 mode 100644,000000..100644 --- a/src/gromacs/gmxlib/nonbonded/nonbonded.c +++ b/src/gromacs/gmxlib/nonbonded/nonbonded.c @@@ -1,686 -1,0 +1,707 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> + +#include "thread_mpi/threads.h" + +#include "typedefs.h" +#include "txtdump.h" +#include "gromacs/utility/smalloc.h" +#include "ns.h" +#include "vec.h" +#include "gromacs/math/utilities.h" +#include "macros.h" +#include "gromacs/utility/cstringutil.h" +#include "force.h" +#include "names.h" +#include "main.h" +#include "xvgr.h" +#include "gmx_fatal.h" +#include "physics.h" +#include "force.h" +#include "bondf.h" +#include "nrnb.h" +#include "nonbonded.h" +#include "gromacs/simd/simd.h" + +#include "nb_kernel.h" +#include "nb_free_energy.h" +#include "nb_generic.h" +#include "nb_generic_cg.h" +#include "nb_generic_adress.h" + +/* Different default (c) and SIMD instructions interaction-specific kernels */ +#include "nb_kernel_c/nb_kernel_c.h" + +#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) +# include "nb_kernel_sse2_single/nb_kernel_sse2_single.h" +#endif +#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) +# include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h" +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) +# include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h" +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) +# include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h" +#endif +#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) +# include "nb_kernel_sse2_double/nb_kernel_sse2_double.h" +#endif +#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) +# include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h" +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) +# include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h" +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) +# include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h" +#endif +#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) +# include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h" +#endif + + +static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER; +static gmx_bool nonbonded_setup_done = FALSE; + + +void +gmx_nonbonded_setup(t_forcerec * fr, + gmx_bool bGenericKernelOnly) +{ + tMPI_Thread_mutex_lock(&nonbonded_setup_mutex); + /* Here we are guaranteed only one thread made it. 
*/ + if (nonbonded_setup_done == FALSE) + { + if (bGenericKernelOnly == FALSE) + { + /* Add the generic kernels to the structure stored statically in nb_kernel.c */ + nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size); + + if (!(fr != NULL && fr->use_simd_kernels == FALSE)) + { + /* Add interaction-specific kernels for different architectures */ + /* Single precision */ +#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size); +#endif +#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size); +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size); +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size); +#endif + /* Double precision */ +#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size); +#endif +#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size); +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size); +#endif +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size); +#endif +#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size); +#endif + ; /* empty statement to avoid a completely empty block */ + } + } + /* Create a hash for faster lookups */ + nb_kernel_list_hash_init(); + + nonbonded_setup_done = TRUE; + } + tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex); +} + + + +void - gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl) ++gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers) +{ + const char * elec; + const char * elec_mod; + const char * vdw; + const char * vdw_mod; + const char * geom; + const char * other; + const char * vf; + + struct + { + const char * arch; + int simd_padding_width; + } + arch_and_padding[] = + { + /* Single precision */ +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE) + { "avx_256_single", 8 }, +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) + { "avx_128_fma_single", 4 }, +#endif +#if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE) + { "sse4_1_single", 4 }, +#endif +#if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE) + { "sse2_single", 4 }, +#endif + /* Double precision */ +#if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE) + { "avx_256_double", 4 }, +#endif +#if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE) + /* Sic. Double precision 2-way SIMD does not require neighbor list padding, + * since the kernels execute a loop unrolled a factor 2, followed by + * a possible single odd-element epilogue. 
+ */ + { "avx_128_fma_double", 1 }, +#endif +#if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sse2_double", 1 }, +#endif +#if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sse4_1_double", 1 }, +#endif +#if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sparc64_hpc_ace_double", 1 }, +#endif + { "c", 1 }, + }; + int narch = asize(arch_and_padding); + int i; + + if (nonbonded_setup_done == FALSE) + { + /* We typically call this setup routine before starting timers, + * but if that has not been done for whatever reason we do it now. + */ + gmx_nonbonded_setup(NULL, FALSE); + } + + /* Not used yet */ + other = ""; + + nl->kernelptr_vf = NULL; + nl->kernelptr_v = NULL; + nl->kernelptr_f = NULL; + + elec = gmx_nbkernel_elec_names[nl->ielec]; + elec_mod = eintmod_names[nl->ielecmod]; + vdw = gmx_nbkernel_vdw_names[nl->ivdw]; + vdw_mod = eintmod_names[nl->ivdwmod]; + geom = gmx_nblist_geometry_names[nl->igeometry]; + + if (nl->type == GMX_NBLIST_INTERACTION_ADRESS) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel; + nl->simd_padding_width = 1; + return; + } + + if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY) + { + nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel; + nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel; + nl->simd_padding_width = 1; + } + else if (!gmx_strcasecmp_min(geom, "CG-CG")) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel; + nl->simd_padding_width = 1; + } + else + { + /* Try to find a specific kernel first */ + + for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++) + { + nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + } + for (i = 0; i < narch && nl->kernelptr_f == NULL; i++) + { + nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + + /* If there is not force-only optimized kernel, is there a potential & force one? */ + if (nl->kernelptr_f == NULL) + { + nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + } + } + - /* Give up. If this was a water kernel, leave the pointer as NULL, which - * will disable water optimization in NS. If it is a particle kernel, set - * the pointer to the generic NB kernel. ++ /* For now, the accelerated kernels cannot handle the combination of switch functions for both ++ * electrostatics and VdW that use different switch radius or switch cutoff distances ++ * (both of them enter in the switch function calculation). This would require ++ * us to evaluate two completely separate switch functions for every interaction. ++ * Instead, we disable such kernels by setting the pointer to NULL. ++ * This will cause the generic kernel (which can handle it) to be called instead. 
++ * ++ * Note that we typically already enable tabulated coulomb interactions for this case, ++ * so this is mostly a safe-guard to make sure we call the generic kernel if the ++ * tables are disabled. ++ */ ++ if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) && ++ (nl->ivdw != GMX_NBKERNEL_VDW_NONE) && (nl->ivdwmod == eintmodPOTSWITCH) && ++ bElecAndVdwSwitchDiffers) ++ { ++ nl->kernelptr_vf = NULL; ++ nl->kernelptr_f = NULL; ++ } ++ ++ /* Give up, pick a generic one instead. ++ * We only do this for particle-particle kernels; by leaving the water-optimized kernel ++ * pointers to NULL, the water optimization will automatically be disabled for this interaction. + */ + if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle")) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_kernel; + nl->simd_padding_width = 1; + if (debug) + { + fprintf(debug, + "WARNING - Slow generic NB kernel used for neighborlist with\n" + " Elec: '%s', Modifier: '%s'\n" - " Vdw: '%s', Modifier: '%s'\n" - " Geom: '%s', Other: '%s'\n\n", - elec, elec_mod, vdw, vdw_mod, geom, other); ++ " Vdw: '%s', Modifier: '%s'\n", ++ elec, elec_mod, vdw, vdw_mod); + } + } + } - + return; +} + +void do_nonbonded(t_forcerec *fr, + rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl, + gmx_grppairener_t *grppener, + t_nrnb *nrnb, real *lambda, real *dvdl, + int nls, int eNL, int flags) +{ + t_nblist * nlist; + int n, n0, n1, i, i0, i1, sz, range; + t_nblists * nblists; + nb_kernel_data_t kernel_data; + nb_kernel_t * kernelptr = NULL; + rvec * f; + + kernel_data.flags = flags; + kernel_data.exclusions = excl; + kernel_data.lambda = lambda; + kernel_data.dvdl = dvdl; + + if (fr->bAllvsAll) + { + gmx_incons("All-vs-all kernels have not been implemented in version 4.6"); + return; + } + + if (eNL >= 0) + { + i0 = eNL; + i1 = i0+1; + } + else + { + i0 = 0; + i1 = eNL_NR; + } + + if (nls >= 0) + { + n0 = nls; + n1 = nls+1; + } + else + { + n0 = 0; + n1 = fr->nnblists; + } + + for (n = n0; (n < n1); n++) + { + nblists = &fr->nblists[n]; + + kernel_data.table_elec = &nblists->table_elec; + kernel_data.table_vdw = &nblists->table_vdw; + kernel_data.table_elec_vdw = &nblists->table_elec_vdw; + + for (range = 0; range < 2; range++) + { + /* Are we doing short/long-range? */ + if (range == 0) + { + /* Short-range */ + if (!(flags & GMX_NONBONDED_DO_SR)) + { + continue; + } + kernel_data.energygrp_elec = grppener->ener[egCOULSR]; + kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR]; + kernel_data.energygrp_polarization = grppener->ener[egGB]; + nlist = nblists->nlist_sr; + f = f_shortrange; + } + else if (range == 1) + { + /* Long-range */ + if (!(flags & GMX_NONBONDED_DO_LR)) + { + continue; + } + kernel_data.energygrp_elec = grppener->ener[egCOULLR]; + kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? 
egBHAMLR : egLJLR]; + kernel_data.energygrp_polarization = grppener->ener[egGB]; + nlist = nblists->nlist_lr; + f = f_longrange; + } + + for (i = i0; (i < i1); i++) + { + if (nlist[i].nri > 0) + { + if (flags & GMX_NONBONDED_DO_POTENTIAL) + { + /* Potential and force */ + kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf; + } + else + { + /* Force only, no potential */ + kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f; + } + + if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA)) + { + /* We don't need the non-perturbed interactions */ + continue; + } + /* Neighborlists whose kernelptr==NULL will always be empty */ + if (kernelptr != NULL) + { + (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb); + } ++ else ++ { ++ gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned."); ++ } + } + } + } + } +} + +static void +nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit) +{ + gmx_warning("Listed nonbonded interaction between particles %d and %d\n" + "at distance %.3f which is larger than the table limit %.3f nm.\n\n" + "This is likely either a 1,4 interaction, or a listed interaction inside\n" + "a smaller molecule you are decoupling during a free energy calculation.\n" + "Since interactions at distances beyond the table cannot be computed,\n" + "they are skipped until they are inside the table limit again. You will\n" + "only see this message once, even if it occurs for several interactions.\n\n" + "IMPORTANT: This should not happen in a stable simulation, so there is\n" + "probably something wrong with your system. Only change the table-extension\n" + "distance in the mdp file if you are really sure that is the reason.\n", + glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit); + + if (debug) + { + fprintf(debug, + "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n", + x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ], + glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r); + } +} + + + +/* This might logically belong better in the nb_generic.c module, but it is only + * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an + * extra functional call for every single pair listed in the topology. 
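+ * The function below assumes the standard cubic-spline table layout of
+ * 12 reals per point: four coefficients (Y, F, G, H) for each of
+ * electrostatics, dispersion and repulsion. With eps the fractional
+ * position within a table interval, each term is evaluated as
+ *   Fp = F + G*eps + H*eps^2
+ *   VV = Y + eps*Fp
+ *   FF = Fp + G*eps + 2*H*eps^2   (i.e. FF = dVV/deps)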
+ */ +static real +nb_evaluate_single(real r2, real tabscale, real *vftab, + real qq, real c6, real c12, real *velec, real *vvdw) +{ + real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal; + int ntab; + + /* Do the tabulated interactions - first table lookup */ + rinv = gmx_invsqrt(r2); + r = r2*rinv; + rtab = r*tabscale; + ntab = rtab; + eps = rtab-ntab; + eps2 = eps*eps; + ntab = 12*ntab; + /* Electrostatics */ + Y = vftab[ntab]; + F = vftab[ntab+1]; + Geps = eps*vftab[ntab+2]; + Heps2 = eps2*vftab[ntab+3]; + Fp = F+Geps+Heps2; + VVe = Y+eps*Fp; + FFe = Fp+Geps+2.0*Heps2; + /* Dispersion */ + Y = vftab[ntab+4]; + F = vftab[ntab+5]; + Geps = eps*vftab[ntab+6]; + Heps2 = eps2*vftab[ntab+7]; + Fp = F+Geps+Heps2; + VVd = Y+eps*Fp; + FFd = Fp+Geps+2.0*Heps2; + /* Repulsion */ + Y = vftab[ntab+8]; + F = vftab[ntab+9]; + Geps = eps*vftab[ntab+10]; + Heps2 = eps2*vftab[ntab+11]; + Fp = F+Geps+Heps2; + VVr = Y+eps*Fp; + FFr = Fp+Geps+2.0*Heps2; + + *velec = qq*VVe; + *vvdw = c6*VVd+c12*VVr; + + fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv; + + return fscal; +} + + +real +do_nonbonded_listed(int ftype, int nbonds, + const t_iatom iatoms[], const t_iparams iparams[], + const rvec x[], rvec f[], rvec fshift[], + const t_pbc *pbc, const t_graph *g, + real *lambda, real *dvdl, + const t_mdatoms *md, + const t_forcerec *fr, gmx_grppairener_t *grppener, + int *global_atom_index) +{ + int ielec, ivdw; + real qq, c6, c12; + rvec dx; + ivec dt; + int i, j, itype, ai, aj, gid; + int fshift_index; + real r2, rinv; + real fscal, velec, vvdw; + real * energygrp_elec; + real * energygrp_vdw; + static gmx_bool warned_rlimit = FALSE; + /* Free energy stuff */ + gmx_bool bFreeEnergy; + real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2]; + real qqB, c6B, c12B, sigma2_def, sigma2_min; + + + switch (ftype) + { + case F_LJ14: + case F_LJC14_Q: + energygrp_elec = grppener->ener[egCOUL14]; + energygrp_vdw = grppener->ener[egLJ14]; + break; + case F_LJC_PAIRS_NB: + energygrp_elec = grppener->ener[egCOULSR]; + energygrp_vdw = grppener->ener[egLJSR]; + break; + default: + energygrp_elec = NULL; /* Keep compiler happy */ + energygrp_vdw = NULL; /* Keep compiler happy */ + gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype); + break; + } + + if (fr->efep != efepNO) + { + /* Lambda factor for state A=1-lambda and B=lambda */ + LFC[0] = 1.0 - lambda[efptCOUL]; + LFV[0] = 1.0 - lambda[efptVDW]; + LFC[1] = lambda[efptCOUL]; + LFV[1] = lambda[efptVDW]; + + /*derivative of the lambda factor for state A and B */ + DLF[0] = -1; + DLF[1] = 1; + + /* precalculate */ + sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0); + sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0); + + for (i = 0; i < 2; i++) + { + lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i])); + dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1); + lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i])); + dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? 
(1-LFV[i]) : 1); + } + } + else + { + sigma2_min = sigma2_def = 0; + } + + bFreeEnergy = FALSE; + for (i = 0; (i < nbonds); ) + { + itype = iatoms[i++]; + ai = iatoms[i++]; + aj = iatoms[i++]; + gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp); + + /* Get parameters */ + switch (ftype) + { + case F_LJ14: + bFreeEnergy = + (fr->efep != efepNO && + ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) || + iparams[itype].lj14.c6A != iparams[itype].lj14.c6B || + iparams[itype].lj14.c12A != iparams[itype].lj14.c12B)); + qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ; + c6 = iparams[itype].lj14.c6A; + c12 = iparams[itype].lj14.c12A; + break; + case F_LJC14_Q: + qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq; + c6 = iparams[itype].ljc14.c6; + c12 = iparams[itype].ljc14.c12; + break; + case F_LJC_PAIRS_NB: + qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac; + c6 = iparams[itype].ljcnb.c6; + c12 = iparams[itype].ljcnb.c12; + break; + default: + /* Cannot happen since we called gmx_fatal() above in this case */ + qq = c6 = c12 = 0; /* Keep compiler happy */ + break; + } + + /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors + * included in the general nfbp array now. This means the tables are scaled down by the + * same factor, so when we use the original c6/c12 parameters from iparams[] they must + * be scaled up. + */ + c6 *= 6.0; + c12 *= 12.0; + + /* Do we need to apply full periodic boundary conditions? */ + if (fr->bMolPBC == TRUE) + { + fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx); + } + else + { + fshift_index = CENTRAL; + rvec_sub(x[ai], x[aj], dx); + } + r2 = norm2(dx); + + if (r2 >= fr->tab14.r*fr->tab14.r) + { + if (warned_rlimit == FALSE) + { + nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r); + warned_rlimit = TRUE; + } + continue; + } + + if (bFreeEnergy) + { + /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */ + qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ; + c6B = iparams[itype].lj14.c6B*6.0; + c12B = iparams[itype].lj14.c12B*12.0; + + fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw, + fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B, + LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw, + fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl); + } + else + { + /* Evaluate tabulated interaction without free energy */ + fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw); + } + + energygrp_elec[gid] += velec; + energygrp_vdw[gid] += vvdw; + svmul(fscal, dx, dx); + + /* Add the forces */ + rvec_inc(f[ai], dx); + rvec_dec(f[aj], dx); + + if (g) + { + /* Correct the shift forces using the graph */ + ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt); + fshift_index = IVEC2IS(dt); + } + if (fshift_index != CENTRAL) + { + rvec_inc(fshift[fshift_index], dx); + rvec_dec(fshift[CENTRAL], dx); + } + } + return 0.0; +} diff --cc src/gromacs/gmxpreprocess/readir.c index 4dbdf75e3e,0000000000..5fbede033e mode 100644,000000..100644 --- a/src/gromacs/gmxpreprocess/readir.c +++ b/src/gromacs/gmxpreprocess/readir.c @@@ -1,4398 -1,0 +1,4411 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. 
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <ctype.h> +#include <stdlib.h> +#include <limits.h> +#include "sysstuff.h" +#include "gromacs/utility/smalloc.h" +#include "typedefs.h" +#include "physics.h" +#include "names.h" +#include "gmx_fatal.h" +#include "macros.h" +#include "index.h" +#include "symtab.h" +#include "gromacs/utility/cstringutil.h" +#include "readinp.h" +#include "warninp.h" +#include "readir.h" +#include "toputil.h" +#include "index.h" +#include "network.h" +#include "vec.h" +#include "pbc.h" +#include "mtop_util.h" +#include "chargegroup.h" +#include "inputrec.h" +#include "calc_verletbuf.h" + +#define MAXPTR 254 +#define NOGID 255 + +/* Resource parameters + * Do not change any of these until you read the instruction + * in readinp.h. Some cpp's do not take spaces after the backslash + * (like the c-shell), which will give you a very weird compiler + * message.
+ */ + +typedef struct t_inputrec_strings +{ + char tcgrps[STRLEN], tau_t[STRLEN], ref_t[STRLEN], + acc[STRLEN], accgrps[STRLEN], freeze[STRLEN], frdim[STRLEN], + energy[STRLEN], user1[STRLEN], user2[STRLEN], vcm[STRLEN], x_compressed_groups[STRLEN], + couple_moltype[STRLEN], orirefitgrp[STRLEN], egptable[STRLEN], egpexcl[STRLEN], + wall_atomtype[STRLEN], wall_density[STRLEN], deform[STRLEN], QMMM[STRLEN], + imd_grp[STRLEN]; + char fep_lambda[efptNR][STRLEN]; + char lambda_weights[STRLEN]; + char **pull_grp; + char **rot_grp; + char anneal[STRLEN], anneal_npoints[STRLEN], + anneal_time[STRLEN], anneal_temp[STRLEN]; + char QMmethod[STRLEN], QMbasis[STRLEN], QMcharge[STRLEN], QMmult[STRLEN], + bSH[STRLEN], CASorbitals[STRLEN], CASelectrons[STRLEN], SAon[STRLEN], + SAoff[STRLEN], SAsteps[STRLEN], bTS[STRLEN], bOPT[STRLEN]; + char efield_x[STRLEN], efield_xt[STRLEN], efield_y[STRLEN], + efield_yt[STRLEN], efield_z[STRLEN], efield_zt[STRLEN]; + +} gmx_inputrec_strings; + +static gmx_inputrec_strings *is = NULL; + +void init_inputrec_strings() +{ + if (is) + { + gmx_incons("Attempted to call init_inputrec_strings before calling done_inputrec_strings. Only one inputrec (i.e. .mdp file) can be parsed at a time."); + } + snew(is, 1); +} + +void done_inputrec_strings() +{ + sfree(is); + is = NULL; +} + +static char swapgrp[STRLEN], splitgrp0[STRLEN], splitgrp1[STRLEN], solgrp[STRLEN]; + +enum { + egrptpALL, /* All particles have to be a member of a group. */ + egrptpALL_GENREST, /* A rest group with name is generated for particles * + * that are not part of any group. */ + egrptpPART, /* As egrptpALL_GENREST, but no name is generated * + * for the rest group. */ + egrptpONE /* Merge all selected groups into one group, * + * make a rest group for the remaining particles. */ +}; + +static const char *constraints[eshNR+1] = { + "none", "h-bonds", "all-bonds", "h-angles", "all-angles", NULL +}; + +static const char *couple_lam[ecouplamNR+1] = { + "vdw-q", "vdw", "q", "none", NULL +}; + +void init_ir(t_inputrec *ir, t_gromppopts *opts) +{ + snew(opts->include, STRLEN); + snew(opts->define, STRLEN); + snew(ir->fepvals, 1); + snew(ir->expandedvals, 1); + snew(ir->simtempvals, 1); +} + +static void GetSimTemps(int ntemps, t_simtemp *simtemp, double *temperature_lambdas) +{ + + int i; + + for (i = 0; i < ntemps; i++) + { + /* simple linear scaling -- allows more control */ + if (simtemp->eSimTempScale == esimtempLINEAR) + { + simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*temperature_lambdas[i]; + } + else if (simtemp->eSimTempScale == esimtempGEOMETRIC) /* should give roughly equal acceptance for constant heat capacity . . . 
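since successive temperatures then have a constant ratio
T_{i+1}/T_i = (T_high/T_low)^(1/(ntemps-1)), which for roughly constant
heat capacity gives similar energy-distribution overlap, and hence
similar acceptance, between neighboring temperatures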
*/ + { + simtemp->temperatures[i] = simtemp->simtemp_low * pow(simtemp->simtemp_high/simtemp->simtemp_low, (1.0*i)/(ntemps-1)); + } + else if (simtemp->eSimTempScale == esimtempEXPONENTIAL) + { + simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1)); + } + else + { + char errorstr[128]; + sprintf(errorstr, "eSimTempScale=%d not defined", simtemp->eSimTempScale); + gmx_fatal(FARGS, errorstr); + } + } +} + + + +static void _low_check(gmx_bool b, char *s, warninp_t wi) +{ + if (b) + { + warning_error(wi, s); + } +} + +static void check_nst(const char *desc_nst, int nst, + const char *desc_p, int *p, + warninp_t wi) +{ + char buf[STRLEN]; + + if (*p > 0 && *p % nst != 0) + { + /* Round up to the next multiple of nst */ + *p = ((*p)/nst + 1)*nst; + sprintf(buf, "%s should be a multiple of %s, changing %s to %d\n", + desc_p, desc_nst, desc_p, *p); + warning(wi, buf); + } +} + +static gmx_bool ir_NVE(const t_inputrec *ir) +{ + return ((ir->eI == eiMD || EI_VV(ir->eI)) && ir->etc == etcNO); +} + +static int lcd(int n1, int n2) +{ + int d, i; + + d = 1; + for (i = 2; (i <= n1 && i <= n2); i++) + { + if (n1 % i == 0 && n2 % i == 0) + { + d = i; + } + } + + return d; +} + +static void process_interaction_modifier(const t_inputrec *ir, int *eintmod) +{ + if (*eintmod == eintmodPOTSHIFT_VERLET) + { + if (ir->cutoff_scheme == ecutsVERLET) + { + *eintmod = eintmodPOTSHIFT; + } + else + { + *eintmod = eintmodNONE; + } + } +} + +void check_ir(const char *mdparin, t_inputrec *ir, t_gromppopts *opts, + warninp_t wi) +/* Check internal consistency. + * NOTE: index groups are not set here yet, don't check things + * like temperature coupling group options here, but in triple_check + */ +{ + /* Strange macro: first one fills the err_buf, and then one can check + * the condition, which will print the message and increase the error + * counter. + */ +#define CHECK(b) _low_check(b, err_buf, wi) + char err_buf[256], warn_buf[STRLEN]; + int i, j; + int ns_type = 0; + real dt_coupl = 0; + real dt_pcoupl; + int nstcmin; + t_lambda *fep = ir->fepvals; + t_expanded *expand = ir->expandedvals; + + set_warning_line(wi, mdparin, -1); + + /* BASIC CUT-OFF STUFF */ + if (ir->rcoulomb < 0) + { + warning_error(wi, "rcoulomb should be >= 0"); + } + if (ir->rvdw < 0) + { + warning_error(wi, "rvdw should be >= 0"); + } + if (ir->rlist < 0 && + !(ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_tol > 0)) + { + warning_error(wi, "rlist should be >= 0"); + } + + process_interaction_modifier(ir, &ir->coulomb_modifier); + process_interaction_modifier(ir, &ir->vdw_modifier); + + if (ir->cutoff_scheme == ecutsGROUP) + { + warning_note(wi, + "The group cutoff scheme is deprecated in Gromacs 5.0 and will be removed in a future " + "release when all interaction forms are supported for the verlet scheme. The verlet " + "scheme already scales better, and it is compatible with GPUs and other accelerators."); + + /* BASIC CUT-OFF STUFF */ + if (ir->rlist == 0 || + !((ir_coulomb_might_be_zero_at_cutoff(ir) && ir->rcoulomb > ir->rlist) || + (ir_vdw_might_be_zero_at_cutoff(ir) && ir->rvdw > ir->rlist))) + { + /* No switched potential and/or no twin-range: + * we can set the long-range cut-off to the maximum of the other cut-offs. 
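+ * For example, rlist=1.0 with rvdw=1.0 and rcoulomb=1.2 then simply
+ * gives rlistlong=1.2 below.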
+ */ + ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)); + } + else if (ir->rlistlong < 0) + { + ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)); + sprintf(warn_buf, "rlistlong was not set, setting it to %g (no buffer)", + ir->rlistlong); + warning(wi, warn_buf); + } + if (ir->rlistlong == 0 && ir->ePBC != epbcNONE) + { + warning_error(wi, "Can not have an infinite cut-off with PBC"); + } + if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist)) + { + warning_error(wi, "rlistlong can not be shorter than rlist"); + } + if (IR_TWINRANGE(*ir) && ir->nstlist <= 0) + { + warning_error(wi, "Can not have nstlist<=0 with twin-range interactions"); + } + } + + if (ir->rlistlong == ir->rlist) + { + ir->nstcalclr = 0; + } + else if (ir->rlistlong > ir->rlist && ir->nstcalclr == 0) + { + warning_error(wi, "With different cutoffs for electrostatics and VdW, nstcalclr must be -1 or a positive number"); + } + + if (ir->cutoff_scheme == ecutsVERLET) + { + real rc_max; + + /* Normal Verlet type neighbor-list, currently only limited feature support */ + if (inputrec2nboundeddim(ir) < 3) + { + warning_error(wi, "With Verlet lists only full pbc or pbc=xy with walls is supported"); + } + if (ir->rcoulomb != ir->rvdw) + { + warning_error(wi, "With Verlet lists rcoulomb!=rvdw is not supported"); + } + if (ir->vdwtype == evdwSHIFT || ir->vdwtype == evdwSWITCH) + { + if (ir->vdw_modifier == eintmodNONE || + ir->vdw_modifier == eintmodPOTSHIFT) + { + ir->vdw_modifier = (ir->vdwtype == evdwSHIFT ? eintmodFORCESWITCH : eintmodPOTSWITCH); + + sprintf(warn_buf, "Replacing vdwtype=%s by the equivalent combination of vdwtype=%s and vdw_modifier=%s", evdw_names[ir->vdwtype], evdw_names[evdwCUT], eintmod_names[ir->vdw_modifier]); + warning_note(wi, warn_buf); + + ir->vdwtype = evdwCUT; + } + else + { + sprintf(warn_buf, "Unsupported combination of vdwtype=%s and vdw_modifier=%s", evdw_names[ir->vdwtype], eintmod_names[ir->vdw_modifier]); + warning_error(wi, warn_buf); + } + } + + if (!(ir->vdwtype == evdwCUT || ir->vdwtype == evdwPME)) + { + warning_error(wi, "With Verlet lists only cut-off and PME LJ interactions are supported"); + } + if (!(ir->coulombtype == eelCUT || + (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC) || + EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD)) + { + warning_error(wi, "With Verlet lists only cut-off, reaction-field, PME and Ewald electrostatics are supported"); + } + if (!(ir->coulomb_modifier == eintmodNONE || + ir->coulomb_modifier == eintmodPOTSHIFT)) + { + sprintf(warn_buf, "coulomb_modifier=%s is not supported with the Verlet cut-off scheme", eintmod_names[ir->coulomb_modifier]); + warning_error(wi, warn_buf); + } + + if (ir->nstlist <= 0) + { + warning_error(wi, "With Verlet lists nstlist should be larger than 0"); + } + + if (ir->nstlist < 10) + { + warning_note(wi, "With Verlet lists the optimal nstlist is >= 10, with GPUs >= 20. Note that with the Verlet scheme, nstlist has no effect on the accuracy of your simulation."); + } + + rc_max = max(ir->rvdw, ir->rcoulomb); + + if (ir->verletbuf_tol <= 0) + { + if (ir->verletbuf_tol == 0) + { + warning_error(wi, "Can not have Verlet buffer tolerance of exactly 0"); + } + + if (ir->rlist < rc_max) + { + warning_error(wi, "With verlet lists rlist can not be smaller than rvdw or rcoulomb"); + } + + if (ir->rlist == rc_max && ir->nstlist > 1) + { + warning_note(wi, "rlist is equal to rvdw and/or rcoulomb: there is no explicit Verlet buffer. 
The cluster pair list does have a buffering effect, but choosing a larger rlist might be necessary for good energy conservation."); + } + } + else + { + if (ir->rlist > rc_max) + { + warning_note(wi, "You have set rlist larger than the interaction cut-off, but you also have verlet-buffer-tolerance > 0. Will set rlist using verlet-buffer-tolerance."); + } + + if (ir->nstlist == 1) + { + /* No buffer required */ + ir->rlist = rc_max; + } + else + { + if (EI_DYNAMICS(ir->eI)) + { + if (inputrec2nboundeddim(ir) < 3) + { + warning_error(wi, "The box volume is required for calculating rlist from the energy drift with verlet-buffer-tolerance > 0. You are using at least one unbounded dimension, so no volume can be computed. Either use a finite box, or set rlist yourself together with verlet-buffer-tolerance = -1."); + } + /* Set rlist temporarily so we can continue processing */ + ir->rlist = rc_max; + } + else + { + /* Set the buffer to 5% of the cut-off */ + ir->rlist = (1.0 + verlet_buffer_ratio_nodynamics)*rc_max; + } + } + } + + /* No twin-range calculations with Verlet lists */ + ir->rlistlong = ir->rlist; + } + + if (ir->nstcalclr == -1) + { + /* if rlist=rlistlong, this will later be changed to nstcalclr=0 */ + ir->nstcalclr = ir->nstlist; + } + else if (ir->nstcalclr > 0) + { + if (ir->nstlist > 0 && (ir->nstlist % ir->nstcalclr != 0)) + { + warning_error(wi, "nstlist must be evenly divisible by nstcalclr. Use nstcalclr = -1 to automatically follow nstlist"); + } + } + else if (ir->nstcalclr < -1) + { + warning_error(wi, "nstcalclr must be a positive number (a divisor of nstlist), or -1 to follow nstlist."); + } + + if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr > 1) + { + warning_error(wi, "When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)"); + } + + /* GENERAL INTEGRATOR STUFF */ + if (!(ir->eI == eiMD || EI_VV(ir->eI))) + { + ir->etc = etcNO; + } + if (ir->eI == eiVVAK) + { + sprintf(warn_buf, "Integrator method %s is implemented primarily for validation purposes; for molecular dynamics, you should probably be using %s or %s", ei_names[eiVVAK], ei_names[eiMD], ei_names[eiVV]); + warning_note(wi, warn_buf); + } + if (!EI_DYNAMICS(ir->eI)) + { + ir->epc = epcNO; + } + if (EI_DYNAMICS(ir->eI)) + { + if (ir->nstcalcenergy < 0) + { + ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir); + if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy) + { + /* nstcalcenergy larger than nstenergy does not make sense. + * We ideally want nstcalcenergy=nstenergy.
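+ * For example, nstenergy=100 with nstlist=40 gives
+ * nstcalcenergy = lcd(100, 40) = 20 below (despite its name, lcd()
+ * returns the largest common divisor of its two arguments).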
+ */ + if (ir->nstlist > 0) + { + ir->nstcalcenergy = lcd(ir->nstenergy, ir->nstlist); + } + else + { + ir->nstcalcenergy = ir->nstenergy; + } + } + } + else if ( (ir->nstenergy > 0 && ir->nstcalcenergy > ir->nstenergy) || + (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 && + (ir->nstcalcenergy > ir->fepvals->nstdhdl) ) ) + + { + const char *nsten = "nstenergy"; + const char *nstdh = "nstdhdl"; + const char *min_name = nsten; + int min_nst = ir->nstenergy; + + /* find the smallest of ( nstenergy, nstdhdl ) */ + if (ir->efep != efepNO && ir->fepvals->nstdhdl > 0 && + (ir->nstenergy == 0 || ir->fepvals->nstdhdl < ir->nstenergy)) + { + min_nst = ir->fepvals->nstdhdl; + min_name = nstdh; + } + /* If the user sets nstenergy small, we should respect that */ + sprintf(warn_buf, + "Setting nstcalcenergy (%d) equal to %s (%d)", + ir->nstcalcenergy, min_name, min_nst); + warning_note(wi, warn_buf); + ir->nstcalcenergy = min_nst; + } + + if (ir->epc != epcNO) + { + if (ir->nstpcouple < 0) + { + ir->nstpcouple = ir_optimal_nstpcouple(ir); + } + } + if (IR_TWINRANGE(*ir)) + { + check_nst("nstlist", ir->nstlist, + "nstcalcenergy", &ir->nstcalcenergy, wi); + if (ir->epc != epcNO) + { + check_nst("nstlist", ir->nstlist, + "nstpcouple", &ir->nstpcouple, wi); + } + } + + if (ir->nstcalcenergy > 0) + { + if (ir->efep != efepNO) + { + /* nstdhdl should be a multiple of nstcalcenergy */ + check_nst("nstcalcenergy", ir->nstcalcenergy, + "nstdhdl", &ir->fepvals->nstdhdl, wi); + /* nstexpanded should be a multiple of nstcalcenergy */ + check_nst("nstcalcenergy", ir->nstcalcenergy, + "nstexpanded", &ir->expandedvals->nstexpanded, wi); + } + /* for storing exact averages nstenergy should be + * a multiple of nstcalcenergy + */ + check_nst("nstcalcenergy", ir->nstcalcenergy, + "nstenergy", &ir->nstenergy, wi); + } + } + + if (ir->nsteps == 0 && !ir->bContinuation) + { + warning_note(wi, "For a correct single-point energy evaluation with nsteps = 0, use continuation = yes to avoid constraining the input coordinates."); + } + + /* LD STUFF */ + if ((EI_SD(ir->eI) || ir->eI == eiBD) && + ir->bContinuation && ir->ld_seed != -1) + { + warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)"); + } + + /* TPI STUFF */ + if (EI_TPI(ir->eI)) + { + sprintf(err_buf, "TPI only works with pbc = %s", epbc_names[epbcXYZ]); + CHECK(ir->ePBC != epbcXYZ); + sprintf(err_buf, "TPI only works with ns = %s", ens_names[ensGRID]); + CHECK(ir->ns_type != ensGRID); + sprintf(err_buf, "with TPI nstlist should be larger than zero"); + CHECK(ir->nstlist <= 0); + sprintf(err_buf, "TPI does not work with full electrostatics other than PME"); + CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype)); + } + + /* SHAKE / LINCS */ + if ( (opts->nshake > 0) && (opts->bMorse) ) + { + sprintf(warn_buf, + "Using morse bond-potentials while constraining bonds is useless"); + warning(wi, warn_buf); + } + + if ((EI_SD(ir->eI) || ir->eI == eiBD) && + ir->bContinuation && ir->ld_seed != -1) + { + warning_note(wi, "You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)"); + } + /* verify simulated tempering options */ + + if (ir->bSimTemp) + { + gmx_bool bAllTempZero = TRUE; + for (i = 0; i < fep->n_lambda; i++) + { + sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[efptTEMPERATURE], fep->all_lambda[efptTEMPERATURE][i]); + 
CHECK((fep->all_lambda[efptTEMPERATURE][i] < 0) || (fep->all_lambda[efptTEMPERATURE][i] > 1)); + if (fep->all_lambda[efptTEMPERATURE][i] > 0) + { + bAllTempZero = FALSE; + } + } + sprintf(err_buf, "if simulated tempering is on, temperature-lambdas may not be all zero"); + CHECK(bAllTempZero == TRUE); + + sprintf(err_buf, "Simulated tempering is currently only compatible with md-vv"); + CHECK(ir->eI != eiVV); + + /* check compatibility of the temperature coupling with simulated tempering */ + + if (ir->etc == etcNOSEHOOVER) + { + sprintf(warn_buf, "Nose-Hoover based temperature control such as [%s] may not be entirely consistent with simulated tempering", etcoupl_names[ir->etc]); + warning_note(wi, warn_buf); + } + + /* check that the temperatures make sense */ + + sprintf(err_buf, "Higher simulated tempering temperature (%g) must be >= the simulated tempering lower temperature (%g)", ir->simtempvals->simtemp_high, ir->simtempvals->simtemp_low); + CHECK(ir->simtempvals->simtemp_high <= ir->simtempvals->simtemp_low); + + sprintf(err_buf, "Higher simulated tempering temperature (%g) must be > zero", ir->simtempvals->simtemp_high); + CHECK(ir->simtempvals->simtemp_high <= 0); + + sprintf(err_buf, "Lower simulated tempering temperature (%g) must be > zero", ir->simtempvals->simtemp_low); + CHECK(ir->simtempvals->simtemp_low <= 0); + } + + /* verify free energy options */ + + if (ir->efep != efepNO) + { + fep = ir->fepvals; + sprintf(err_buf, "The soft-core power is %d and can only be 1 or 2", + fep->sc_power); + CHECK(fep->sc_alpha != 0 && fep->sc_power != 1 && fep->sc_power != 2); + + sprintf(err_buf, "The soft-core sc-r-power is %d and can only be 6 or 48", + (int)fep->sc_r_power); + CHECK(fep->sc_alpha != 0 && fep->sc_r_power != 6.0 && fep->sc_r_power != 48.0); + + sprintf(err_buf, "Can't use positive delta-lambda (%g) if initial state/lambda does not start at zero", fep->delta_lambda); + CHECK(fep->delta_lambda > 0 && ((fep->init_fep_state > 0) || (fep->init_lambda > 0))); + + sprintf(err_buf, "Can't use positive delta-lambda (%g) with expanded ensemble simulations", fep->delta_lambda); + CHECK(fep->delta_lambda > 0 && (ir->efep == efepEXPANDED)); + + sprintf(err_buf, "Can only use expanded ensemble with md-vv for now; should be supported for other integrators in 5.0"); + CHECK(!(EI_VV(ir->eI)) && (ir->efep == efepEXPANDED)); + + sprintf(err_buf, "Free-energy not implemented for Ewald"); + CHECK(ir->coulombtype == eelEWALD); + + /* check validity of lambda inputs */ + if (fep->n_lambda == 0) + { + /* Clear output in case of no states:*/ + sprintf(err_buf, "init-lambda-state set to %d: no lambda states are defined.", fep->init_fep_state); + CHECK((fep->init_fep_state >= 0) && (fep->n_lambda == 0)); + } + else + { + sprintf(err_buf, "initial thermodynamic state %d does not exist, only goes to %d", fep->init_fep_state, fep->n_lambda-1); + CHECK((fep->init_fep_state >= fep->n_lambda)); + } + + sprintf(err_buf, "Lambda state must be set, either with init-lambda-state or with init-lambda"); + CHECK((fep->init_fep_state < 0) && (fep->init_lambda < 0)); + + sprintf(err_buf, "init-lambda=%g while init-lambda-state=%d.
Lambda state must be set either with init-lambda-state or with init-lambda, but not both", + fep->init_lambda, fep->init_fep_state); + CHECK((fep->init_fep_state >= 0) && (fep->init_lambda >= 0)); + + + + if ((fep->init_lambda >= 0) && (fep->delta_lambda == 0)) + { + int n_lambda_terms; + n_lambda_terms = 0; + for (i = 0; i < efptNR; i++) + { + if (fep->separate_dvdl[i]) + { + n_lambda_terms++; + } + } + if (n_lambda_terms > 1) + { + sprintf(warn_buf, "If lambda vector states (fep-lambdas, coul-lambdas etc.) are set, don't use init-lambda to set lambda state (except for slow growth). Use init-lambda-state instead."); + warning(wi, warn_buf); + } + + if (n_lambda_terms < 2 && fep->n_lambda > 0) + { + warning_note(wi, + "init-lambda is deprecated for setting lambda state (except for slow growth). Use init-lambda-state instead."); + } + } + + for (j = 0; j < efptNR; j++) + { + for (i = 0; i < fep->n_lambda; i++) + { + sprintf(err_buf, "Entry %d for %s must be between 0 and 1, instead is %g", i, efpt_names[j], fep->all_lambda[j][i]); + CHECK((fep->all_lambda[j][i] < 0) || (fep->all_lambda[j][i] > 1)); + } + } + + if ((fep->sc_alpha > 0) && (!fep->bScCoul)) + { + for (i = 0; i < fep->n_lambda; i++) + { + sprintf(err_buf, "For state %d, vdw-lambdas (%f) is changing with vdw softcore, while coul-lambdas (%f) is nonzero without coulomb softcore: this will lead to crashes, and is not supported.", i, fep->all_lambda[efptVDW][i], + fep->all_lambda[efptCOUL][i]); + CHECK((fep->sc_alpha > 0) && + (((fep->all_lambda[efptCOUL][i] > 0.0) && + (fep->all_lambda[efptCOUL][i] < 1.0)) && + ((fep->all_lambda[efptVDW][i] > 0.0) && + (fep->all_lambda[efptVDW][i] < 1.0)))); + } + } + + if ((fep->bScCoul) && (EEL_PME(ir->coulombtype))) + { + real sigma, lambda, r_sc; + + sigma = 0.34; + /* Maximum estimate for A and B charges equal with lambda power 1 */ + lambda = 0.5; + r_sc = pow(lambda*fep->sc_alpha*pow(sigma/ir->rcoulomb, fep->sc_r_power) + 1.0, 1.0/fep->sc_r_power); + sprintf(warn_buf, "With PME there is a minor soft core effect present at the cut-off, proportional to (LJsigma/rcoulomb)^%g. This could have a minor effect on energy conservation, but usually other effects dominate. 
With a common sigma value of %g nm the fraction of the particle-particle potential at the cut-off at lambda=%g is around %.1e, while ewald-rtol is %.1e.", + fep->sc_r_power, + sigma, lambda, r_sc - 1.0, ir->ewald_rtol); + warning_note(wi, warn_buf); + } + + /* Free Energy Checks -- In an ideal world, slow growth and FEP would + be treated differently, but that's the next step */ + + for (i = 0; i < efptNR; i++) + { + for (j = 0; j < fep->n_lambda; j++) + { + sprintf(err_buf, "%s[%d] must be between 0 and 1", efpt_names[i], j); + CHECK((fep->all_lambda[i][j] < 0) || (fep->all_lambda[i][j] > 1)); + } + } + } + + if ((ir->bSimTemp) || (ir->efep == efepEXPANDED)) + { + fep = ir->fepvals; + expand = ir->expandedvals; + + /* checking equilibration of weights inputs for validity */ + + sprintf(err_buf, "weight-equil-number-all-lambda (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]); + CHECK((expand->equil_n_at_lam > 0) && (expand->elmceq != elmceqNUMATLAM)); + + sprintf(err_buf, "weight-equil-number-samples (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_samples, elmceq_names[elmceqSAMPLES]); + CHECK((expand->equil_samples > 0) && (expand->elmceq != elmceqSAMPLES)); + + sprintf(err_buf, "weight-equil-number-steps (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_steps, elmceq_names[elmceqSTEPS]); + CHECK((expand->equil_steps > 0) && (expand->elmceq != elmceqSTEPS)); + + sprintf(err_buf, "weight-equil-wl-delta (%d) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_samples, elmceq_names[elmceqWLDELTA]); + CHECK((expand->equil_wl_delta > 0) && (expand->elmceq != elmceqWLDELTA)); + + sprintf(err_buf, "weight-equil-count-ratio (%f) is ignored if lmc-weights-equil is not equal to %s", + expand->equil_ratio, elmceq_names[elmceqRATIO]); + CHECK((expand->equil_ratio > 0) && (expand->elmceq != elmceqRATIO)); + + sprintf(err_buf, "weight-equil-number-all-lambda (%d) must be a positive integer if lmc-weights-equil=%s", + expand->equil_n_at_lam, elmceq_names[elmceqNUMATLAM]); + CHECK((expand->equil_n_at_lam <= 0) && (expand->elmceq == elmceqNUMATLAM)); + + sprintf(err_buf, "weight-equil-number-samples (%d) must be a positive integer if lmc-weights-equil=%s", + expand->equil_samples, elmceq_names[elmceqSAMPLES]); + CHECK((expand->equil_samples <= 0) && (expand->elmceq == elmceqSAMPLES)); + + sprintf(err_buf, "weight-equil-number-steps (%d) must be a positive integer if lmc-weights-equil=%s", + expand->equil_steps, elmceq_names[elmceqSTEPS]); + CHECK((expand->equil_steps <= 0) && (expand->elmceq == elmceqSTEPS)); + + sprintf(err_buf, "weight-equil-wl-delta (%f) must be > 0 if lmc-weights-equil=%s", + expand->equil_wl_delta, elmceq_names[elmceqWLDELTA]); + CHECK((expand->equil_wl_delta <= 0) && (expand->elmceq == elmceqWLDELTA)); + + sprintf(err_buf, "weight-equil-count-ratio (%f) must be > 0 if lmc-weights-equil=%s", + expand->equil_ratio, elmceq_names[elmceqRATIO]); + CHECK((expand->equil_ratio <= 0) && (expand->elmceq == elmceqRATIO)); + + sprintf(err_buf, "lmc-weights-equil=%s only possible when lmc-stats = %s or lmc-stats %s", + elmceq_names[elmceqWLDELTA], elamstats_names[elamstatsWL], elamstats_names[elamstatsWWL]); + CHECK((expand->elmceq == elmceqWLDELTA) && (!EWL(expand->elamstats))); + + sprintf(err_buf, "lmc-repeats (%d) must be greater than 0", expand->lmc_repeats); + CHECK((expand->lmc_repeats <= 0)); + sprintf(err_buf, "minimum-var-min (%d) must be greater than 0", 
expand->minvarmin); + CHECK((expand->minvarmin <= 0)); + sprintf(err_buf, "weight-c-range (%d) must be greater or equal to 0", expand->c_range); + CHECK((expand->c_range < 0)); + sprintf(err_buf, "init-lambda-state (%d) must be zero if lmc-forced-nstart (%d)> 0 and lmc-move != 'no'", + fep->init_fep_state, expand->lmc_forced_nstart); + CHECK((fep->init_fep_state != 0) && (expand->lmc_forced_nstart > 0) && (expand->elmcmove != elmcmoveNO)); + sprintf(err_buf, "lmc-forced-nstart (%d) must not be negative", expand->lmc_forced_nstart); + CHECK((expand->lmc_forced_nstart < 0)); + sprintf(err_buf, "init-lambda-state (%d) must be in the interval [0,number of lambdas)", fep->init_fep_state); + CHECK((fep->init_fep_state < 0) || (fep->init_fep_state >= fep->n_lambda)); + + sprintf(err_buf, "init-wl-delta (%f) must be greater than or equal to 0", expand->init_wl_delta); + CHECK((expand->init_wl_delta < 0)); + sprintf(err_buf, "wl-ratio (%f) must be between 0 and 1", expand->wl_ratio); + CHECK((expand->wl_ratio <= 0) || (expand->wl_ratio >= 1)); + sprintf(err_buf, "wl-scale (%f) must be between 0 and 1", expand->wl_scale); + CHECK((expand->wl_scale <= 0) || (expand->wl_scale >= 1)); + + /* if there is no temperature control, we need to specify an MC temperature */ + sprintf(err_buf, "If there is no temperature control, and lmc-mcmove!= 'no',mc_temperature must be set to a positive number"); + if (expand->nstTij > 0) + { + sprintf(err_buf, "nst-transition-matrix (%d) must be an integer multiple of nstlog (%d)", + expand->nstTij, ir->nstlog); + CHECK((mod(expand->nstTij, ir->nstlog) != 0)); + } + } + + /* PBC/WALLS */ + sprintf(err_buf, "walls only work with pbc=%s", epbc_names[epbcXY]); + CHECK(ir->nwall && ir->ePBC != epbcXY); + + /* VACUUM STUFF */ + if (ir->ePBC != epbcXYZ && ir->nwall != 2) + { + if (ir->ePBC == epbcNONE) + { + if (ir->epc != epcNO) + { + warning(wi, "Turning off pressure coupling for vacuum system"); + ir->epc = epcNO; + } + } + else + { + sprintf(err_buf, "Can not have pressure coupling with pbc=%s", + epbc_names[ir->ePBC]); + CHECK(ir->epc != epcNO); + } + sprintf(err_buf, "Can not have Ewald with pbc=%s", epbc_names[ir->ePBC]); + CHECK(EEL_FULL(ir->coulombtype)); + + sprintf(err_buf, "Can not have dispersion correction with pbc=%s", + epbc_names[ir->ePBC]); + CHECK(ir->eDispCorr != edispcNO); + } + + if (ir->rlist == 0.0) + { + sprintf(err_buf, "can only have neighborlist cut-off zero (=infinite)\n" + "with coulombtype = %s or coulombtype = %s\n" + "without periodic boundary conditions (pbc = %s) and\n" + "rcoulomb and rvdw set to zero", + eel_names[eelCUT], eel_names[eelUSER], epbc_names[epbcNONE]); + CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) || + (ir->ePBC != epbcNONE) || + (ir->rcoulomb != 0.0) || (ir->rvdw != 0.0)); + + if (ir->nstlist < 0) + { + warning_error(wi, "Can not have heuristic neighborlist updates without cut-off"); + } + if (ir->nstlist > 0) + { + warning_note(wi, "Simulating without cut-offs can be (slightly) faster with nstlist=0, nstype=simple and only one MPI rank"); + } + } + + /* COMM STUFF */ + if (ir->nstcomm == 0) + { + ir->comm_mode = ecmNO; + } + if (ir->comm_mode != ecmNO) + { + if (ir->nstcomm < 0) + { + warning(wi, "If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. 
nstcomm is modified to its absolute value"); + ir->nstcomm = abs(ir->nstcomm); + } + + if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy) + { + warning_note(wi, "nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy"); + ir->nstcomm = ir->nstcalcenergy; + } + + if (ir->comm_mode == ecmANGULAR) + { + sprintf(err_buf, "Can not remove the rotation around the center of mass with periodic molecules"); + CHECK(ir->bPeriodicMols); + if (ir->ePBC != epbcNONE) + { + warning(wi, "Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule)."); + } + } + } + + if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR) + { + warning_note(wi, "Tumbling and or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR."); + } + + sprintf(err_buf, "Twin-range neighbour searching (NS) with simple NS" + " algorithm not implemented"); + CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist)) + && (ir->ns_type == ensSIMPLE)); + + /* TEMPERATURE COUPLING */ + if (ir->etc == etcYES) + { + ir->etc = etcBERENDSEN; + warning_note(wi, "Old option for temperature coupling given: " + "changing \"yes\" to \"Berendsen\"\n"); + } + + if ((ir->etc == etcNOSEHOOVER) || (ir->epc == epcMTTK)) + { + if (ir->opts.nhchainlength < 1) + { + sprintf(warn_buf, "number of Nose-Hoover chains (currently %d) cannot be less than 1,reset to 1\n", ir->opts.nhchainlength); + ir->opts.nhchainlength = 1; + warning(wi, warn_buf); + } + + if (ir->etc == etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1) + { + warning_note(wi, "leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1"); + ir->opts.nhchainlength = 1; + } + } + else + { + ir->opts.nhchainlength = 0; + } + + if (ir->eI == eiVVAK) + { + sprintf(err_buf, "%s implemented primarily for validation, and requires nsttcouple = 1 and nstpcouple = 1.", + ei_names[eiVVAK]); + CHECK((ir->nsttcouple != 1) || (ir->nstpcouple != 1)); + } + + if (ETC_ANDERSEN(ir->etc)) + { + sprintf(err_buf, "%s temperature control not supported for integrator %s.", etcoupl_names[ir->etc], ei_names[ir->eI]); + CHECK(!(EI_VV(ir->eI))); + + if (ir->nstcomm > 0 && (ir->etc == etcANDERSEN)) + { + sprintf(warn_buf, "Center of mass removal not necessary for %s. All velocities of coupled groups are rerandomized periodically, so flying ice cube errors will not occur.", etcoupl_names[ir->etc]); + warning_note(wi, warn_buf); + } + + sprintf(err_buf, "nstcomm must be 1, not %d for %s, as velocities of atoms in coupled groups are randomized every time step", ir->nstcomm, etcoupl_names[ir->etc]); + CHECK(ir->nstcomm > 1 && (ir->etc == etcANDERSEN)); + } + + if (ir->etc == etcBERENDSEN) + { + sprintf(warn_buf, "The %s thermostat does not generate the correct kinetic energy distribution. 
You might want to consider using the %s thermostat.", + ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); + warning_note(wi, warn_buf); + } + + if ((ir->etc == etcNOSEHOOVER || ETC_ANDERSEN(ir->etc)) + && ir->epc == epcBERENDSEN) + { + sprintf(warn_buf, "Using Berendsen pressure coupling invalidates the " + "true ensemble for the thermostat"); + warning(wi, warn_buf); + } + + /* PRESSURE COUPLING */ + if (ir->epc == epcISOTROPIC) + { + ir->epc = epcBERENDSEN; + warning_note(wi, "Old option for pressure coupling given: " + "changing \"Isotropic\" to \"Berendsen\"\n"); + } + + if (ir->epc != epcNO) + { + dt_pcoupl = ir->nstpcouple*ir->delta_t; + + sprintf(err_buf, "tau-p must be > 0 instead of %g\n", ir->tau_p); + CHECK(ir->tau_p <= 0); + + if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc)) + { + sprintf(warn_buf, "For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)", + EPCOUPLTYPE(ir->epc), ir->tau_p, pcouple_min_integration_steps(ir->epc), dt_pcoupl); + warning(wi, warn_buf); + } + + sprintf(err_buf, "compressibility must be > 0 when using pressure" + " coupling %s\n", EPCOUPLTYPE(ir->epc)); + CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 || + ir->compress[ZZ][ZZ] < 0 || + (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 && + ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0)); + + if (epcPARRINELLORAHMAN == ir->epc && opts->bGenVel) + { + sprintf(warn_buf, + "You are generating velocities so I am assuming you " + "are equilibrating a system. You are using " + "%s pressure coupling, but this can be " + "unstable for equilibration. If your system crashes, try " + "equilibrating first with Berendsen pressure coupling. If " + "you are not equilibrating the system, you can probably " + "ignore this warning.", + epcoupl_names[ir->epc]); + warning(wi, warn_buf); + } + } + + if (EI_VV(ir->eI)) + { + if (ir->epc > epcNO) + { + if ((ir->epc != epcBERENDSEN) && (ir->epc != epcMTTK)) + { + warning_error(wi, "for md-vv and md-vv-avek, can only use Berendsen and Martyna-Tuckerman-Tobias-Klein (MTTK) equations for pressure control; MTTK is equivalent to Parrinello-Rahman."); + } + } + } + else + { + if (ir->epc == epcMTTK) + { + warning_error(wi, "MTTK pressure coupling requires a Velocity-verlet integrator"); + } + } + + /* ELECTROSTATICS */ + /* More checks are in triple check (grompp.c) */ + + if (ir->coulombtype == eelSWITCH) + { + sprintf(warn_buf, "coulombtype = %s is only for testing purposes and can lead to serious " + "artifacts, advice: use coulombtype = %s", + eel_names[ir->coulombtype], + eel_names[eelRF_ZERO]); + warning(wi, warn_buf); + } + + if (ir->epsilon_r != 1 && ir->implicit_solvent == eisGBSA) + { + sprintf(warn_buf, "epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric", ir->epsilon_r); + warning_note(wi, warn_buf); + } + + if (EEL_RF(ir->coulombtype) && ir->epsilon_rf == 1 && ir->epsilon_r != 1) + { + sprintf(warn_buf, "epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf", ir->epsilon_r); + warning(wi, warn_buf); + ir->epsilon_rf = ir->epsilon_r; + ir->epsilon_r = 1.0; + } + + if (getenv("GMX_DO_GALACTIC_DYNAMICS") == NULL) + { + sprintf(err_buf, "epsilon-r must be >= 0 instead of %g\n", ir->epsilon_r); + CHECK(ir->epsilon_r < 0); + } + + if (EEL_RF(ir->coulombtype)) + { + /* reaction field (at the cut-off) */ + + if (ir->coulombtype == eelRF_ZERO) + { + sprintf(warn_buf, "With 
coulombtype = %s, epsilon-rf must be 0, assuming you meant epsilon_rf=0", + eel_names[ir->coulombtype]); + CHECK(ir->epsilon_rf != 0); + ir->epsilon_rf = 0.0; + } + + sprintf(err_buf, "epsilon-rf must be >= epsilon-r"); + CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) || + (ir->epsilon_r == 0)); + if (ir->epsilon_rf == ir->epsilon_r) + { + sprintf(warn_buf, "Using epsilon-rf = epsilon-r with %s does not make sense", + eel_names[ir->coulombtype]); + warning(wi, warn_buf); + } + } + /* Allow rlist>rcoulomb for tabulated long range stuff. This just + * means the interaction is zero outside rcoulomb, but it helps to + * provide accurate energy conservation. + */ + if (ir_coulomb_might_be_zero_at_cutoff(ir)) + { + if (ir_coulomb_switched(ir)) + { + sprintf(err_buf, + "With coulombtype = %s rcoulomb_switch must be < rcoulomb. Or, better: Use the potential modifier options!", + eel_names[ir->coulombtype]); + CHECK(ir->rcoulomb_switch >= ir->rcoulomb); + } + } + else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) + { + if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE) + { + sprintf(err_buf, "With coulombtype = %s, rcoulomb should be >= rlist unless you use a potential modifier", + eel_names[ir->coulombtype]); + CHECK(ir->rlist > ir->rcoulomb); + } + } + ++ if (ir->coulombtype == eelSWITCH || ir->coulombtype == eelSHIFT) ++ { ++ sprintf(err_buf, ++ "Explicit switch/shift coulomb interactions cannot be used in combination with a secondary coulomb-modifier."); ++ CHECK( ir->coulomb_modifier != eintmodNONE); ++ } ++ if (ir->vdwtype == evdwSWITCH || ir->vdwtype == evdwSHIFT) ++ { ++ sprintf(err_buf, ++ "Explicit switch/shift vdw interactions cannot be used in combination with a secondary vdw-modifier."); ++ CHECK( ir->vdw_modifier != eintmodNONE); ++ } ++ + if (ir->coulombtype == eelSWITCH || ir->coulombtype == eelSHIFT || + ir->vdwtype == evdwSWITCH || ir->vdwtype == evdwSHIFT) + { + sprintf(warn_buf, + "The switch/shift interaction settings are just for compatibility; you will get better " + "performance from applying potential modifiers to your interactions!\n"); + warning_note(wi, warn_buf); + } + + if (ir->coulombtype == eelPMESWITCH || ir->coulomb_modifier == eintmodPOTSWITCH) + { + if (ir->rcoulomb_switch/ir->rcoulomb < 0.9499) + { + real percentage = 100*(ir->rcoulomb-ir->rcoulomb_switch)/ir->rcoulomb; - sprintf(warn_buf, "The switching range for should be 5%% or less (currently %.2f%% using a switching range of %4f-%4f) for accurate electrostatic energies, energy conservation will be good regardless, since ewald_rtol = %g.", ++ sprintf(warn_buf, "The switching range should be 5%% or less (currently %.2f%% using a switching range of %4f-%4f) for accurate electrostatic energies, energy conservation will be good regardless, since ewald_rtol = %g.", + percentage, ir->rcoulomb_switch, ir->rcoulomb, ir->ewald_rtol); + warning(wi, warn_buf); + } + } + + if (ir->vdwtype == evdwSWITCH || ir->vdw_modifier == eintmodPOTSWITCH) + { + if (ir->rvdw_switch == 0) + { + sprintf(warn_buf, "rvdw-switch is equal 0 even though you are using a switched Lennard-Jones potential. This suggests it was not set in the mdp, which can lead to large energy errors. 
In GROMACS, 0.05 to 0.1 nm is often a reasonable vdw switching range."); + warning(wi, warn_buf); + } + } + + if (EEL_FULL(ir->coulombtype)) + { + if (ir->coulombtype == eelPMESWITCH || ir->coulombtype == eelPMEUSER || + ir->coulombtype == eelPMEUSERSWITCH) + { + sprintf(err_buf, "With coulombtype = %s, rcoulomb must be <= rlist", + eel_names[ir->coulombtype]); + CHECK(ir->rcoulomb > ir->rlist); + } + else if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE) + { + if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD) + { + sprintf(err_buf, + "With coulombtype = %s (without modifier), rcoulomb must be equal to rlist,\n" + "or rlistlong if nstcalclr=1. For optimal energy conservation,consider using\n" + "a potential modifier.", eel_names[ir->coulombtype]); + if (ir->nstcalclr == 1) + { + CHECK(ir->rcoulomb != ir->rlist && ir->rcoulomb != ir->rlistlong); + } + else + { + CHECK(ir->rcoulomb != ir->rlist); + } + } + } + } + + if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype)) + { + if (ir->pme_order < 3) + { + warning_error(wi, "pme-order can not be smaller than 3"); + } + } + + if (ir->nwall == 2 && EEL_FULL(ir->coulombtype)) + { + if (ir->ewald_geometry == eewg3D) + { + sprintf(warn_buf, "With pbc=%s you should use ewald-geometry=%s", + epbc_names[ir->ePBC], eewg_names[eewg3DC]); + warning(wi, warn_buf); + } + /* This check avoids extra pbc coding for exclusion corrections */ + sprintf(err_buf, "wall-ewald-zfac should be >= 2"); + CHECK(ir->wall_ewald_zfac < 2); + } + + if (ir_vdw_switched(ir)) + { + sprintf(err_buf, "With switched vdw forces or potentials, rvdw-switch must be < rvdw"); + CHECK(ir->rvdw_switch >= ir->rvdw); + + if (ir->rvdw_switch < 0.5*ir->rvdw) + { + sprintf(warn_buf, "You are applying a switch function to vdw forces or potentials from %g to %g nm, which is more than half the interaction range, whereas switch functions are intended to act only close to the cut-off.", + ir->rvdw_switch, ir->rvdw); + warning_note(wi, warn_buf); + } + } + else if (ir->vdwtype == evdwCUT || ir->vdwtype == evdwPME) + { + if (ir->cutoff_scheme == ecutsGROUP && ir->vdw_modifier == eintmodNONE) + { + sprintf(err_buf, "With vdwtype = %s, rvdw must be >= rlist unless you use a potential modifier", evdw_names[ir->vdwtype]); + CHECK(ir->rlist > ir->rvdw); + } + } + + if (ir->vdwtype == evdwPME) + { + if (!(ir->vdw_modifier == eintmodNONE || ir->vdw_modifier == eintmodPOTSHIFT)) + { + sprintf(err_buf, "With vdwtype = %s, the only supported modifiers are %s a\ +nd %s", + evdw_names[ir->vdwtype], + eintmod_names[eintmodPOTSHIFT], + eintmod_names[eintmodNONE]); + } + } + + if (ir->cutoff_scheme == ecutsGROUP) + { + if (((ir->coulomb_modifier != eintmodNONE && ir->rcoulomb == ir->rlist) || + (ir->vdw_modifier != eintmodNONE && ir->rvdw == ir->rlist)) && + ir->nstlist != 1) + { + warning_note(wi, "With exact cut-offs, rlist should be " + "larger than rcoulomb and rvdw, so that there " + "is a buffer region for particle motion " + "between neighborsearch steps"); + } + + if (ir_coulomb_is_zero_at_cutoff(ir) && ir->rlistlong <= ir->rcoulomb) + { + sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.", + IR_TWINRANGE(*ir) ? "rlistlong" : "rlist"); + warning_note(wi, warn_buf); + } + if (ir_vdw_switched(ir) && (ir->rlistlong <= ir->rvdw)) + { + sprintf(warn_buf, "For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.", + IR_TWINRANGE(*ir) ? 
"rlistlong" : "rlist"); + warning_note(wi, warn_buf); + } + } + + if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO) + { + warning_note(wi, "You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6."); + } + + if (ir->nstlist == -1) + { + sprintf(err_buf, "With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii"); + CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist); + } + sprintf(err_buf, "nstlist can not be smaller than -1"); + CHECK(ir->nstlist < -1); + + if (ir->eI == eiLBFGS && (ir->coulombtype == eelCUT || ir->vdwtype == evdwCUT) + && ir->rvdw != 0) + { + warning(wi, "For efficient BFGS minimization, use switch/shift/pme instead of cut-off."); + } + + if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0) + { + warning(wi, "Using L-BFGS with nbfgscorr<=0 just gets you steepest descent."); + } + + /* ENERGY CONSERVATION */ + if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP) + { + if (!ir_vdw_might_be_zero_at_cutoff(ir) && ir->rvdw > 0 && ir->vdw_modifier == eintmodNONE) + { + sprintf(warn_buf, "You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)", + evdw_names[evdwSHIFT]); + warning_note(wi, warn_buf); + } + if (!ir_coulomb_might_be_zero_at_cutoff(ir) && ir->rcoulomb > 0) + { + sprintf(warn_buf, "You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s", + eel_names[eelPMESWITCH], eel_names[eelRF_ZERO]); + warning_note(wi, warn_buf); + } + } + + if (EI_VV(ir->eI) && IR_TWINRANGE(*ir) && ir->nstlist > 1) + { + sprintf(warn_buf, "Twin-range multiple time stepping does not work with integrator %s.", ei_names[ir->eI]); + warning_error(wi, warn_buf); + } + + /* IMPLICIT SOLVENT */ + if (ir->coulombtype == eelGB_NOTUSED) + { + ir->coulombtype = eelCUT; + ir->implicit_solvent = eisGBSA; + fprintf(stderr, "Note: Old option for generalized born electrostatics given:\n" + "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n" + "setting implicit-solvent value to \"GBSA\" in input section.\n"); + } + + if (ir->sa_algorithm == esaSTILL) + { + sprintf(err_buf, "Still SA algorithm not available yet, use %s or %s instead\n", esa_names[esaAPPROX], esa_names[esaNO]); + CHECK(ir->sa_algorithm == esaSTILL); + } + + if (ir->implicit_solvent == eisGBSA) + { + sprintf(err_buf, "With GBSA implicit solvent, rgbradii must be equal to rlist."); + CHECK(ir->rgbradii != ir->rlist); + + if (ir->coulombtype != eelCUT) + { + sprintf(err_buf, "With GBSA, coulombtype must be equal to %s\n", eel_names[eelCUT]); + CHECK(ir->coulombtype != eelCUT); + } + if (ir->vdwtype != evdwCUT) + { + sprintf(err_buf, "With GBSA, vdw-type must be equal to %s\n", evdw_names[evdwCUT]); + CHECK(ir->vdwtype != evdwCUT); + } + if (ir->nstgbradii < 1) + { + sprintf(warn_buf, "Using GBSA with nstgbradii<1, setting nstgbradii=1"); + warning_note(wi, warn_buf); + ir->nstgbradii = 1; + } + if (ir->sa_algorithm == esaNO) + { + sprintf(warn_buf, "No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n"); + warning_note(wi, warn_buf); + } + if (ir->sa_surface_tension < 0 && ir->sa_algorithm != esaNO) + { + sprintf(warn_buf, "Value of sa_surface_tension is < 0. 
Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
+ warning_note(wi, warn_buf);
+
+ if (ir->gb_algorithm == egbSTILL)
+ {
+ ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
+ }
+ else
+ {
+ ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
+ }
+ }
+ if (ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO)
+ {
+ sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
+ CHECK(ir->sa_surface_tension == 0 && ir->sa_algorithm != esaNO);
+ }
+
+ }
+
+ if (ir->bAdress)
+ {
+ if (ir->cutoff_scheme != ecutsGROUP)
+ {
+ warning_error(wi, "AdresS simulation supports only cutoff-scheme=group");
+ }
+ if (!EI_SD(ir->eI))
+ {
+ warning_error(wi, "AdresS simulation supports only stochastic dynamics");
+ }
+ if (ir->epc != epcNO)
+ {
+ warning_error(wi, "AdresS simulation does not support pressure coupling");
+ }
+ if (EEL_FULL(ir->coulombtype))
+ {
+ warning_error(wi, "AdresS simulation does not support long-range electrostatics");
+ }
+ }
+}
+
+/* count the number of text elements separated by whitespace in a string.
+ str = the input string
+ maxptr = the maximum number of allowed elements
+ ptr = the output array of pointers to the first character of each element
+ returns: the number of elements. */
+int str_nelem(const char *str, int maxptr, char *ptr[])
+{
+ int np = 0;
+ char *copy0, *copy;
+
+ copy0 = strdup(str);
+ copy = copy0;
+ ltrim(copy);
+ while (*copy != '\0')
+ {
+ if (np >= maxptr)
+ {
+ gmx_fatal(FARGS, "Too many groups on line: '%s' (max is %d)",
+ str, maxptr);
+ }
+ if (ptr)
+ {
+ ptr[np] = copy;
+ }
+ np++;
+ while ((*copy != '\0') && !isspace(*copy))
+ {
+ copy++;
+ }
+ if (*copy != '\0')
+ {
+ *copy = '\0';
+ copy++;
+ }
+ ltrim(copy);
+ }
+ if (ptr == NULL)
+ {
+ sfree(copy0);
+ }
+
+ return np;
+}
+
+/* interpret a number of doubles from a string and put them in an array,
+ after allocating space for them.
+ str = the input string
+ n = receives the number of doubles read
+ r = the output array of doubles. */
+static void parse_n_real(char *str, int *n, real **r)
+{
+ char *ptr[MAXPTR];
+ int i;
+
+ *n = str_nelem(str, MAXPTR, ptr);
+
+ snew(*r, *n);
+ for (i = 0; i < *n; i++)
+ {
+ (*r)[i] = strtod(ptr[i], NULL);
+ }
+}
+
+static void do_fep_params(t_inputrec *ir, char fep_lambda[][STRLEN], char weights[STRLEN])
+{
+
+ int i, j, max_n_lambda, nweights, nfep[efptNR];
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+ real **count_fep_lambdas;
+ gmx_bool bOneLambda = TRUE;
+
+ snew(count_fep_lambdas, efptNR);
+
+ /* FEP input processing */
+ /* first, identify the number of lambda values for each type.
+ All that are nonzero must have the same number */
+
+ for (i = 0; i < efptNR; i++)
+ {
+ parse_n_real(fep_lambda[i], &(nfep[i]), &(count_fep_lambdas[i]));
+ }
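+ /* Editorial note, a hypothetical example of the rule enforced below: every
+ * non-empty lambda vector in the .mdp file must have the same length, e.g.
+ * fep-lambdas = 0.0 0.5 1.0
+ * coul-lambdas = 0.0 0.2 1.0
+ * gives nfep[efptFEP] = nfep[efptCOUL] = 3, while vectors left empty
+ * (vdw-lambdas, ...) get nfep[i] = 0 and are filled from fep-lambdas below. */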
+ /* now, determine the number of components. All must be either zero, or equal. */
+
+ max_n_lambda = 0;
+ for (i = 0; i < efptNR; i++)
+ {
+ if (nfep[i] > max_n_lambda)
+ {
+ max_n_lambda = nfep[i]; /* here's a nonzero one. All of them
+ must have the same number if it's not zero. */
+ break;
+ }
+ }
+
+ for (i = 0; i < efptNR; i++)
+ {
+ if (nfep[i] == 0)
+ {
+ ir->fepvals->separate_dvdl[i] = FALSE;
+ }
+ else if (nfep[i] == max_n_lambda)
+ {
+ if (i != efptTEMPERATURE) /* we treat this differently -- not really a reason to compute the derivative with
+ respect to the temperature currently */
+ {
+ ir->fepvals->separate_dvdl[i] = TRUE;
+ }
+ }
+ else
+ {
+ gmx_fatal(FARGS, "Number of lambdas (%d) for FEP type %s not equal to number of other types (%d)",
+ nfep[i], efpt_names[i], max_n_lambda);
+ }
+ }
+ /* we don't print out dhdl if the temperature is changing, since we can't correctly define dhdl in this case */
+ ir->fepvals->separate_dvdl[efptTEMPERATURE] = FALSE;
+
+ /* the number of lambdas is the number we've read in, which is either zero
+ or the same for all */
+ fep->n_lambda = max_n_lambda;
+
+ /* allocate space for the array of lambda values */
+ snew(fep->all_lambda, efptNR);
+ /* if init_lambda is defined, we need to set lambda */
+ if ((fep->init_lambda > 0) && (fep->n_lambda == 0))
+ {
+ ir->fepvals->separate_dvdl[efptFEP] = TRUE;
+ }
+ /* otherwise allocate the space for all of the lambdas, and transfer the data */
+ for (i = 0; i < efptNR; i++)
+ {
+ snew(fep->all_lambda[i], fep->n_lambda);
+ if (nfep[i] > 0) /* if it's zero, then the count_fep_lambdas arrays
+ are zero */
+ {
+ for (j = 0; j < fep->n_lambda; j++)
+ {
+ fep->all_lambda[i][j] = (double)count_fep_lambdas[i][j];
+ }
+ sfree(count_fep_lambdas[i]);
+ }
+ }
+ sfree(count_fep_lambdas);
+
+ /* "fep-lambdas" is either zero or the full number. If zero, we'll need to define fep-lambdas for internal
+ bookkeeping -- for now, init_lambda */
+
+ if ((nfep[efptFEP] == 0) && (fep->init_lambda >= 0))
+ {
+ for (i = 0; i < fep->n_lambda; i++)
+ {
+ fep->all_lambda[efptFEP][i] = fep->init_lambda;
+ }
+ }
+
+ /* check to see if only a single component lambda is defined, and soft core is defined.
+ In this case, turn on coulomb soft core */
+
+ if (max_n_lambda == 0)
+ {
+ bOneLambda = TRUE;
+ }
+ else
+ {
+ for (i = 0; i < efptNR; i++)
+ {
+ if ((nfep[i] != 0) && (i != efptFEP))
+ {
+ bOneLambda = FALSE;
+ }
+ }
+ }
+ if ((bOneLambda) && (fep->sc_alpha > 0))
+ {
+ fep->bScCoul = TRUE;
+ }
+
+ /* Fill in the others with the efptFEP if they are not explicitly
+ specified (i.e. nfep[i] == 0). This means if fep is not defined,
+ they are all zero. */
+
+ for (i = 0; i < efptNR; i++)
+ {
+ if ((nfep[i] == 0) && (i != efptFEP))
+ {
+ for (j = 0; j < fep->n_lambda; j++)
+ {
+ fep->all_lambda[i][j] = fep->all_lambda[efptFEP][j];
+ }
+ }
+ }
+
+ /* make it easier if sc_r_power = 48 by increasing it to the 4th power, to be in the right scale. */
+ if (fep->sc_r_power == 48)
+ {
+ if (fep->sc_alpha > 0.1)
+ {
+ gmx_fatal(FARGS, "sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004", fep->sc_alpha);
+ }
+ }
+
+ expand = ir->expandedvals;
+ /* now read in the weights */
+ parse_n_real(weights, &nweights, &(expand->init_lambda_weights));
+ if (nweights == 0)
+ {
+ snew(expand->init_lambda_weights, fep->n_lambda); /* initialize to zero */
+ }
+ else if (nweights != fep->n_lambda)
+ {
+ gmx_fatal(FARGS, "Number of weights (%d) is not equal to number of lambda values (%d)",
+ nweights, fep->n_lambda);
+ }
+ if ((expand->nstexpanded < 0) && (ir->efep != efepNO))
+ {
+ expand->nstexpanded = fep->nstdhdl;
+ /* if you don't specify nstexpanded when doing expanded ensemble free energy calcs, it is set to nstdhdl */
+ }
+ if ((expand->nstexpanded < 0) && ir->bSimTemp)
+ {
+ expand->nstexpanded = 2*(int)(ir->opts.tau_t[0]/ir->delta_t);
+ /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to
+ 2*tau_t just to be careful so it's not too frequent */
+ }
+}
+
+
+static void do_simtemp_params(t_inputrec *ir)
+{
+
+ snew(ir->simtempvals->temperatures, ir->fepvals->n_lambda);
+ GetSimTemps(ir->fepvals->n_lambda, ir->simtempvals, ir->fepvals->all_lambda[efptTEMPERATURE]);
+
+ return;
+}
+
+static void do_wall_params(t_inputrec *ir,
+ char *wall_atomtype, char *wall_density,
+ t_gromppopts *opts)
+{
+ int nstr, i;
+ char *names[MAXPTR];
+ double dbl;
+
+ opts->wall_atomtype[0] = NULL;
+ opts->wall_atomtype[1] = NULL;
+
+ ir->wall_atomtype[0] = -1;
+ ir->wall_atomtype[1] = -1;
+ ir->wall_density[0] = 0;
+ ir->wall_density[1] = 0;
+
+ if (ir->nwall > 0)
+ {
+ nstr = str_nelem(wall_atomtype, MAXPTR, names);
+ if (nstr != ir->nwall)
+ {
+ gmx_fatal(FARGS, "Expected %d elements for wall_atomtype, found %d",
+ ir->nwall, nstr);
+ }
+ for (i = 0; i < ir->nwall; i++)
+ {
+ opts->wall_atomtype[i] = strdup(names[i]);
+ }
+
+ if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
+ {
+ nstr = str_nelem(wall_density, MAXPTR, names);
+ if (nstr != ir->nwall)
+ {
+ gmx_fatal(FARGS, "Expected %d elements for wall-density, found %d", ir->nwall, nstr);
+ }
+ for (i = 0; i < ir->nwall; i++)
+ {
+ sscanf(names[i], "%lf", &dbl);
+ if (dbl <= 0)
+ {
+ gmx_fatal(FARGS, "wall-density[%d] = %f, should be > 0\n", i, dbl);
+ }
+ ir->wall_density[i] = dbl;
+ }
+ }
+ }
+}
+
+static void add_wall_energrps(gmx_groups_t *groups, int nwall, t_symtab *symtab)
+{
+ int i;
+ t_grps *grps;
+ char str[STRLEN];
+
+ if (nwall > 0)
+ {
+ srenew(groups->grpname, groups->ngrpname+nwall);
+ grps = &(groups->grps[egcENER]);
+ srenew(grps->nm_ind, grps->nr+nwall);
+ for (i = 0; i < nwall; i++)
+ {
+ sprintf(str, "wall%d", i);
+ groups->grpname[groups->ngrpname] = put_symtab(symtab, str);
+ grps->nm_ind[grps->nr++] = groups->ngrpname++;
+ }
+ }
+}
+
+void read_expandedparams(int *ninp_p, t_inpfile **inp_p,
+ t_expanded *expand, warninp_t wi)
+{
+ int ninp, nerror = 0;
+ t_inpfile *inp;
+
+ ninp = *ninp_p;
+ inp = *inp_p;
+
+ /* read expanded ensemble parameters */
+ CCTYPE ("expanded ensemble variables");
+ ITYPE ("nstexpanded", expand->nstexpanded, -1);
+ EETYPE("lmc-stats", expand->elamstats, elamstats_names);
+ EETYPE("lmc-move", expand->elmcmove, elmcmove_names);
+ EETYPE("lmc-weights-equil", expand->elmceq, elmceq_names);
+ ITYPE ("weight-equil-number-all-lambda", expand->equil_n_at_lam, -1);
+ ITYPE ("weight-equil-number-samples", expand->equil_samples, -1);
+ ITYPE ("weight-equil-number-steps", expand->equil_steps, -1);
+ RTYPE 
("weight-equil-wl-delta", expand->equil_wl_delta, -1); + RTYPE ("weight-equil-count-ratio", expand->equil_ratio, -1); + CCTYPE("Seed for Monte Carlo in lambda space"); + ITYPE ("lmc-seed", expand->lmc_seed, -1); + RTYPE ("mc-temperature", expand->mc_temp, -1); + ITYPE ("lmc-repeats", expand->lmc_repeats, 1); + ITYPE ("lmc-gibbsdelta", expand->gibbsdeltalam, -1); + ITYPE ("lmc-forced-nstart", expand->lmc_forced_nstart, 0); + EETYPE("symmetrized-transition-matrix", expand->bSymmetrizedTMatrix, yesno_names); + ITYPE("nst-transition-matrix", expand->nstTij, -1); + ITYPE ("mininum-var-min", expand->minvarmin, 100); /*default is reasonable */ + ITYPE ("weight-c-range", expand->c_range, 0); /* default is just C=0 */ + RTYPE ("wl-scale", expand->wl_scale, 0.8); + RTYPE ("wl-ratio", expand->wl_ratio, 0.8); + RTYPE ("init-wl-delta", expand->init_wl_delta, 1.0); + EETYPE("wl-oneovert", expand->bWLoneovert, yesno_names); + + *ninp_p = ninp; + *inp_p = inp; + + return; +} + +void get_ir(const char *mdparin, const char *mdparout, + t_inputrec *ir, t_gromppopts *opts, + warninp_t wi) +{ + char *dumstr[2]; + double dumdub[2][6]; + t_inpfile *inp; + const char *tmp; + int i, j, m, ninp; + char warn_buf[STRLEN]; + t_lambda *fep = ir->fepvals; + t_expanded *expand = ir->expandedvals; + + init_inputrec_strings(); + inp = read_inpfile(mdparin, &ninp, wi); + + snew(dumstr[0], STRLEN); + snew(dumstr[1], STRLEN); + + if (-1 == search_einp(ninp, inp, "cutoff-scheme")) + { + sprintf(warn_buf, + "%s did not specify a value for the .mdp option " + "\"cutoff-scheme\". Probably it was first intended for use " + "with GROMACS before 4.6. In 4.6, the Verlet scheme was " + "introduced, but the group scheme was still the default. " + "The default is now the Verlet scheme, so you will observe " + "different behaviour.", mdparin); + warning_note(wi, warn_buf); + } + + /* ignore the following deprecated commands */ + REM_TYPE("title"); + REM_TYPE("cpp"); + REM_TYPE("domain-decomposition"); + REM_TYPE("andersen-seed"); + REM_TYPE("dihre"); + REM_TYPE("dihre-fc"); + REM_TYPE("dihre-tau"); + REM_TYPE("nstdihreout"); + REM_TYPE("nstcheckpoint"); + REM_TYPE("optimize-fft"); + + /* replace the following commands with the clearer new versions*/ + REPL_TYPE("unconstrained-start", "continuation"); + REPL_TYPE("foreign-lambda", "fep-lambdas"); + REPL_TYPE("verlet-buffer-drift", "verlet-buffer-tolerance"); + REPL_TYPE("nstxtcout", "nstxout-compressed"); + REPL_TYPE("xtc-grps", "compressed-x-grps"); + REPL_TYPE("xtc-precision", "compressed-x-precision"); + + CCTYPE ("VARIOUS PREPROCESSING OPTIONS"); + CTYPE ("Preprocessor information: use cpp syntax."); + CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe"); + STYPE ("include", opts->include, NULL); + CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)"); + STYPE ("define", opts->define, NULL); + + CCTYPE ("RUN CONTROL PARAMETERS"); + EETYPE("integrator", ir->eI, ei_names); + CTYPE ("Start time and timestep in ps"); + RTYPE ("tinit", ir->init_t, 0.0); + RTYPE ("dt", ir->delta_t, 0.001); + STEPTYPE ("nsteps", ir->nsteps, 0); + CTYPE ("For exact run continuation or redoing part of a run"); + STEPTYPE ("init-step", ir->init_step, 0); + CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)"); + ITYPE ("simulation-part", ir->simulation_part, 1); + CTYPE ("mode for center of mass motion removal"); + EETYPE("comm-mode", ir->comm_mode, ecm_names); + CTYPE ("number of steps for center of mass motion removal"); + ITYPE ("nstcomm", ir->nstcomm, 
100); + CTYPE ("group(s) for center of mass motion removal"); + STYPE ("comm-grps", is->vcm, NULL); + + CCTYPE ("LANGEVIN DYNAMICS OPTIONS"); + CTYPE ("Friction coefficient (amu/ps) and random seed"); + RTYPE ("bd-fric", ir->bd_fric, 0.0); + STEPTYPE ("ld-seed", ir->ld_seed, -1); + + /* Em stuff */ + CCTYPE ("ENERGY MINIMIZATION OPTIONS"); + CTYPE ("Force tolerance and initial step-size"); + RTYPE ("emtol", ir->em_tol, 10.0); + RTYPE ("emstep", ir->em_stepsize, 0.01); + CTYPE ("Max number of iterations in relax-shells"); + ITYPE ("niter", ir->niter, 20); + CTYPE ("Step size (ps^2) for minimization of flexible constraints"); + RTYPE ("fcstep", ir->fc_stepsize, 0); + CTYPE ("Frequency of steepest descents steps when doing CG"); + ITYPE ("nstcgsteep", ir->nstcgsteep, 1000); + ITYPE ("nbfgscorr", ir->nbfgscorr, 10); + + CCTYPE ("TEST PARTICLE INSERTION OPTIONS"); + RTYPE ("rtpi", ir->rtpi, 0.05); + + /* Output options */ + CCTYPE ("OUTPUT CONTROL OPTIONS"); + CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)"); + ITYPE ("nstxout", ir->nstxout, 0); + ITYPE ("nstvout", ir->nstvout, 0); + ITYPE ("nstfout", ir->nstfout, 0); + CTYPE ("Output frequency for energies to log file and energy file"); + ITYPE ("nstlog", ir->nstlog, 1000); + ITYPE ("nstcalcenergy", ir->nstcalcenergy, 100); + ITYPE ("nstenergy", ir->nstenergy, 1000); + CTYPE ("Output frequency and precision for .xtc file"); + ITYPE ("nstxout-compressed", ir->nstxout_compressed, 0); + RTYPE ("compressed-x-precision", ir->x_compression_precision, 1000.0); + CTYPE ("This selects the subset of atoms for the compressed"); + CTYPE ("trajectory file. You can select multiple groups. By"); + CTYPE ("default, all atoms will be written."); + STYPE ("compressed-x-grps", is->x_compressed_groups, NULL); + CTYPE ("Selection of energy groups"); + STYPE ("energygrps", is->energy, NULL); + + /* Neighbor searching */ + CCTYPE ("NEIGHBORSEARCHING PARAMETERS"); + CTYPE ("cut-off scheme (Verlet: particle based cut-offs, group: using charge groups)"); + EETYPE("cutoff-scheme", ir->cutoff_scheme, ecutscheme_names); + CTYPE ("nblist update frequency"); + ITYPE ("nstlist", ir->nstlist, 10); + CTYPE ("ns algorithm (simple or grid)"); + EETYPE("ns-type", ir->ns_type, ens_names); + CTYPE ("Periodic boundary conditions: xyz, no, xy"); + EETYPE("pbc", ir->ePBC, epbc_names); + EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names); + CTYPE ("Allowed energy error due to the Verlet buffer in kJ/mol/ps per atom,"); + CTYPE ("a value of -1 means: use rlist"); + RTYPE("verlet-buffer-tolerance", ir->verletbuf_tol, 0.005); + CTYPE ("nblist cut-off"); + RTYPE ("rlist", ir->rlist, 1.0); + CTYPE ("long-range cut-off for switched potentials"); + RTYPE ("rlistlong", ir->rlistlong, -1); + ITYPE ("nstcalclr", ir->nstcalclr, -1); + + /* Electrostatics */ + CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW"); + CTYPE ("Method for doing electrostatics"); + EETYPE("coulombtype", ir->coulombtype, eel_names); + EETYPE("coulomb-modifier", ir->coulomb_modifier, eintmod_names); + CTYPE ("cut-off lengths"); + RTYPE ("rcoulomb-switch", ir->rcoulomb_switch, 0.0); + RTYPE ("rcoulomb", ir->rcoulomb, 1.0); + CTYPE ("Relative dielectric constant for the medium and the reaction field"); + RTYPE ("epsilon-r", ir->epsilon_r, 1.0); + RTYPE ("epsilon-rf", ir->epsilon_rf, 0.0); + CTYPE ("Method for doing Van der Waals"); + EETYPE("vdw-type", ir->vdwtype, evdw_names); + EETYPE("vdw-modifier", ir->vdw_modifier, eintmod_names); + CTYPE ("cut-off lengths"); + RTYPE ("rvdw-switch", 
ir->rvdw_switch, 0.0); + RTYPE ("rvdw", ir->rvdw, 1.0); + CTYPE ("Apply long range dispersion corrections for Energy and Pressure"); + EETYPE("DispCorr", ir->eDispCorr, edispc_names); + CTYPE ("Extension of the potential lookup tables beyond the cut-off"); + RTYPE ("table-extension", ir->tabext, 1.0); + CTYPE ("Separate tables between energy group pairs"); + STYPE ("energygrp-table", is->egptable, NULL); + CTYPE ("Spacing for the PME/PPPM FFT grid"); + RTYPE ("fourierspacing", ir->fourier_spacing, 0.12); + CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used"); + ITYPE ("fourier-nx", ir->nkx, 0); + ITYPE ("fourier-ny", ir->nky, 0); + ITYPE ("fourier-nz", ir->nkz, 0); + CTYPE ("EWALD/PME/PPPM parameters"); + ITYPE ("pme-order", ir->pme_order, 4); + RTYPE ("ewald-rtol", ir->ewald_rtol, 0.00001); + RTYPE ("ewald-rtol-lj", ir->ewald_rtol_lj, 0.001); + EETYPE("lj-pme-comb-rule", ir->ljpme_combination_rule, eljpme_names); + EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names); + RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0); + + CCTYPE("IMPLICIT SOLVENT ALGORITHM"); + EETYPE("implicit-solvent", ir->implicit_solvent, eis_names); + + CCTYPE ("GENERALIZED BORN ELECTROSTATICS"); + CTYPE ("Algorithm for calculating Born radii"); + EETYPE("gb-algorithm", ir->gb_algorithm, egb_names); + CTYPE ("Frequency of calculating the Born radii inside rlist"); + ITYPE ("nstgbradii", ir->nstgbradii, 1); + CTYPE ("Cutoff for Born radii calculation; the contribution from atoms"); + CTYPE ("between rlist and rgbradii is updated every nstlist steps"); + RTYPE ("rgbradii", ir->rgbradii, 1.0); + CTYPE ("Dielectric coefficient of the implicit solvent"); + RTYPE ("gb-epsilon-solvent", ir->gb_epsilon_solvent, 80.0); + CTYPE ("Salt concentration in M for Generalized Born models"); + RTYPE ("gb-saltconc", ir->gb_saltconc, 0.0); + CTYPE ("Scaling factors used in the OBC GB model. 
Default values are OBC(II)"); + RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0); + RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8); + RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85); + RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009); + EETYPE("sa-algorithm", ir->sa_algorithm, esa_names); + CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA"); + CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models."); + RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1); + + /* Coupling stuff */ + CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS"); + CTYPE ("Temperature coupling"); + EETYPE("tcoupl", ir->etc, etcoupl_names); + ITYPE ("nsttcouple", ir->nsttcouple, -1); + ITYPE("nh-chain-length", ir->opts.nhchainlength, 10); + EETYPE("print-nose-hoover-chain-variables", ir->bPrintNHChains, yesno_names); + CTYPE ("Groups to couple separately"); + STYPE ("tc-grps", is->tcgrps, NULL); + CTYPE ("Time constant (ps) and reference temperature (K)"); + STYPE ("tau-t", is->tau_t, NULL); + STYPE ("ref-t", is->ref_t, NULL); + CTYPE ("pressure coupling"); + EETYPE("pcoupl", ir->epc, epcoupl_names); + EETYPE("pcoupltype", ir->epct, epcoupltype_names); + ITYPE ("nstpcouple", ir->nstpcouple, -1); + CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)"); + RTYPE ("tau-p", ir->tau_p, 1.0); + STYPE ("compressibility", dumstr[0], NULL); + STYPE ("ref-p", dumstr[1], NULL); + CTYPE ("Scaling of reference coordinates, No, All or COM"); + EETYPE ("refcoord-scaling", ir->refcoord_scaling, erefscaling_names); + + /* QMMM */ + CCTYPE ("OPTIONS FOR QMMM calculations"); + EETYPE("QMMM", ir->bQMMM, yesno_names); + CTYPE ("Groups treated Quantum Mechanically"); + STYPE ("QMMM-grps", is->QMMM, NULL); + CTYPE ("QM method"); + STYPE("QMmethod", is->QMmethod, NULL); + CTYPE ("QMMM scheme"); + EETYPE("QMMMscheme", ir->QMMMscheme, eQMMMscheme_names); + CTYPE ("QM basisset"); + STYPE("QMbasis", is->QMbasis, NULL); + CTYPE ("QM charge"); + STYPE ("QMcharge", is->QMcharge, NULL); + CTYPE ("QM multiplicity"); + STYPE ("QMmult", is->QMmult, NULL); + CTYPE ("Surface Hopping"); + STYPE ("SH", is->bSH, NULL); + CTYPE ("CAS space options"); + STYPE ("CASorbitals", is->CASorbitals, NULL); + STYPE ("CASelectrons", is->CASelectrons, NULL); + STYPE ("SAon", is->SAon, NULL); + STYPE ("SAoff", is->SAoff, NULL); + STYPE ("SAsteps", is->SAsteps, NULL); + CTYPE ("Scale factor for MM charges"); + RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0); + CTYPE ("Optimization of QM subsystem"); + STYPE ("bOPT", is->bOPT, NULL); + STYPE ("bTS", is->bTS, NULL); + + /* Simulated annealing */ + CCTYPE("SIMULATED ANNEALING"); + CTYPE ("Type of annealing for each temperature group (no/single/periodic)"); + STYPE ("annealing", is->anneal, NULL); + CTYPE ("Number of time points to use for specifying annealing in each group"); + STYPE ("annealing-npoints", is->anneal_npoints, NULL); + CTYPE ("List of times at the annealing points for each group"); + STYPE ("annealing-time", is->anneal_time, NULL); + CTYPE ("Temp. 
at each annealing point, for each group."); + STYPE ("annealing-temp", is->anneal_temp, NULL); + + /* Startup run */ + CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN"); + EETYPE("gen-vel", opts->bGenVel, yesno_names); + RTYPE ("gen-temp", opts->tempi, 300.0); + ITYPE ("gen-seed", opts->seed, -1); + + /* Shake stuff */ + CCTYPE ("OPTIONS FOR BONDS"); + EETYPE("constraints", opts->nshake, constraints); + CTYPE ("Type of constraint algorithm"); + EETYPE("constraint-algorithm", ir->eConstrAlg, econstr_names); + CTYPE ("Do not constrain the start configuration"); + EETYPE("continuation", ir->bContinuation, yesno_names); + CTYPE ("Use successive overrelaxation to reduce the number of shake iterations"); + EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names); + CTYPE ("Relative tolerance of shake"); + RTYPE ("shake-tol", ir->shake_tol, 0.0001); + CTYPE ("Highest order in the expansion of the constraint coupling matrix"); + ITYPE ("lincs-order", ir->nProjOrder, 4); + CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for"); + CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs."); + CTYPE ("For energy minimization with constraints it should be 4 to 8."); + ITYPE ("lincs-iter", ir->nLincsIter, 1); + CTYPE ("Lincs will write a warning to the stderr if in one step a bond"); + CTYPE ("rotates over more degrees than"); + RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0); + CTYPE ("Convert harmonic bonds to morse potentials"); + EETYPE("morse", opts->bMorse, yesno_names); + + /* Energy group exclusions */ + CCTYPE ("ENERGY GROUP EXCLUSIONS"); + CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded"); + STYPE ("energygrp-excl", is->egpexcl, NULL); + + /* Walls */ + CCTYPE ("WALLS"); + CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald"); + ITYPE ("nwall", ir->nwall, 0); + EETYPE("wall-type", ir->wall_type, ewt_names); + RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1); + STYPE ("wall-atomtype", is->wall_atomtype, NULL); + STYPE ("wall-density", is->wall_density, NULL); + RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3); + + /* COM pulling */ + CCTYPE("COM PULLING"); + CTYPE("Pull type: no, umbrella, constraint or constant-force"); + EETYPE("pull", ir->ePull, epull_names); + if (ir->ePull != epullNO) + { + snew(ir->pull, 1); + is->pull_grp = read_pullparams(&ninp, &inp, ir->pull, &opts->pull_start, wi); + } + + /* Enforced rotation */ + CCTYPE("ENFORCED ROTATION"); + CTYPE("Enforced rotation: No or Yes"); + EETYPE("rotation", ir->bRot, yesno_names); + if (ir->bRot) + { + snew(ir->rot, 1); + is->rot_grp = read_rotparams(&ninp, &inp, ir->rot, wi); + } + + /* Interactive MD */ + ir->bIMD = FALSE; + CCTYPE("Group to display and/or manipulate in interactive MD session"); + STYPE ("IMD-group", is->imd_grp, NULL); + if (is->imd_grp[0] != '\0') + { + snew(ir->imd, 1); + ir->bIMD = TRUE; + } + + /* Refinement */ + CCTYPE("NMR refinement stuff"); + CTYPE ("Distance restraints type: No, Simple or Ensemble"); + EETYPE("disre", ir->eDisre, edisre_names); + CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal"); + EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names); + CTYPE ("Use sqrt of the time averaged times the instantaneous violation"); + EETYPE("disre-mixed", ir->bDisreMixed, yesno_names); + RTYPE ("disre-fc", ir->dr_fc, 1000.0); + RTYPE ("disre-tau", ir->dr_tau, 0.0); + CTYPE ("Output frequency for pair distances to energy file"); + ITYPE ("nstdisreout", ir->nstdisreout, 100); 
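+ /* Editorial example (hypothetical .mdp fragment) for the distance-restraint
+ * options read just above:
+ * disre = Simple
+ * disre-weighting = Conservative
+ * disre-fc = 1000
+ * nstdisreout = 100
+ * Each *TYPE macro consumes one key from the inpfile list and falls back to
+ * the listed default when the key is absent from the .mdp file. */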
+ CTYPE ("Orientation restraints: No or Yes"); + EETYPE("orire", opts->bOrire, yesno_names); + CTYPE ("Orientation restraints force constant and tau for time averaging"); + RTYPE ("orire-fc", ir->orires_fc, 0.0); + RTYPE ("orire-tau", ir->orires_tau, 0.0); + STYPE ("orire-fitgrp", is->orirefitgrp, NULL); + CTYPE ("Output frequency for trace(SD) and S to energy file"); + ITYPE ("nstorireout", ir->nstorireout, 100); + + /* free energy variables */ + CCTYPE ("Free energy variables"); + EETYPE("free-energy", ir->efep, efep_names); + STYPE ("couple-moltype", is->couple_moltype, NULL); + EETYPE("couple-lambda0", opts->couple_lam0, couple_lam); + EETYPE("couple-lambda1", opts->couple_lam1, couple_lam); + EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names); + + RTYPE ("init-lambda", fep->init_lambda, -1); /* start with -1 so + we can recognize if + it was not entered */ + ITYPE ("init-lambda-state", fep->init_fep_state, -1); + RTYPE ("delta-lambda", fep->delta_lambda, 0.0); + ITYPE ("nstdhdl", fep->nstdhdl, 50); + STYPE ("fep-lambdas", is->fep_lambda[efptFEP], NULL); + STYPE ("mass-lambdas", is->fep_lambda[efptMASS], NULL); + STYPE ("coul-lambdas", is->fep_lambda[efptCOUL], NULL); + STYPE ("vdw-lambdas", is->fep_lambda[efptVDW], NULL); + STYPE ("bonded-lambdas", is->fep_lambda[efptBONDED], NULL); + STYPE ("restraint-lambdas", is->fep_lambda[efptRESTRAINT], NULL); + STYPE ("temperature-lambdas", is->fep_lambda[efptTEMPERATURE], NULL); + ITYPE ("calc-lambda-neighbors", fep->lambda_neighbors, 1); + STYPE ("init-lambda-weights", is->lambda_weights, NULL); + EETYPE("dhdl-print-energy", fep->bPrintEnergy, yesno_names); + RTYPE ("sc-alpha", fep->sc_alpha, 0.0); + ITYPE ("sc-power", fep->sc_power, 1); + RTYPE ("sc-r-power", fep->sc_r_power, 6.0); + RTYPE ("sc-sigma", fep->sc_sigma, 0.3); + EETYPE("sc-coul", fep->bScCoul, yesno_names); + ITYPE ("dh_hist_size", fep->dh_hist_size, 0); + RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1); + EETYPE("separate-dhdl-file", fep->separate_dhdl_file, + separate_dhdl_file_names); + EETYPE("dhdl-derivatives", fep->dhdl_derivatives, dhdl_derivatives_names); + ITYPE ("dh_hist_size", fep->dh_hist_size, 0); + RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1); + + /* Non-equilibrium MD stuff */ + CCTYPE("Non-equilibrium MD stuff"); + STYPE ("acc-grps", is->accgrps, NULL); + STYPE ("accelerate", is->acc, NULL); + STYPE ("freezegrps", is->freeze, NULL); + STYPE ("freezedim", is->frdim, NULL); + RTYPE ("cos-acceleration", ir->cos_accel, 0); + STYPE ("deform", is->deform, NULL); + + /* simulated tempering variables */ + CCTYPE("simulated tempering variables"); + EETYPE("simulated-tempering", ir->bSimTemp, yesno_names); + EETYPE("simulated-tempering-scaling", ir->simtempvals->eSimTempScale, esimtemp_names); + RTYPE("sim-temp-low", ir->simtempvals->simtemp_low, 300.0); + RTYPE("sim-temp-high", ir->simtempvals->simtemp_high, 300.0); + + /* expanded ensemble variables */ + if (ir->efep == efepEXPANDED || ir->bSimTemp) + { + read_expandedparams(&ninp, &inp, expand, wi); + } + + /* Electric fields */ + CCTYPE("Electric fields"); + CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)"); + CTYPE ("and a phase angle (real)"); + STYPE ("E-x", is->efield_x, NULL); + STYPE ("E-xt", is->efield_xt, NULL); + STYPE ("E-y", is->efield_y, NULL); + STYPE ("E-yt", is->efield_yt, NULL); + STYPE ("E-z", is->efield_z, NULL); + STYPE ("E-zt", is->efield_zt, NULL); + + CCTYPE("Ion/water position swapping for computational electrophysiology setups"); + CTYPE("Swap 
+ CCTYPE("Ion/water position swapping for computational electrophysiology setups");
+ CTYPE("Swap positions along direction: no, X, Y, Z");
+ EETYPE("swapcoords", ir->eSwapCoords, eSwapTypes_names);
+ if (ir->eSwapCoords != eswapNO)
+ {
+ snew(ir->swap, 1);
+ CTYPE("Swap attempt frequency");
+ ITYPE("swap-frequency", ir->swap->nstswap, 1);
+ CTYPE("Two index groups that contain the compartment-partitioning atoms");
+ STYPE("split-group0", splitgrp0, NULL);
+ STYPE("split-group1", splitgrp1, NULL);
+ CTYPE("Use center of mass of split groups (yes/no), otherwise center of geometry is used");
+ EETYPE("massw-split0", ir->swap->massw_split[0], yesno_names);
+ EETYPE("massw-split1", ir->swap->massw_split[1], yesno_names);
+
+ CTYPE("Group name of ions that can be exchanged with solvent molecules");
+ STYPE("swap-group", swapgrp, NULL);
+ CTYPE("Group name of solvent molecules");
+ STYPE("solvent-group", solgrp, NULL);
+
+ CTYPE("Split cylinder: radius, upper and lower extension (nm) (this will define the channels)");
+ CTYPE("Note that the split cylinder settings do not have an influence on the swapping protocol,");
+ CTYPE("however, if correctly defined, the ion permeation events are counted per channel");
+ RTYPE("cyl0-r", ir->swap->cyl0r, 2.0);
+ RTYPE("cyl0-up", ir->swap->cyl0u, 1.0);
+ RTYPE("cyl0-down", ir->swap->cyl0l, 1.0);
+ RTYPE("cyl1-r", ir->swap->cyl1r, 2.0);
+ RTYPE("cyl1-up", ir->swap->cyl1u, 1.0);
+ RTYPE("cyl1-down", ir->swap->cyl1l, 1.0);
+
+ CTYPE("Average the number of ions per compartment over this many swap attempt steps");
+ ITYPE("coupl-steps", ir->swap->nAverage, 10);
+ CTYPE("Requested number of anions and cations for each of the two compartments");
+ CTYPE("-1 means fix the numbers as found in time step 0");
+ ITYPE("anionsA", ir->swap->nanions[0], -1);
+ ITYPE("cationsA", ir->swap->ncations[0], -1);
+ ITYPE("anionsB", ir->swap->nanions[1], -1);
+ ITYPE("cationsB", ir->swap->ncations[1], -1);
+ CTYPE("Start to swap ions if threshold difference to requested count is reached");
+ RTYPE("threshold", ir->swap->threshold, 1.0);
+ }
+
+ /* AdResS defined thingies */
+ CCTYPE ("AdResS parameters");
+ EETYPE("adress", ir->bAdress, yesno_names);
+ if (ir->bAdress)
+ {
+ snew(ir->adress, 1);
+ read_adressparams(&ninp, &inp, ir->adress, wi);
+ }
+
+ /* User defined thingies */
+ CCTYPE ("User defined thingies");
+ STYPE ("user1-grps", is->user1, NULL);
+ STYPE ("user2-grps", is->user2, NULL);
+ ITYPE ("userint1", ir->userint1, 0);
+ ITYPE ("userint2", ir->userint2, 0);
+ ITYPE ("userint3", ir->userint3, 0);
+ ITYPE ("userint4", ir->userint4, 0);
+ RTYPE ("userreal1", ir->userreal1, 0);
+ RTYPE ("userreal2", ir->userreal2, 0);
+ RTYPE ("userreal3", ir->userreal3, 0);
+ RTYPE ("userreal4", ir->userreal4, 0);
+#undef CTYPE
+
+ write_inpfile(mdparout, ninp, inp, FALSE, wi);
+ for (i = 0; (i < ninp); i++)
+ {
+ sfree(inp[i].name);
+ sfree(inp[i].value);
+ }
+ sfree(inp);
+
+ /* Process options if necessary */
+ for (m = 0; m < 2; m++)
+ {
+ for (i = 0; i < 2*DIM; i++)
+ {
+ dumdub[m][i] = 0.0;
+ }
+ if (ir->epc)
+ {
+ switch (ir->epct)
+ {
+ case epctISOTROPIC:
+ if (sscanf(dumstr[m], "%lf", &(dumdub[m][XX])) != 1)
+ {
+ warning_error(wi, "Pressure coupling not enough values (I need 1)");
+ }
+ dumdub[m][YY] = dumdub[m][ZZ] = dumdub[m][XX];
+ break;
+ case epctSEMIISOTROPIC:
+ case epctSURFACETENSION:
+ if (sscanf(dumstr[m], "%lf%lf",
+ &(dumdub[m][XX]), &(dumdub[m][ZZ])) != 2)
+ {
+ warning_error(wi, "Pressure coupling not enough values (I need 2)");
+ }
+ dumdub[m][YY] = dumdub[m][XX];
+ break;
+ case epctANISOTROPIC:
+ if (sscanf(dumstr[m], "%lf%lf%lf%lf%lf%lf",
&(dumdub[m][XX]), &(dumdub[m][YY]), &(dumdub[m][ZZ]), + &(dumdub[m][3]), &(dumdub[m][4]), &(dumdub[m][5])) != 6) + { + warning_error(wi, "Pressure coupling not enough values (I need 6)"); + } + break; + default: + gmx_fatal(FARGS, "Pressure coupling type %s not implemented yet", + epcoupltype_names[ir->epct]); + } + } + } + clear_mat(ir->ref_p); + clear_mat(ir->compress); + for (i = 0; i < DIM; i++) + { + ir->ref_p[i][i] = dumdub[1][i]; + ir->compress[i][i] = dumdub[0][i]; + } + if (ir->epct == epctANISOTROPIC) + { + ir->ref_p[XX][YY] = dumdub[1][3]; + ir->ref_p[XX][ZZ] = dumdub[1][4]; + ir->ref_p[YY][ZZ] = dumdub[1][5]; + if (ir->ref_p[XX][YY] != 0 && ir->ref_p[XX][ZZ] != 0 && ir->ref_p[YY][ZZ] != 0) + { + warning(wi, "All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n"); + } + ir->compress[XX][YY] = dumdub[0][3]; + ir->compress[XX][ZZ] = dumdub[0][4]; + ir->compress[YY][ZZ] = dumdub[0][5]; + for (i = 0; i < DIM; i++) + { + for (m = 0; m < i; m++) + { + ir->ref_p[i][m] = ir->ref_p[m][i]; + ir->compress[i][m] = ir->compress[m][i]; + } + } + } + + if (ir->comm_mode == ecmNO) + { + ir->nstcomm = 0; + } + + opts->couple_moltype = NULL; + if (strlen(is->couple_moltype) > 0) + { + if (ir->efep != efepNO) + { + opts->couple_moltype = strdup(is->couple_moltype); + if (opts->couple_lam0 == opts->couple_lam1) + { + warning(wi, "The lambda=0 and lambda=1 states for coupling are identical"); + } + if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE || + opts->couple_lam1 == ecouplamNONE)) + { + warning(wi, "For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used"); + } + } + else + { + warning(wi, "Can not couple a molecule with free_energy = no"); + } + } + /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */ + if (ir->efep != efepNO) + { + if (fep->delta_lambda > 0) + { + ir->efep = efepSLOWGROWTH; + } + } + + if (ir->bSimTemp) + { + fep->bPrintEnergy = TRUE; + /* always print out the energy to dhdl if we are doing expanded ensemble, since we need the total energy + if the temperature is changing. 
*/
+ }
+
+ if ((ir->efep != efepNO) || ir->bSimTemp)
+ {
+ ir->bExpanded = FALSE;
+ if ((ir->efep == efepEXPANDED) || ir->bSimTemp)
+ {
+ ir->bExpanded = TRUE;
+ }
+ do_fep_params(ir, is->fep_lambda, is->lambda_weights);
+ if (ir->bSimTemp) /* done after fep params */
+ {
+ do_simtemp_params(ir);
+ }
+ }
+ else
+ {
+ ir->fepvals->n_lambda = 0;
+ }
+
+ /* WALL PARAMETERS */
+
+ do_wall_params(ir, is->wall_atomtype, is->wall_density, opts);
+
+ /* ORIENTATION RESTRAINT PARAMETERS */
+
+ if (opts->bOrire && str_nelem(is->orirefitgrp, MAXPTR, NULL) != 1)
+ {
+ warning_error(wi, "ERROR: Need one orientation restraint fit group\n");
+ }
+
+ /* DEFORMATION PARAMETERS */
+
+ clear_mat(ir->deform);
+ for (i = 0; i < 6; i++)
+ {
+ dumdub[0][i] = 0;
+ }
+ m = sscanf(is->deform, "%lf %lf %lf %lf %lf %lf",
+ &(dumdub[0][0]), &(dumdub[0][1]), &(dumdub[0][2]),
+ &(dumdub[0][3]), &(dumdub[0][4]), &(dumdub[0][5]));
+ for (i = 0; i < 3; i++)
+ {
+ ir->deform[i][i] = dumdub[0][i];
+ }
+ ir->deform[YY][XX] = dumdub[0][3];
+ ir->deform[ZZ][XX] = dumdub[0][4];
+ ir->deform[ZZ][YY] = dumdub[0][5];
+ if (ir->epc != epcNO)
+ {
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j <= i; j++)
+ {
+ if (ir->deform[i][j] != 0 && ir->compress[i][j] != 0)
+ {
+ warning_error(wi, "A box element has deform set and compressibility > 0");
+ }
+ }
+ }
+ for (i = 0; i < 3; i++)
+ {
+ for (j = 0; j < i; j++)
+ {
+ if (ir->deform[i][j] != 0)
+ {
+ for (m = j; m < DIM; m++)
+ {
+ if (ir->compress[m][j] != 0)
+ {
+ sprintf(warn_buf, "An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
+ warning(wi, warn_buf);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /* Ion/water position swapping checks */
+ if (ir->eSwapCoords != eswapNO)
+ {
+ if (ir->swap->nstswap < 1)
+ {
+ warning_error(wi, "swap-frequency must be 1 or larger when ion swapping is requested");
+ }
+ if (ir->swap->nAverage < 1)
+ {
+ warning_error(wi, "coupl-steps must be 1 or larger.\n");
+ }
+ if (ir->swap->threshold < 1.0)
+ {
+ warning_error(wi, "Ion count threshold must be at least 1.\n");
+ }
+ }
+
+ sfree(dumstr[0]);
+ sfree(dumstr[1]);
+}
+
+static int search_QMstring(const char *s, int ng, const char *gn[])
+{
+ /* same as normal search_string, but this one searches QM strings */
+ int i;
+
+ for (i = 0; (i < ng); i++)
+ {
+ if (gmx_strcasecmp(s, gn[i]) == 0)
+ {
+ return i;
+ }
+ }
+
+ gmx_fatal(FARGS, "this QM method or basisset (%s) is not implemented!\n", s);
+
+ return -1;
+
+} /* search_QMstring */
+
+/* We would like gn to be const as well, but C doesn't allow this */
+int search_string(const char *s, int ng, char *gn[])
+{
+ int i;
+
+ for (i = 0; (i < ng); i++)
+ {
+ if (gmx_strcasecmp(s, gn[i]) == 0)
+ {
+ return i;
+ }
+ }
+
+ gmx_fatal(FARGS,
+ "Group %s referenced in the .mdp file was not found in the index file.\n"
+ "Group names must match either [moleculetype] names or custom index group\n"
+ "names, in which case you must supply an index file to the '-n' option\n"
+ "of grompp.",
+ s);
+
+ return -1;
+}
+
+static gmx_bool do_numbering(int natoms, gmx_groups_t *groups, int ng, char *ptrs[],
+ t_blocka *block, char *gnames[],
+ int gtype, int restnm,
+ int grptp, gmx_bool bVerbose,
+ warninp_t wi)
+{
+ unsigned short *cbuf;
+ t_grps *grps = &(groups->grps[gtype]);
+ int i, j, gid, aj, ognr, ntot = 0;
+ const char *title;
+ gmx_bool bRest;
+ char warn_buf[STRLEN];
+
+ if (debug)
+ {
+ fprintf(debug, "Starting numbering %d groups of type %d\n", ng, gtype);
+ }
+
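+ /* Editorial sketch (hypothetical values) of the numbering scheme used below:
+ * cbuf[] holds one group id per atom, initialised to NOGID; each atom may be
+ * claimed by at most one group of this type. For natoms = 5 and index groups
+ * A = {0,1}, B = {3}, after the loop cbuf = {0, 0, NOGID, 1, NOGID}, and the
+ * rest group (id 2) then absorbs atoms 2 and 4. */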
+ title = gtypes[gtype];
+
+ snew(cbuf, natoms);
+ /* Mark all id's as not set */
+ for (i = 0; (i < natoms); i++)
+ {
+ cbuf[i] = NOGID;
+ }
+
+ snew(grps->nm_ind, ng+1); /* +1 for possible rest group */
+ for (i = 0; (i < ng); i++)
+ {
+ /* Look up the group name in the block structure */
+ gid = search_string(ptrs[i], block->nr, gnames);
+ if ((grptp != egrptpONE) || (i == 0))
+ {
+ grps->nm_ind[grps->nr++] = gid;
+ }
+ if (debug)
+ {
+ fprintf(debug, "Found gid %d for group %s\n", gid, ptrs[i]);
+ }
+
+ /* Now go over the atoms in the group */
+ for (j = block->index[gid]; (j < block->index[gid+1]); j++)
+ {
+
+ aj = block->a[j];
+
+ /* Range checking */
+ if ((aj < 0) || (aj >= natoms))
+ {
+ gmx_fatal(FARGS, "Invalid atom number %d in indexfile", aj);
+ }
+ /* Look up the old group number */
+ ognr = cbuf[aj];
+ if (ognr != NOGID)
+ {
+ gmx_fatal(FARGS, "Atom %d in multiple %s groups (%d and %d)",
+ aj+1, title, ognr+1, i+1);
+ }
+ else
+ {
+ /* Store the group number in buffer */
+ if (grptp == egrptpONE)
+ {
+ cbuf[aj] = 0;
+ }
+ else
+ {
+ cbuf[aj] = i;
+ }
+ ntot++;
+ }
+ }
+ }
+
+ /* Now check whether we have done all atoms */
+ bRest = FALSE;
+ if (ntot != natoms)
+ {
+ if (grptp == egrptpALL)
+ {
+ gmx_fatal(FARGS, "%d atoms are not part of any of the %s groups",
+ natoms-ntot, title);
+ }
+ else if (grptp == egrptpPART)
+ {
+ sprintf(warn_buf, "%d atoms are not part of any of the %s groups",
+ natoms-ntot, title);
+ warning_note(wi, warn_buf);
+ }
+ /* Assign all atoms currently unassigned to a rest group */
+ for (j = 0; (j < natoms); j++)
+ {
+ if (cbuf[j] == NOGID)
+ {
+ cbuf[j] = grps->nr;
+ bRest = TRUE;
+ }
+ }
+ if (grptp != egrptpPART)
+ {
+ if (bVerbose)
+ {
+ fprintf(stderr,
+ "Making dummy/rest group for %s containing %d elements\n",
+ title, natoms-ntot);
+ }
+ /* Add group name "rest" */
+ grps->nm_ind[grps->nr] = restnm;
+
+ /* Assign the rest name to all atoms not currently assigned to a group */
+ for (j = 0; (j < natoms); j++)
+ {
+ if (cbuf[j] == NOGID)
+ {
+ cbuf[j] = grps->nr;
+ }
+ }
+ grps->nr++;
+ }
+ }
+
+ if (grps->nr == 1 && (ntot == 0 || ntot == natoms))
+ {
+ /* All atoms are part of one (or no) group, no index required */
+ groups->ngrpnr[gtype] = 0;
+ groups->grpnr[gtype] = NULL;
+ }
+ else
+ {
+ groups->ngrpnr[gtype] = natoms;
+ snew(groups->grpnr[gtype], natoms);
+ for (j = 0; (j < natoms); j++)
+ {
+ groups->grpnr[gtype][j] = cbuf[j];
+ }
+ }
+
+ sfree(cbuf);
+
+ return (bRest && grptp == egrptpPART);
+}
+
+static void calc_nrdf(gmx_mtop_t *mtop, t_inputrec *ir, char **gnames)
+{
+ t_grpopts *opts;
+ gmx_groups_t *groups;
+ t_pull *pull;
+ int natoms, ai, aj, i, j, d, g, imin, jmin;
+ t_iatom *ia;
+ int *nrdf2, *na_vcm, na_tot;
+ double *nrdf_tc, *nrdf_vcm, nrdf_uc, n_sub = 0;
+ gmx_mtop_atomloop_all_t aloop;
+ t_atom *atom;
+ int mb, mol, ftype, as;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+
+ /* Calculate nrdf.
+ * First calc 3xnr-atoms for each group
+ * then subtract half a degree of freedom for each constraint
+ *
+ * Only atoms and nuclei contribute to the degrees of freedom...
+ */
+
+ opts = &ir->opts;
+
+ groups = &mtop->groups;
+ natoms = mtop->natoms;
+
+ /* Allocate one more for a possible rest group */
+ /* We need to sum degrees of freedom into doubles,
+ * since floats give too low nrdf's above 3 million atoms.
+ */ + snew(nrdf_tc, groups->grps[egcTC].nr+1); + snew(nrdf_vcm, groups->grps[egcVCM].nr+1); + snew(na_vcm, groups->grps[egcVCM].nr+1); + + for (i = 0; i < groups->grps[egcTC].nr; i++) + { + nrdf_tc[i] = 0; + } + for (i = 0; i < groups->grps[egcVCM].nr+1; i++) + { + nrdf_vcm[i] = 0; + } + + snew(nrdf2, natoms); + aloop = gmx_mtop_atomloop_all_init(mtop); + while (gmx_mtop_atomloop_all_next(aloop, &i, &atom)) + { + nrdf2[i] = 0; + if (atom->ptype == eptAtom || atom->ptype == eptNucleus) + { + g = ggrpnr(groups, egcFREEZE, i); + /* Double count nrdf for particle i */ + for (d = 0; d < DIM; d++) + { + if (opts->nFreeze[g][d] == 0) + { + nrdf2[i] += 2; + } + } + nrdf_tc [ggrpnr(groups, egcTC, i)] += 0.5*nrdf2[i]; + nrdf_vcm[ggrpnr(groups, egcVCM, i)] += 0.5*nrdf2[i]; + } + } + + as = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molb = &mtop->molblock[mb]; + molt = &mtop->moltype[molb->type]; + atom = molt->atoms.atom; + for (mol = 0; mol < molb->nmol; mol++) + { + for (ftype = F_CONSTR; ftype <= F_CONSTRNC; ftype++) + { + ia = molt->ilist[ftype].iatoms; + for (i = 0; i < molt->ilist[ftype].nr; ) + { + /* Subtract degrees of freedom for the constraints, + * if the particles still have degrees of freedom left. + * If one of the particles is a vsite or a shell, then all + * constraint motion will go there, but since they do not + * contribute to the constraints the degrees of freedom do not + * change. + */ + ai = as + ia[1]; + aj = as + ia[2]; + if (((atom[ia[1]].ptype == eptNucleus) || + (atom[ia[1]].ptype == eptAtom)) && + ((atom[ia[2]].ptype == eptNucleus) || + (atom[ia[2]].ptype == eptAtom))) + { + if (nrdf2[ai] > 0) + { + jmin = 1; + } + else + { + jmin = 2; + } + if (nrdf2[aj] > 0) + { + imin = 1; + } + else + { + imin = 2; + } + imin = min(imin, nrdf2[ai]); + jmin = min(jmin, nrdf2[aj]); + nrdf2[ai] -= imin; + nrdf2[aj] -= jmin; + nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin; + nrdf_tc [ggrpnr(groups, egcTC, aj)] -= 0.5*jmin; + nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin; + nrdf_vcm[ggrpnr(groups, egcVCM, aj)] -= 0.5*jmin; + } + ia += interaction_function[ftype].nratoms+1; + i += interaction_function[ftype].nratoms+1; + } + } + ia = molt->ilist[F_SETTLE].iatoms; + for (i = 0; i < molt->ilist[F_SETTLE].nr; ) + { + /* Subtract 1 dof from every atom in the SETTLE */ + for (j = 0; j < 3; j++) + { + ai = as + ia[1+j]; + imin = min(2, nrdf2[ai]); + nrdf2[ai] -= imin; + nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin; + nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin; + } + ia += 4; + i += 4; + } + as += molt->atoms.nr; + } + } + + if (ir->ePull == epullCONSTRAINT) + { + /* Correct nrdf for the COM constraints. + * We correct using the TC and VCM group of the first atom + * in the reference and pull group. If atoms in one pull group + * belong to different TC or VCM groups it is anyhow difficult + * to determine the optimal nrdf assignment. 
+ */ + pull = ir->pull; + + for (i = 0; i < pull->ncoord; i++) + { + imin = 1; + + for (j = 0; j < 2; j++) + { + const t_pull_group *pgrp; + + pgrp = &pull->group[pull->coord[i].group[j]]; + + if (pgrp->nat > 0) + { + /* Subtract 1/2 dof from each group */ + ai = pgrp->ind[0]; + nrdf_tc [ggrpnr(groups, egcTC, ai)] -= 0.5*imin; + nrdf_vcm[ggrpnr(groups, egcVCM, ai)] -= 0.5*imin; + if (nrdf_tc[ggrpnr(groups, egcTC, ai)] < 0) + { + gmx_fatal(FARGS, "Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative", gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups, egcTC, ai)]]); + } + } + else + { + /* We need to subtract the whole DOF from group j=1 */ + imin += 1; + } + } + } + } + + if (ir->nstcomm != 0) + { + /* Subtract 3 from the number of degrees of freedom in each vcm group + * when com translation is removed and 6 when rotation is removed + * as well. + */ + switch (ir->comm_mode) + { + case ecmLINEAR: + n_sub = ndof_com(ir); + break; + case ecmANGULAR: + n_sub = 6; + break; + default: + n_sub = 0; + gmx_incons("Checking comm_mode"); + } + + for (i = 0; i < groups->grps[egcTC].nr; i++) + { + /* Count the number of atoms of TC group i for every VCM group */ + for (j = 0; j < groups->grps[egcVCM].nr+1; j++) + { + na_vcm[j] = 0; + } + na_tot = 0; + for (ai = 0; ai < natoms; ai++) + { + if (ggrpnr(groups, egcTC, ai) == i) + { + na_vcm[ggrpnr(groups, egcVCM, ai)]++; + na_tot++; + } + } + /* Correct for VCM removal according to the fraction of each VCM + * group present in this TC group. + */ + nrdf_uc = nrdf_tc[i]; + if (debug) + { + fprintf(debug, "T-group[%d] nrdf_uc = %g, n_sub = %g\n", + i, nrdf_uc, n_sub); + } + nrdf_tc[i] = 0; + for (j = 0; j < groups->grps[egcVCM].nr+1; j++) + { + if (nrdf_vcm[j] > n_sub) + { + nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)* + (nrdf_vcm[j] - n_sub)/nrdf_vcm[j]; + } + if (debug) + { + fprintf(debug, " nrdf_vcm[%d] = %g, nrdf = %g\n", + j, nrdf_vcm[j], nrdf_tc[i]); + } + } + } + } + for (i = 0; (i < groups->grps[egcTC].nr); i++) + { + opts->nrdf[i] = nrdf_tc[i]; + if (opts->nrdf[i] < 0) + { + opts->nrdf[i] = 0; + } + fprintf(stderr, + "Number of degrees of freedom in T-Coupling group %s is %.2f\n", + gnames[groups->grps[egcTC].nm_ind[i]], opts->nrdf[i]); + } + + sfree(nrdf2); + sfree(nrdf_tc); + sfree(nrdf_vcm); + sfree(na_vcm); +} + +static void decode_cos(char *s, t_cosines *cosine) +{ + char *t; + char format[STRLEN], f1[STRLEN]; + double a, phi; + int i; + + t = strdup(s); + trim(t); + + cosine->n = 0; + cosine->a = NULL; + cosine->phi = NULL; + if (strlen(t)) + { + sscanf(t, "%d", &(cosine->n)); + if (cosine->n <= 0) + { + cosine->n = 0; + } + else + { + snew(cosine->a, cosine->n); + snew(cosine->phi, cosine->n); + + sprintf(format, "%%*d"); + for (i = 0; (i < cosine->n); i++) + { + strcpy(f1, format); + strcat(f1, "%lf%lf"); + if (sscanf(t, f1, &a, &phi) < 2) + { + gmx_fatal(FARGS, "Invalid input for electric field shift: '%s'", t); + } + cosine->a[i] = a; + cosine->phi[i] = phi; + strcat(format, "%*lf%*lf"); + } + } + } + sfree(t); +} + +static gmx_bool do_egp_flag(t_inputrec *ir, gmx_groups_t *groups, + const char *option, const char *val, int flag) +{ + /* The maximum number of energy group pairs would be MAXPTR*(MAXPTR+1)/2. + * But since this is much larger than STRLEN, such a line can not be parsed. + * The real maximum is the number of names that fit in a string: STRLEN/2. 
+ */ +#define EGP_MAX (STRLEN/2) + int nelem, i, j, k, nr; + char *names[EGP_MAX]; + char ***gnames; + gmx_bool bSet; + + gnames = groups->grpname; + + nelem = str_nelem(val, EGP_MAX, names); + if (nelem % 2 != 0) + { + gmx_fatal(FARGS, "The number of groups for %s is odd", option); + } + nr = groups->grps[egcENER].nr; + bSet = FALSE; + for (i = 0; i < nelem/2; i++) + { + j = 0; + while ((j < nr) && + gmx_strcasecmp(names[2*i], *(gnames[groups->grps[egcENER].nm_ind[j]]))) + { + j++; + } + if (j == nr) + { + gmx_fatal(FARGS, "%s in %s is not an energy group\n", + names[2*i], option); + } + k = 0; + while ((k < nr) && + gmx_strcasecmp(names[2*i+1], *(gnames[groups->grps[egcENER].nm_ind[k]]))) + { + k++; + } + if (k == nr) + { + gmx_fatal(FARGS, "%s in %s is not an energy group\n", + names[2*i+1], option); + } + if ((j < nr) && (k < nr)) + { + ir->opts.egp_flags[nr*j+k] |= flag; + ir->opts.egp_flags[nr*k+j] |= flag; + bSet = TRUE; + } + } + + return bSet; +} + + +static void make_swap_groups( + t_swapcoords *swap, + char *swapgname, + char *splitg0name, + char *splitg1name, + char *solgname, + t_blocka *grps, + char **gnames) +{ + int ig = -1, i = 0, j; + char *splitg; + + + /* Just a quick check here, more thorough checks are in mdrun */ + if (strcmp(splitg0name, splitg1name) == 0) + { + gmx_fatal(FARGS, "The split groups can not both be '%s'.", splitg0name); + } + + /* First get the swap group index atoms */ + ig = search_string(swapgname, grps->nr, gnames); + swap->nat = grps->index[ig+1] - grps->index[ig]; + if (swap->nat > 0) + { + fprintf(stderr, "Swap group '%s' contains %d atoms.\n", swapgname, swap->nat); + snew(swap->ind, swap->nat); + for (i = 0; i < swap->nat; i++) + { + swap->ind[i] = grps->a[grps->index[ig]+i]; + } + } + else + { + gmx_fatal(FARGS, "You defined an empty group of atoms for swapping."); + } + + /* Now do so for the split groups */ + for (j = 0; j < 2; j++) + { + if (j == 0) + { + splitg = splitg0name; + } + else + { + splitg = splitg1name; + } + + ig = search_string(splitg, grps->nr, gnames); + swap->nat_split[j] = grps->index[ig+1] - grps->index[ig]; + if (swap->nat_split[j] > 0) + { + fprintf(stderr, "Split group %d '%s' contains %d atom%s.\n", + j, splitg, swap->nat_split[j], (swap->nat_split[j] > 1) ? "s" : ""); + snew(swap->ind_split[j], swap->nat_split[j]); + for (i = 0; i < swap->nat_split[j]; i++) + { + swap->ind_split[j][i] = grps->a[grps->index[ig]+i]; + } + } + else + { + gmx_fatal(FARGS, "Split group %d has to contain at least 1 atom!", j); + } + } + + /* Now get the solvent group index atoms */ + ig = search_string(solgname, grps->nr, gnames); + swap->nat_sol = grps->index[ig+1] - grps->index[ig]; + if (swap->nat_sol > 0) + { + fprintf(stderr, "Solvent group '%s' contains %d atoms.\n", solgname, swap->nat_sol); + snew(swap->ind_sol, swap->nat_sol); + for (i = 0; i < swap->nat_sol; i++) + { + swap->ind_sol[i] = grps->a[grps->index[ig]+i]; + } + } + else + { + gmx_fatal(FARGS, "You defined an empty group of solvent. 
Cannot exchange ions."); + } +} + + +void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames) +{ + int ig = -1, i; + + + ig = search_string(IMDgname, grps->nr, gnames); + IMDgroup->nat = grps->index[ig+1] - grps->index[ig]; + + if (IMDgroup->nat > 0) + { + fprintf(stderr, "Group '%s' with %d atoms can be activated for interactive molecular dynamics (IMD).\n", + IMDgname, IMDgroup->nat); + snew(IMDgroup->ind, IMDgroup->nat); + for (i = 0; i < IMDgroup->nat; i++) + { + IMDgroup->ind[i] = grps->a[grps->index[ig]+i]; + } + } +} + + +void do_index(const char* mdparin, const char *ndx, + gmx_mtop_t *mtop, + gmx_bool bVerbose, + t_inputrec *ir, rvec *v, + warninp_t wi) +{ + t_blocka *grps; + gmx_groups_t *groups; + int natoms; + t_symtab *symtab; + t_atoms atoms_all; + char warnbuf[STRLEN], **gnames; + int nr, ntcg, ntau_t, nref_t, nacc, nofg, nSA, nSA_points, nSA_time, nSA_temp; + real tau_min; + int nstcmin; + int nacg, nfreeze, nfrdim, nenergy, nvcm, nuser; + char *ptr1[MAXPTR], *ptr2[MAXPTR], *ptr3[MAXPTR]; + int i, j, k, restnm; + real SAtime; + gmx_bool bExcl, bTable, bSetTCpar, bAnneal, bRest; + int nQMmethod, nQMbasis, nQMcharge, nQMmult, nbSH, nCASorb, nCASelec, + nSAon, nSAoff, nSAsteps, nQMg, nbOPT, nbTS; + char warn_buf[STRLEN]; + + if (bVerbose) + { + fprintf(stderr, "processing index file...\n"); + } + debug_gmx(); + if (ndx == NULL) + { + snew(grps, 1); + snew(grps->index, 1); + snew(gnames, 1); + atoms_all = gmx_mtop_global_atoms(mtop); + analyse(&atoms_all, grps, &gnames, FALSE, TRUE); + free_t_atoms(&atoms_all, FALSE); + } + else + { + grps = init_index(ndx, &gnames); + } + + groups = &mtop->groups; + natoms = mtop->natoms; + symtab = &mtop->symtab; + + snew(groups->grpname, grps->nr+1); + + for (i = 0; (i < grps->nr); i++) + { + groups->grpname[i] = put_symtab(symtab, gnames[i]); + } + groups->grpname[i] = put_symtab(symtab, "rest"); + restnm = i; + srenew(gnames, grps->nr+1); + gnames[restnm] = *(groups->grpname[i]); + groups->ngrpname = grps->nr+1; + + set_warning_line(wi, mdparin, -1); + + ntau_t = str_nelem(is->tau_t, MAXPTR, ptr1); + nref_t = str_nelem(is->ref_t, MAXPTR, ptr2); + ntcg = str_nelem(is->tcgrps, MAXPTR, ptr3); + if ((ntau_t != ntcg) || (nref_t != ntcg)) + { + gmx_fatal(FARGS, "Invalid T coupling input: %d groups, %d ref-t values and " + "%d tau-t values", ntcg, nref_t, ntau_t); + } + + bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI == eiBD || EI_TPI(ir->eI)); + do_numbering(natoms, groups, ntcg, ptr3, grps, gnames, egcTC, + restnm, bSetTCpar ? egrptpALL : egrptpALL_GENREST, bVerbose, wi); + nr = groups->grps[egcTC].nr; + ir->opts.ngtc = nr; + snew(ir->opts.nrdf, nr); + snew(ir->opts.tau_t, nr); + snew(ir->opts.ref_t, nr); + if (ir->eI == eiBD && ir->bd_fric == 0) + { + fprintf(stderr, "bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n"); + } + + if (bSetTCpar) + { + if (nr != nref_t) + { + gmx_fatal(FARGS, "Not enough ref-t and tau-t values!"); + } + + tau_min = 1e20; + for (i = 0; (i < nr); i++) + { + ir->opts.tau_t[i] = strtod(ptr1[i], NULL); + if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0) + { + sprintf(warn_buf, "With integrator %s tau-t should be larger than 0", ei_names[ir->eI]); + warning_error(wi, warn_buf); + } + + if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0) + { + warning_note(wi, "tau-t = -1 is the value to signal that a group should not have temperature coupling. 
Treating your use of tau-t = 0 as if you used -1.");
+ }
+
+ if (ir->opts.tau_t[i] >= 0)
+ {
+ tau_min = min(tau_min, ir->opts.tau_t[i]);
+ }
+ }
+ if (ir->etc != etcNO && ir->nsttcouple == -1)
+ {
+ ir->nsttcouple = ir_optimal_nsttcouple(ir);
+ }
+
+ if (EI_VV(ir->eI))
+ {
+ if ((ir->etc == etcNOSEHOOVER) && (ir->epc == epcBERENDSEN))
+ {
+ gmx_fatal(FARGS, "Cannot do Nose-Hoover temperature with Berendsen pressure control with md-vv; use either vrescale temperature with berendsen pressure or Nose-Hoover temperature with MTTK pressure");
+ }
+ if ((ir->epc == epcMTTK) && (ir->etc > etcNO))
+ {
+ if (ir->nstpcouple != ir->nsttcouple)
+ {
+ int mincouple = min(ir->nstpcouple, ir->nsttcouple);
+ ir->nstpcouple = ir->nsttcouple = mincouple;
+ sprintf(warn_buf, "for current Trotter decomposition methods with vv, nsttcouple and nstpcouple must be equal. Both have been reset to min(nsttcouple,nstpcouple) = %d", mincouple);
+ warning_note(wi, warn_buf);
+ }
+ }
+ }
+ /* velocity verlet with averaged kinetic energy KE = 0.5*(KE(t+1/2) + KE(t-1/2)) is implemented
+ primarily for testing purposes, and does not work with temperature coupling other than 1 */
+
+ if (ETC_ANDERSEN(ir->etc))
+ {
+ if (ir->nsttcouple != 1)
+ {
+ ir->nsttcouple = 1;
+ sprintf(warn_buf, "Andersen temperature control methods assume nsttcouple = 1; there is no need for nsttcouple > 1, since no global parameters are computed. nsttcouple has been reset to 1");
+ warning_note(wi, warn_buf);
+ }
+ }
+ nstcmin = tcouple_min_integration_steps(ir->etc);
+ if (nstcmin > 1)
+ {
+ if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
+ {
+ sprintf(warn_buf, "For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
+ ETCOUPLTYPE(ir->etc),
+ tau_min, nstcmin,
+ ir->nsttcouple*ir->delta_t);
+ warning(wi, warn_buf);
+ }
+ }
+ for (i = 0; (i < nr); i++)
+ {
+ ir->opts.ref_t[i] = strtod(ptr2[i], NULL);
+ if (ir->opts.ref_t[i] < 0)
+ {
+ gmx_fatal(FARGS, "ref-t for group %d negative", i);
+ }
+ }
+ /* set the lambda mc temperature to the md integrator temperature (which should be defined
+ if we are in this conditional) if mc_temp is negative */
+ if (ir->expandedvals->mc_temp < 0)
+ {
+ ir->expandedvals->mc_temp = ir->opts.ref_t[0]; /*for now, set to the first reft */
+ }
+ }
+
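+ /* Editorial example (hypothetical values) of the annealing input parsed
+ * below, for two T-coupling groups, the first annealed once, the second
+ * periodically (with tinit = 0):
+ * annealing = single periodic
+ * annealing-npoints = 2 3
+ * annealing-time = 0 50 0 30 60
+ * annealing-temp = 300 280 300 320 300
+ * annealing-time and annealing-temp each list sum(annealing-npoints) = 5
+ * values, with the times of each group ascending and starting at tinit. */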
+ /* Simulated annealing for each group. There are nr groups */
+ nSA = str_nelem(is->anneal, MAXPTR, ptr1);
+ if (nSA == 1 && (ptr1[0][0] == 'n' || ptr1[0][0] == 'N'))
+ {
+ nSA = 0;
+ }
+ if (nSA > 0 && nSA != nr)
+ {
+ gmx_fatal(FARGS, "Not enough annealing values: %d (for %d groups)\n", nSA, nr);
+ }
+ else
+ {
+ snew(ir->opts.annealing, nr);
+ snew(ir->opts.anneal_npoints, nr);
+ snew(ir->opts.anneal_time, nr);
+ snew(ir->opts.anneal_temp, nr);
+ for (i = 0; i < nr; i++)
+ {
+ ir->opts.annealing[i] = eannNO;
+ ir->opts.anneal_npoints[i] = 0;
+ ir->opts.anneal_time[i] = NULL;
+ ir->opts.anneal_temp[i] = NULL;
+ }
+ if (nSA > 0)
+ {
+ bAnneal = FALSE;
+ for (i = 0; i < nr; i++)
+ {
+ if (ptr1[i][0] == 'n' || ptr1[i][0] == 'N')
+ {
+ ir->opts.annealing[i] = eannNO;
+ }
+ else if (ptr1[i][0] == 's' || ptr1[i][0] == 'S')
+ {
+ ir->opts.annealing[i] = eannSINGLE;
+ bAnneal = TRUE;
+ }
+ else if (ptr1[i][0] == 'p' || ptr1[i][0] == 'P')
+ {
+ ir->opts.annealing[i] = eannPERIODIC;
+ bAnneal = TRUE;
+ }
+ }
+ if (bAnneal)
+ {
+ /* Read the other fields too */
+ nSA_points = str_nelem(is->anneal_npoints, MAXPTR, ptr1);
+ if (nSA_points != nSA)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-npoints values for %d groups\n", nSA_points, nSA);
+ }
+ for (k = 0, i = 0; i < nr; i++)
+ {
+ ir->opts.anneal_npoints[i] = strtol(ptr1[i], NULL, 10);
+ if (ir->opts.anneal_npoints[i] == 1)
+ {
+ gmx_fatal(FARGS, "Please specify at least a start and an end point for annealing\n");
+ }
+ snew(ir->opts.anneal_time[i], ir->opts.anneal_npoints[i]);
+ snew(ir->opts.anneal_temp[i], ir->opts.anneal_npoints[i]);
+ k += ir->opts.anneal_npoints[i];
+ }
+
+ nSA_time = str_nelem(is->anneal_time, MAXPTR, ptr1);
+ if (nSA_time != k)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-time values, wanted %d\n", nSA_time, k);
+ }
+ nSA_temp = str_nelem(is->anneal_temp, MAXPTR, ptr2);
+ if (nSA_temp != k)
+ {
+ gmx_fatal(FARGS, "Found %d annealing-temp values, wanted %d\n", nSA_temp, k);
+ }
+
+ for (i = 0, k = 0; i < nr; i++)
+ {
+
+ for (j = 0; j < ir->opts.anneal_npoints[i]; j++)
+ {
+ ir->opts.anneal_time[i][j] = strtod(ptr1[k], NULL);
+ ir->opts.anneal_temp[i][j] = strtod(ptr2[k], NULL);
+ if (j == 0)
+ {
+ if (ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
+ {
+ gmx_fatal(FARGS, "First time point for annealing > init_t.\n");
+ }
+ }
+ else
+ {
+ /* j>0 */
+ if (ir->opts.anneal_time[i][j] < ir->opts.anneal_time[i][j-1])
+ {
+ gmx_fatal(FARGS, "Annealing timepoints out of order: t=%f comes after t=%f\n",
+ ir->opts.anneal_time[i][j], ir->opts.anneal_time[i][j-1]);
+ }
+ }
+ if (ir->opts.anneal_temp[i][j] < 0)
+ {
+ gmx_fatal(FARGS, "Found negative temperature in annealing: %f\n", ir->opts.anneal_temp[i][j]);
+ }
+ k++;
+ }
+ }
+ /* Print out some summary information, to make sure we got it right */
+ for (i = 0, k = 0; i < nr; i++)
+ {
+ if (ir->opts.annealing[i] != eannNO)
+ {
+ j = groups->grps[egcTC].nm_ind[i];
+ fprintf(stderr, "Simulated annealing for group %s: %s, %d timepoints\n",
+ *(groups->grpname[j]), eann_names[ir->opts.annealing[i]],
+ ir->opts.anneal_npoints[i]);
+ fprintf(stderr, "Time (ps) Temperature (K)\n");
+ /* All terms except the last one */
+ for (j = 0; j < (ir->opts.anneal_npoints[i]-1); j++)
+ {
+ fprintf(stderr, "%9.1f %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
+ }
+
+ /* Finally the last one */
+ j = ir->opts.anneal_npoints[i]-1;
+ if (ir->opts.annealing[i] == eannSINGLE)
+ {
+ fprintf(stderr, "%9.1f- %5.1f\n", ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]);
+ }
+ else
+ {
+ fprintf(stderr, "%9.1f %5.1f\n",
ir->opts.anneal_time[i][j], ir->opts.anneal_temp[i][j]); + if (fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0]) > GMX_REAL_EPS) + { + warning_note(wi, "There is a temperature jump when your annealing loops back.\n"); + } + } + } + } + } + } + + if (ir->ePull != epullNO) + { + make_pull_groups(ir->pull, is->pull_grp, grps, gnames); + + make_pull_coords(ir->pull); + } + + if (ir->bRot) + { + make_rotation_groups(ir->rot, is->rot_grp, grps, gnames); + } + + if (ir->eSwapCoords != eswapNO) + { + make_swap_groups(ir->swap, swapgrp, splitgrp0, splitgrp1, solgrp, grps, gnames); + } + + /* Make indices for IMD session */ + if (ir->bIMD) + { + make_IMD_group(ir->imd, is->imd_grp, grps, gnames); + } + + nacc = str_nelem(is->acc, MAXPTR, ptr1); + nacg = str_nelem(is->accgrps, MAXPTR, ptr2); + if (nacg*DIM != nacc) + { + gmx_fatal(FARGS, "Invalid Acceleration input: %d groups and %d acc. values", + nacg, nacc); + } + do_numbering(natoms, groups, nacg, ptr2, grps, gnames, egcACC, + restnm, egrptpALL_GENREST, bVerbose, wi); + nr = groups->grps[egcACC].nr; + snew(ir->opts.acc, nr); + ir->opts.ngacc = nr; + + for (i = k = 0; (i < nacg); i++) + { + for (j = 0; (j < DIM); j++, k++) + { + ir->opts.acc[i][j] = strtod(ptr1[k], NULL); + } + } + for (; (i < nr); i++) + { + for (j = 0; (j < DIM); j++) + { + ir->opts.acc[i][j] = 0; + } + } + + nfrdim = str_nelem(is->frdim, MAXPTR, ptr1); + nfreeze = str_nelem(is->freeze, MAXPTR, ptr2); + if (nfrdim != DIM*nfreeze) + { + gmx_fatal(FARGS, "Invalid Freezing input: %d groups and %d freeze values", + nfreeze, nfrdim); + } + do_numbering(natoms, groups, nfreeze, ptr2, grps, gnames, egcFREEZE, + restnm, egrptpALL_GENREST, bVerbose, wi); + nr = groups->grps[egcFREEZE].nr; + ir->opts.ngfrz = nr; + snew(ir->opts.nFreeze, nr); + for (i = k = 0; (i < nfreeze); i++) + { + for (j = 0; (j < DIM); j++, k++) + { + ir->opts.nFreeze[i][j] = (gmx_strncasecmp(ptr1[k], "Y", 1) == 0); + if (!ir->opts.nFreeze[i][j]) + { + if (gmx_strncasecmp(ptr1[k], "N", 1) != 0) + { + sprintf(warn_buf, "Please use Y(ES) or N(O) for freezedim only " + "(not %s)", ptr1[k]); + warning(wi, warn_buf); + } + } + } + } + for (; (i < nr); i++) + { + for (j = 0; (j < DIM); j++) + { + ir->opts.nFreeze[i][j] = 0; + } + } + + nenergy = str_nelem(is->energy, MAXPTR, ptr1); + do_numbering(natoms, groups, nenergy, ptr1, grps, gnames, egcENER, + restnm, egrptpALL_GENREST, bVerbose, wi); + add_wall_energrps(groups, ir->nwall, symtab); + ir->opts.ngener = groups->grps[egcENER].nr; + nvcm = str_nelem(is->vcm, MAXPTR, ptr1); + bRest = + do_numbering(natoms, groups, nvcm, ptr1, grps, gnames, egcVCM, + restnm, nvcm == 0 ? egrptpALL_GENREST : egrptpPART, bVerbose, wi); + if (bRest) + { + warning(wi, "Some atoms are not part of any center of mass motion removal group.\n" + "This may lead to artifacts.\n" + "In most cases one should use one group for the whole system."); + } + + /* Now we have filled the freeze struct, so we can calculate NRDF */ + calc_nrdf(mtop, ir, gnames); + + if (v && NULL) + { + real fac, ntot = 0; + + /* Must check per group!
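+ * Note: the 'if (v && NULL)' condition above always evaluates to + * false, so the velocity rescaling below (which would compensate the + * kinetic energy for degrees of freedom removed by constraints and + * COM motion removal) is effectively dead code.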
*/ + for (i = 0; (i < ir->opts.ngtc); i++) + { + ntot += ir->opts.nrdf[i]; + } + if (ntot != (DIM*natoms)) + { + fac = sqrt(ntot/(DIM*natoms)); + if (bVerbose) + { + fprintf(stderr, "Scaling velocities by a factor of %.3f to account for constraints\n" + "and removal of center of mass motion\n", fac); + } + for (i = 0; (i < natoms); i++) + { + svmul(fac, v[i], v[i]); + } + } + } + + nuser = str_nelem(is->user1, MAXPTR, ptr1); + do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser1, + restnm, egrptpALL_GENREST, bVerbose, wi); + nuser = str_nelem(is->user2, MAXPTR, ptr1); + do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcUser2, + restnm, egrptpALL_GENREST, bVerbose, wi); + nuser = str_nelem(is->x_compressed_groups, MAXPTR, ptr1); + do_numbering(natoms, groups, nuser, ptr1, grps, gnames, egcCompressedX, + restnm, egrptpONE, bVerbose, wi); + nofg = str_nelem(is->orirefitgrp, MAXPTR, ptr1); + do_numbering(natoms, groups, nofg, ptr1, grps, gnames, egcORFIT, + restnm, egrptpALL_GENREST, bVerbose, wi); + + /* QMMM input processing */ + nQMg = str_nelem(is->QMMM, MAXPTR, ptr1); + nQMmethod = str_nelem(is->QMmethod, MAXPTR, ptr2); + nQMbasis = str_nelem(is->QMbasis, MAXPTR, ptr3); + if ((nQMmethod != nQMg) || (nQMbasis != nQMg)) + { + gmx_fatal(FARGS, "Invalid QMMM input: %d groups %d basis sets" + " and %d methods\n", nQMg, nQMbasis, nQMmethod); + } + /* group rest, if any, is always MM! */ + do_numbering(natoms, groups, nQMg, ptr1, grps, gnames, egcQMMM, + restnm, egrptpALL_GENREST, bVerbose, wi); + nr = nQMg; /*atoms->grps[egcQMMM].nr;*/ + ir->opts.ngQM = nQMg; + snew(ir->opts.QMmethod, nr); + snew(ir->opts.QMbasis, nr); + for (i = 0; i < nr; i++) + { + /* input consists of strings: RHF CASSCF PM3 ... These need to be + * converted to the corresponding enum in names.c + */ + ir->opts.QMmethod[i] = search_QMstring(ptr2[i], eQMmethodNR, + eQMmethod_names); + ir->opts.QMbasis[i] = search_QMstring(ptr3[i], eQMbasisNR, + eQMbasis_names); + + } + nQMmult = str_nelem(is->QMmult, MAXPTR, ptr1); + nQMcharge = str_nelem(is->QMcharge, MAXPTR, ptr2); + nbSH = str_nelem(is->bSH, MAXPTR, ptr3); + snew(ir->opts.QMmult, nr); + snew(ir->opts.QMcharge, nr); + snew(ir->opts.bSH, nr); + + for (i = 0; i < nr; i++) + { + ir->opts.QMmult[i] = strtol(ptr1[i], NULL, 10); + ir->opts.QMcharge[i] = strtol(ptr2[i], NULL, 10); + ir->opts.bSH[i] = (gmx_strncasecmp(ptr3[i], "Y", 1) == 0); + } + + nCASelec = str_nelem(is->CASelectrons, MAXPTR, ptr1); + nCASorb = str_nelem(is->CASorbitals, MAXPTR, ptr2); + snew(ir->opts.CASelectrons, nr); + snew(ir->opts.CASorbitals, nr); + for (i = 0; i < nr; i++) + { + ir->opts.CASelectrons[i] = strtol(ptr1[i], NULL, 10); + ir->opts.CASorbitals[i] = strtol(ptr2[i], NULL, 10); + } + /* special optimization options */ + + nbOPT = str_nelem(is->bOPT, MAXPTR, ptr1); + nbTS = str_nelem(is->bTS, MAXPTR, ptr2); + snew(ir->opts.bOPT, nr); + snew(ir->opts.bTS, nr); + for (i = 0; i < nr; i++) + { + ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i], "Y", 1) == 0); + ir->opts.bTS[i] = (gmx_strncasecmp(ptr2[i], "Y", 1) == 0); + } + nSAon = str_nelem(is->SAon, MAXPTR, ptr1); + nSAoff = str_nelem(is->SAoff, MAXPTR, ptr2); + nSAsteps = str_nelem(is->SAsteps, MAXPTR, ptr3); + snew(ir->opts.SAon, nr); + snew(ir->opts.SAoff, nr); + snew(ir->opts.SAsteps, nr); + + for (i = 0; i < nr; i++) + { + ir->opts.SAon[i] = strtod(ptr1[i], NULL); + ir->opts.SAoff[i] = strtod(ptr2[i], NULL); + ir->opts.SAsteps[i] = strtol(ptr3[i], NULL, 10); + } + /* end of QMMM input */ + + if (bVerbose) + { + for (i = 0; (i <
egcNR); i++) + { + fprintf(stderr, "%-16s has %d element(s):", gtypes[i], groups->grps[i].nr); + for (j = 0; (j < groups->grps[i].nr); j++) + { + fprintf(stderr, " %s", *(groups->grpname[groups->grps[i].nm_ind[j]])); + } + fprintf(stderr, "\n"); + } + } + + nr = groups->grps[egcENER].nr; + snew(ir->opts.egp_flags, nr*nr); + + bExcl = do_egp_flag(ir, groups, "energygrp-excl", is->egpexcl, EGP_EXCL); + if (bExcl && ir->cutoff_scheme == ecutsVERLET) + { + warning_error(wi, "Energy group exclusions are not (yet) implemented for the Verlet scheme"); + } + if (bExcl && EEL_FULL(ir->coulombtype)) + { + warning(wi, "Can not exclude the lattice Coulomb energy between energy groups"); + } + + bTable = do_egp_flag(ir, groups, "energygrp-table", is->egptable, EGP_TABLE); + if (bTable && !(ir->vdwtype == evdwUSER) && + !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) && + !(ir->coulombtype == eelPMEUSERSWITCH)) + { + gmx_fatal(FARGS, "Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb"); + } + + decode_cos(is->efield_x, &(ir->ex[XX])); + decode_cos(is->efield_xt, &(ir->et[XX])); + decode_cos(is->efield_y, &(ir->ex[YY])); + decode_cos(is->efield_yt, &(ir->et[YY])); + decode_cos(is->efield_z, &(ir->ex[ZZ])); + decode_cos(is->efield_zt, &(ir->et[ZZ])); + + if (ir->bAdress) + { + do_adress_index(ir->adress, groups, gnames, &(ir->opts), wi); + } + + for (i = 0; (i < grps->nr); i++) + { + sfree(gnames[i]); + } + sfree(gnames); + done_blocka(grps); + sfree(grps); + +} + + + +static void check_disre(gmx_mtop_t *mtop) +{ + gmx_ffparams_t *ffparams; + t_functype *functype; + t_iparams *ip; + int i, ndouble, ftype; + int label, old_label; + + if (gmx_mtop_ftype_count(mtop, F_DISRES) > 0) + { + ffparams = &mtop->ffparams; + functype = ffparams->functype; + ip = ffparams->iparams; + ndouble = 0; + old_label = -1; + for (i = 0; i < ffparams->ntypes; i++) + { + ftype = functype[i]; + if (ftype == F_DISRES) + { + label = ip[i].disres.label; + if (label == old_label) + { + fprintf(stderr, "Distance restraint index %d occurs twice\n", label); + ndouble++; + } + old_label = label; + } + } + if (ndouble > 0) + { + gmx_fatal(FARGS, "Found %d double distance restraint indices,\n" + "probably the parameters for multiple pairs in one restraint " + "are not identical\n", ndouble); + } + } +} + +static gmx_bool absolute_reference(t_inputrec *ir, gmx_mtop_t *sys, + gmx_bool posres_only, + ivec AbsRef) +{ + int d, g, i; + gmx_mtop_ilistloop_t iloop; + t_ilist *ilist; + int nmol; + t_iparams *pr; + + clear_ivec(AbsRef); + + if (!posres_only) + { + /* Check the COM */ + for (d = 0; d < DIM; d++) + { + AbsRef[d] = (d < ndof_com(ir) ? 
0 : 1); + } + /* Check for freeze groups */ + for (g = 0; g < ir->opts.ngfrz; g++) + { + for (d = 0; d < DIM; d++) + { + if (ir->opts.nFreeze[g][d] != 0) + { + AbsRef[d] = 1; + } + } + } + } + + /* Check for position restraints */ + iloop = gmx_mtop_ilistloop_init(sys); + while (gmx_mtop_ilistloop_next(iloop, &ilist, &nmol)) + { + if (nmol > 0 && + (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0)) + { + for (i = 0; i < ilist[F_POSRES].nr; i += 2) + { + pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]]; + for (d = 0; d < DIM; d++) + { + if (pr->posres.fcA[d] != 0) + { + AbsRef[d] = 1; + } + } + } + for (i = 0; i < ilist[F_FBPOSRES].nr; i += 2) + { + /* Check for flat-bottom posres */ + pr = &sys->ffparams.iparams[ilist[F_FBPOSRES].iatoms[i]]; + if (pr->fbposres.k != 0) + { + switch (pr->fbposres.geom) + { + case efbposresSPHERE: + AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1; + break; + case efbposresCYLINDER: + AbsRef[XX] = AbsRef[YY] = 1; + break; + case efbposresX: /* d=XX */ + case efbposresY: /* d=YY */ + case efbposresZ: /* d=ZZ */ + d = pr->fbposres.geom - efbposresX; + AbsRef[d] = 1; + break; + default: + gmx_fatal(FARGS, " Invalid geometry for flat-bottom position restraint.\n" + "Expected nr between 1 and %d. Found %d\n", efbposresNR-1, + pr->fbposres.geom); + } + } + } + } + } + + return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0); +} + +static void +check_combination_rule_differences(const gmx_mtop_t *mtop, int state, + gmx_bool *bC6ParametersWorkWithGeometricRules, + gmx_bool *bC6ParametersWorkWithLBRules, + gmx_bool *bLBRulesPossible) +{ + int ntypes, tpi, tpj, thisLBdiff, thisgeomdiff; + int *typecount; + real tol; + double geometricdiff, LBdiff; + double c6i, c6j, c12i, c12j; + double c6, c6_geometric, c6_LB; + double sigmai, sigmaj, epsi, epsj; + gmx_bool bCanDoLBRules, bCanDoGeometricRules; + const char *ptr; + + /* A tolerance of 1e-5 seems reasonable for (possibly hand-typed) + * force-field floating point parameters. 
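+ * The loop below recovers sigma = (c12/c6)^(1/6) and + * eps = c6^2/(4*c12) for each pair of types, and then compares the + * actual C6 against both the geometric rule, sqrt(c6i*c6j), and the + * Lorentz-Berthelot estimate, 4*sqrt(epsi*epsj)*((sigmai+sigmaj)/2)^6, + * using this tolerance.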
+ */ + tol = 1e-5; + ptr = getenv("GMX_LJCOMB_TOL"); + if (ptr != NULL) + { + double dbl; + + sscanf(ptr, "%lf", &dbl); + tol = dbl; + } + + *bC6ParametersWorkWithLBRules = TRUE; + *bC6ParametersWorkWithGeometricRules = TRUE; + bCanDoLBRules = TRUE; + bCanDoGeometricRules = TRUE; + ntypes = mtop->ffparams.atnr; + snew(typecount, ntypes); + gmx_mtop_count_atomtypes(mtop, state, typecount); + geometricdiff = LBdiff = 0.0; + *bLBRulesPossible = TRUE; + for (tpi = 0; tpi < ntypes; ++tpi) + { + c6i = mtop->ffparams.iparams[(ntypes + 1) * tpi].lj.c6; + c12i = mtop->ffparams.iparams[(ntypes + 1) * tpi].lj.c12; + for (tpj = tpi; tpj < ntypes; ++tpj) + { + c6j = mtop->ffparams.iparams[(ntypes + 1) * tpj].lj.c6; + c12j = mtop->ffparams.iparams[(ntypes + 1) * tpj].lj.c12; + c6 = mtop->ffparams.iparams[ntypes * tpi + tpj].lj.c6; + c6_geometric = sqrt(c6i * c6j); + if (!gmx_numzero(c6_geometric)) + { + if (!gmx_numzero(c12i) && !gmx_numzero(c12j)) + { + sigmai = pow(c12i / c6i, 1.0/6.0); + sigmaj = pow(c12j / c6j, 1.0/6.0); + epsi = c6i * c6i /(4.0 * c12i); + epsj = c6j * c6j /(4.0 * c12j); + c6_LB = 4.0 * pow(epsi * epsj, 1.0/2.0) * pow(0.5 * (sigmai + sigmaj), 6); + } + else + { + *bLBRulesPossible = FALSE; + c6_LB = c6_geometric; + } + bCanDoLBRules = gmx_within_tol(c6_LB, c6, tol); + } + + if (FALSE == bCanDoLBRules) + { + *bC6ParametersWorkWithLBRules = FALSE; + } + + bCanDoGeometricRules = gmx_within_tol(c6_geometric, c6, tol); + + if (FALSE == bCanDoGeometricRules) + { + *bC6ParametersWorkWithGeometricRules = FALSE; + } + } + } + sfree(typecount); +} + +static void +check_combination_rules(const t_inputrec *ir, const gmx_mtop_t *mtop, + warninp_t wi) +{ + char err_buf[256]; + gmx_bool bLBRulesPossible, bC6ParametersWorkWithGeometricRules, bC6ParametersWorkWithLBRules; + + check_combination_rule_differences(mtop, 0, + &bC6ParametersWorkWithGeometricRules, + &bC6ParametersWorkWithLBRules, + &bLBRulesPossible); + if (ir->ljpme_combination_rule == eljpmeLB) + { + if (FALSE == bC6ParametersWorkWithLBRules || FALSE == bLBRulesPossible) + { + warning(wi, "You are using arithmetic-geometric combination rules " + "in LJ-PME, but your non-bonded C6 parameters do not " + "follow these rules."); + } + } + else + { + if (FALSE == bC6ParametersWorkWithGeometricRules) + { + if (ir->eDispCorr != edispcNO) + { + warning_note(wi, "You are using geometric combination rules in " + "LJ-PME, but your non-bonded C6 parameters do " + "not follow these rules. " + "This will introduce very small errors in the forces and energies in " + "your simulations. Dispersion correction will correct total energy " + "and/or pressure for isotropic systems, but not forces or surface tensions."); + } + else + { + warning_note(wi, "You are using geometric combination rules in " + "LJ-PME, but your non-bonded C6 parameters do " + "not follow these rules. " + "This will introduce very small errors in the forces and energies in " + "your simulations. 
If your system is homogeneous, consider using dispersion correction " + "for the total energy and pressure."); + } + } + } +} + +void triple_check(const char *mdparin, t_inputrec *ir, gmx_mtop_t *sys, + warninp_t wi) +{ + char err_buf[STRLEN]; + int i, m, c, nmol, npct; + gmx_bool bCharge, bAcc; + real gdt_max, *mgrp, mt; + rvec acc; + gmx_mtop_atomloop_block_t aloopb; + gmx_mtop_atomloop_all_t aloop; + t_atom *atom; + ivec AbsRef; + char warn_buf[STRLEN]; + + set_warning_line(wi, mdparin, -1); + + if (ir->cutoff_scheme == ecutsVERLET && + ir->verletbuf_tol > 0 && + ir->nstlist > 1 && + ((EI_MD(ir->eI) || EI_SD(ir->eI)) && + (ir->etc == etcVRESCALE || ir->etc == etcBERENDSEN))) + { + /* Check if a too small Verlet buffer might potentially + * cause more drift than the thermostat can couple off. + */ + /* Temperature error fraction for warning and suggestion */ + const real T_error_warn = 0.002; + const real T_error_suggest = 0.001; + /* For safety: 2 DOF per atom (typical with constraints) */ + const real nrdf_at = 2; + real T, tau, max_T_error; + int i; + + T = 0; + tau = 0; + for (i = 0; i < ir->opts.ngtc; i++) + { + T = max(T, ir->opts.ref_t[i]); + tau = max(tau, ir->opts.tau_t[i]); + } + if (T > 0) + { + /* This is a worst case estimate of the temperature error, + * assuming perfect buffer estimation and no cancelation + * of errors. The factor 0.5 is because energy distributes + * equally over Ekin and Epot. + */ + max_T_error = 0.5*tau*ir->verletbuf_tol/(nrdf_at*BOLTZ*T); + if (max_T_error > T_error_warn) + { + sprintf(warn_buf, "With a verlet-buffer-tolerance of %g kJ/mol/ps, a reference temperature of %g and a tau_t of %g, your temperature might be off by up to %.1f%%. To ensure the error is below %.1f%%, decrease verlet-buffer-tolerance to %.0e or decrease tau_t.", + ir->verletbuf_tol, T, tau, + 100*max_T_error, + 100*T_error_suggest, + ir->verletbuf_tol*T_error_suggest/max_T_error); + warning(wi, warn_buf); + } + } + } + + if (ETC_ANDERSEN(ir->etc)) + { + int i; + + for (i = 0; i < ir->opts.ngtc; i++) + { + sprintf(err_buf, "all tau_t must currently be equal using Andersen temperature control, violated for group %d", i); + CHECK(ir->opts.tau_t[0] != ir->opts.tau_t[i]); + sprintf(err_buf, "all tau_t must be positive using Andersen temperature control, tau_t[%d]=%10.6f", + i, ir->opts.tau_t[i]); + CHECK(ir->opts.tau_t[i] < 0); + } + + for (i = 0; i < ir->opts.ngtc; i++) + { + int nsteps = (int)(ir->opts.tau_t[i]/ir->delta_t); + sprintf(err_buf, "tau_t/delta_t for group %d for temperature control method %s must be a multiple of nstcomm (%d), as velocities of atoms in coupled groups are randomized every time step.
The input tau_t (%8.3f) leads to %d steps per randomization", i, etcoupl_names[ir->etc], ir->nstcomm, ir->opts.tau_t[i], nsteps); + CHECK((nsteps % ir->nstcomm) && (ir->etc == etcANDERSENMASSIVE)); + } + } + + if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD && + ir->comm_mode == ecmNO && + !(absolute_reference(ir, sys, FALSE, AbsRef) || ir->nsteps <= 10) && + !ETC_ANDERSEN(ir->etc)) + { + warning(wi, "You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass"); + } + + /* Check for pressure coupling with absolute position restraints */ + if (ir->epc != epcNO && ir->refcoord_scaling == erscNO) + { + absolute_reference(ir, sys, TRUE, AbsRef); + { + for (m = 0; m < DIM; m++) + { + if (AbsRef[m] && norm2(ir->compress[m]) > 0) + { + warning(wi, "You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option."); + break; + } + } + } + } + + bCharge = FALSE; + aloopb = gmx_mtop_atomloop_block_init(sys); + while (gmx_mtop_atomloop_block_next(aloopb, &atom, &nmol)) + { + if (atom->q != 0 || atom->qB != 0) + { + bCharge = TRUE; + } + } + + if (!bCharge) + { + if (EEL_FULL(ir->coulombtype)) + { + sprintf(err_buf, + "You are using full electrostatics treatment %s for a system without charges.\n" + "This costs a lot of performance for just processing zeros, consider using %s instead.\n", + EELTYPE(ir->coulombtype), EELTYPE(eelCUT)); + warning(wi, err_buf); + } + } + else + { + if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent) + { + sprintf(err_buf, + "You are using a plain Coulomb cut-off, which might produce artifacts.\n" + "You might want to consider using %s electrostatics.\n", + EELTYPE(eelPME)); + warning_note(wi, err_buf); + } + } + + /* Check if combination rules used in LJ-PME are the same as in the force field */ + if (EVDW_PME(ir->vdwtype)) + { + check_combination_rules(ir, sys, wi); + } + + /* Generalized reaction field */ + if (ir->opts.ngtc == 0) + { + sprintf(err_buf, "No temperature coupling while using coulombtype %s", + eel_names[eelGRF]); + CHECK(ir->coulombtype == eelGRF); + } + else + { + sprintf(err_buf, "When using coulombtype = %s" + " ref-t for temperature coupling should be > 0", + eel_names[eelGRF]); + CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0)); + } + + if (ir->eI == eiSD1 && + (gmx_mtop_ftype_count(sys, F_CONSTR) > 0 || + gmx_mtop_ftype_count(sys, F_SETTLE) > 0)) + { + sprintf(warn_buf, "With constraints integrator %s is less accurate, consider using %s instead", ei_names[ir->eI], ei_names[eiSD2]); + warning_note(wi, warn_buf); + } + + bAcc = FALSE; + for (i = 0; (i < sys->groups.grps[egcACC].nr); i++) + { + for (m = 0; (m < DIM); m++) + { + if (fabs(ir->opts.acc[i][m]) > 1e-6) + { + bAcc = TRUE; + } + } + } + if (bAcc) + { + clear_rvec(acc); + snew(mgrp, sys->groups.grps[egcACC].nr); + aloop = gmx_mtop_atomloop_all_init(sys); + while (gmx_mtop_atomloop_all_next(aloop, &i, &atom)) + { + mgrp[ggrpnr(&sys->groups, egcACC, i)] += atom->m; + } + mt = 0.0; + for (i = 0; (i < sys->groups.grps[egcACC].nr); i++) + { + for (m = 0; (m < DIM); m++) + { + acc[m] += ir->opts.acc[i][m]*mgrp[i]; + } + mt += mgrp[i]; + } + for (m = 0; (m < DIM); m++) + { + if (fabs(acc[m]) > 1e-6) + { + const char *dim[DIM] = { "X", "Y", "Z" }; + fprintf(stderr, + "Net Acceleration in %s direction, will %s be corrected\n", + dim[m], ir->nstcomm != 0 ? 
"" : "not"); + if (ir->nstcomm != 0 && m < ndof_com(ir)) + { + acc[m] /= mt; + for (i = 0; (i < sys->groups.grps[egcACC].nr); i++) + { + ir->opts.acc[i][m] -= acc[m]; + } + } + } + } + sfree(mgrp); + } + + if (ir->efep != efepNO && ir->fepvals->sc_alpha != 0 && + !gmx_within_tol(sys->ffparams.reppow, 12.0, 10*GMX_DOUBLE_EPS)) + { + gmx_fatal(FARGS, "Soft-core interactions are only supported with VdW repulsion power 12"); + } + + if (ir->ePull != epullNO) + { + gmx_bool bPullAbsoluteRef; + + bPullAbsoluteRef = FALSE; + for (i = 0; i < ir->pull->ncoord; i++) + { + bPullAbsoluteRef = bPullAbsoluteRef || + ir->pull->coord[i].group[0] == 0 || + ir->pull->coord[i].group[1] == 0; + } + if (bPullAbsoluteRef) + { + absolute_reference(ir, sys, FALSE, AbsRef); + for (m = 0; m < DIM; m++) + { + if (ir->pull->dim[m] && !AbsRef[m]) + { + warning(wi, "You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts."); + break; + } + } + } + + if (ir->pull->eGeom == epullgDIRPBC) + { + for (i = 0; i < 3; i++) + { + for (m = 0; m <= i; m++) + { + if ((ir->epc != epcNO && ir->compress[i][m] != 0) || + ir->deform[i][m] != 0) + { + for (c = 0; c < ir->pull->ncoord; c++) + { + if (ir->pull->coord[c].vec[m] != 0) + { + gmx_fatal(FARGS, "Can not have dynamic box while using pull geometry '%s' (dim %c)", EPULLGEOM(ir->pull->eGeom), 'x'+m); + } + } + } + } + } + } + } + + check_disre(sys); +} + +void double_check(t_inputrec *ir, matrix box, gmx_bool bConstr, warninp_t wi) +{ + real min_size; + gmx_bool bTWIN; + char warn_buf[STRLEN]; + const char *ptr; + + ptr = check_box(ir->ePBC, box); + if (ptr) + { + warning_error(wi, ptr); + } + + if (bConstr && ir->eConstrAlg == econtSHAKE) + { + if (ir->shake_tol <= 0.0) + { + sprintf(warn_buf, "ERROR: shake-tol must be > 0 instead of %g\n", + ir->shake_tol); + warning_error(wi, warn_buf); + } + + if (IR_TWINRANGE(*ir) && ir->nstlist > 1) + { + sprintf(warn_buf, "With twin-range cut-off's and SHAKE the virial and the pressure are incorrect."); + if (ir->epc == epcNO) + { + warning(wi, warn_buf); + } + else + { + warning_error(wi, warn_buf); + } + } + } + + if ( (ir->eConstrAlg == econtLINCS) && bConstr) + { + /* If we have Lincs constraints: */ + if (ir->eI == eiMD && ir->etc == etcNO && + ir->eConstrAlg == econtLINCS && ir->nLincsIter == 1) + { + sprintf(warn_buf, "For energy conservation with LINCS, lincs_iter should be 2 or larger.\n"); + warning_note(wi, warn_buf); + } + + if ((ir->eI == eiCG || ir->eI == eiLBFGS) && (ir->nProjOrder < 8)) + { + sprintf(warn_buf, "For accurate %s with LINCS constraints, lincs-order should be 8 or more.", ei_names[ir->eI]); + warning_note(wi, warn_buf); + } + if (ir->epc == epcMTTK) + { + warning_error(wi, "MTTK not compatible with lincs -- use shake instead."); + } + } + + if (bConstr && ir->epc == epcMTTK) + { + warning_note(wi, "MTTK with constraints is deprecated, and will be removed in GROMACS 5.1"); + } + + if (ir->LincsWarnAngle > 90.0) + { + sprintf(warn_buf, "lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n"); + warning(wi, warn_buf); + ir->LincsWarnAngle = 90.0; + } + + if (ir->ePBC != epbcNONE) + { + if (ir->nstlist == 0) + { + warning(wi, "With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash."); + } + bTWIN = (ir->rlistlong > ir->rlist); + if (ir->ns_type == ensGRID) + { + if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC, box)) + { + sprintf(warn_buf, "ERROR: The 
cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n", + bTWIN ? (ir->rcoulomb == ir->rlistlong ? "rcoulomb" : "rvdw") : "rlist"); + warning_error(wi, warn_buf); + } + } + else + { + min_size = min(box[XX][XX], min(box[YY][YY], box[ZZ][ZZ])); + if (2*ir->rlistlong >= min_size) + { + sprintf(warn_buf, "ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist."); + warning_error(wi, warn_buf); + if (TRICLINIC(box)) + { + fprintf(stderr, "Grid search might allow larger cut-offs than simple search with triclinic boxes."); + } + } + } + } +} + +void check_chargegroup_radii(const gmx_mtop_t *mtop, const t_inputrec *ir, + rvec *x, + warninp_t wi) +{ + real rvdw1, rvdw2, rcoul1, rcoul2; + char warn_buf[STRLEN]; + + calc_chargegroup_radii(mtop, x, &rvdw1, &rvdw2, &rcoul1, &rcoul2); + + if (rvdw1 > 0) + { + printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n", + rvdw1, rvdw2); + } + if (rcoul1 > 0) + { + printf("Largest charge group radii for Coulomb: %5.3f, %5.3f nm\n", + rcoul1, rcoul2); + } + + if (ir->rlist > 0) + { + if (rvdw1 + rvdw2 > ir->rlist || + rcoul1 + rcoul2 > ir->rlist) + { + sprintf(warn_buf, + "The sum of the two largest charge group radii (%f) " + "is larger than rlist (%f)\n", + max(rvdw1+rvdw2, rcoul1+rcoul2), ir->rlist); + warning(wi, warn_buf); + } + else + { + /* Here we do not use the zero at cut-off macro, + * since user defined interactions might purposely + * not be zero at the cut-off. + */ + if (ir_vdw_is_zero_at_cutoff(ir) && + rvdw1 + rvdw2 > ir->rlistlong - ir->rvdw) + { + sprintf(warn_buf, "The sum of the two largest charge group " + "radii (%f) is larger than %s (%f) - rvdw (%f).\n" + "With exact cut-offs, better performance can be " + "obtained with cutoff-scheme = %s, because it " + "does not use charge groups at all.", + rvdw1+rvdw2, + ir->rlistlong > ir->rlist ? "rlistlong" : "rlist", + ir->rlistlong, ir->rvdw, + ecutscheme_names[ecutsVERLET]); + if (ir_NVE(ir)) + { + warning(wi, warn_buf); + } + else + { + warning_note(wi, warn_buf); + } + } + if (ir_coulomb_is_zero_at_cutoff(ir) && + rcoul1 + rcoul2 > ir->rlistlong - ir->rcoulomb) + { + sprintf(warn_buf, "The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f).\n" + "With exact cut-offs, better performance can be obtained with cutoff-scheme = %s, because it does not use charge groups at all.", + rcoul1+rcoul2, + ir->rlistlong > ir->rlist ? "rlistlong" : "rlist", + ir->rlistlong, ir->rcoulomb, + ecutscheme_names[ecutsVERLET]); + if (ir_NVE(ir)) + { + warning(wi, warn_buf); + } + else + { + warning_note(wi, warn_buf); + } + } + } + } +} diff --cc src/gromacs/legacyheaders/nonbonded.h index 6176d60631,0000000000..d05b3e5d7e mode 100644,000000..100644 --- a/src/gromacs/legacyheaders/nonbonded.h +++ b/src/gromacs/legacyheaders/nonbonded.h @@@ -1,97 -1,0 +1,98 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org.
+ * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#ifndef _nonbonded_h +#define _nonbonded_h + +#include "typedefs.h" +#include "pbc.h" +#include "network.h" +#include "tgroup.h" +#include "genborn.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* fixes auto-indentation problems */ +#endif + + + +void +gmx_nonbonded_setup(t_forcerec * fr, + gmx_bool bGenericKernelOnly); + + + + + +void +gmx_nonbonded_set_kernel_pointers(FILE * fplog, - t_nblist * nl); ++ t_nblist * nl, ++ gmx_bool bElecAndVdwSwitchDiffers); + + + +#define GMX_NONBONDED_DO_LR (1<<0) +#define GMX_NONBONDED_DO_FORCE (1<<1) +#define GMX_NONBONDED_DO_SHIFTFORCE (1<<2) +#define GMX_NONBONDED_DO_FOREIGNLAMBDA (1<<3) +#define GMX_NONBONDED_DO_POTENTIAL (1<<4) +#define GMX_NONBONDED_DO_SR (1<<5) + +void +do_nonbonded(t_forcerec *fr, + rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *md, t_blocka *excl, + gmx_grppairener_t *grppener, + t_nrnb *nrnb, real *lambda, real dvdlambda[], + int nls, int eNL, int flags); + +/* Calculate VdW/charge listed pair interactions (usually 1-4 interactions). + * global_atom_index is only passed for printing error messages. + */ +real +do_nonbonded_listed(int ftype, int nbonds, const t_iatom iatoms[], const t_iparams iparams[], + const rvec x[], rvec f[], rvec fshift[], const t_pbc *pbc, const t_graph *g, + real *lambda, real *dvdl, const t_mdatoms *md, const t_forcerec *fr, + gmx_grppairener_t *grppener, int *global_atom_index); + +#ifdef __cplusplus +} +#endif + +#endif diff --cc src/gromacs/mdlib/forcerec.c index 165a1243d2,0000000000..3b2d69ca04 mode 100644,000000..100644 --- a/src/gromacs/mdlib/forcerec.c +++ b/src/gromacs/mdlib/forcerec.c @@@ -1,3297 -1,0 +1,3326 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. 
+ * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> +#include <assert.h> +#include "sysstuff.h" +#include "typedefs.h" +#include "types/commrec.h" +#include "vec.h" +#include "gromacs/math/utilities.h" +#include "macros.h" +#include "gromacs/utility/smalloc.h" +#include "macros.h" +#include "gmx_fatal.h" +#include "physics.h" +#include "force.h" +#include "tables.h" +#include "nonbonded.h" +#include "invblock.h" +#include "names.h" +#include "network.h" +#include "pbc.h" +#include "ns.h" +#include "mshift.h" +#include "txtdump.h" +#include "coulomb.h" +#include "md_support.h" +#include "md_logging.h" +#include "domdec.h" +#include "qmmm.h" +#include "copyrite.h" +#include "mtop_util.h" +#include "nbnxn_simd.h" +#include "nbnxn_search.h" +#include "nbnxn_atomdata.h" +#include "nbnxn_consts.h" +#include "gmx_omp_nthreads.h" +#include "gmx_detect_hardware.h" +#include "inputrec.h" + +#include "types/nbnxn_cuda_types_ext.h" +#include "gpu_utils.h" +#include "nbnxn_cuda_data_mgmt.h" +#include "pmalloc_cuda.h" + +t_forcerec *mk_forcerec(void) +{ + t_forcerec *fr; + + snew(fr, 1); + + return fr; +} + +#ifdef DEBUG +static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr) +{ + int i, j; + + for (i = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++) + { + fprintf(fp, "%2d - %2d", i, j); + if (bBHAM) + { + fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j), + BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0); + } + else + { + fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0, + C12(nbfp, atnr, i, j)/12.0); + } + } + } +} +#endif + +static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM) +{ + real *nbfp; + int i, j, k, atnr; + + atnr = idef->atnr; + if (bBHAM) + { + snew(nbfp, 3*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++, k++) + { + BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a; + BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b; + /* nbfp now includes the 6.0 derivative prefactor */ + BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0; + } + } + } + else + { + snew(nbfp, 2*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j <
atnr); j++, k++) + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0; + C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0; + } + } + } + + return nbfp; +} + +static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr) +{ + int i, j, k, atnr; + real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj; + real *grid; + + /* For LJ-PME simulations, we correct the energies with the reciprocal space + * inside of the cut-off. To do this the non-bonded kernels need to have + * access to the C6-values used on the reciprocal grid in pme.c + */ + + atnr = idef->atnr; + snew(grid, 2*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++, k++) + { + c6i = idef->iparams[i*(atnr+1)].lj.c6; + c12i = idef->iparams[i*(atnr+1)].lj.c12; + c6j = idef->iparams[j*(atnr+1)].lj.c6; + c12j = idef->iparams[j*(atnr+1)].lj.c12; + c6 = sqrt(c6i * c6j); + if (fr->ljpme_combination_rule == eljpmeLB + && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j)) + { + sigmai = pow(c12i / c6i, 1.0/6.0); + sigmaj = pow(c12j / c6j, 1.0/6.0); + epsi = c6i * c6i / c12i; + epsj = c6j * c6j / c12j; + c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6); + } + /* Store the elements at the same relative positions as C6 in nbfp in order + * to simplify access in the kernels + */ + grid[2*(atnr*i+j)] = c6*6.0; + } + } + return grid; +} + +static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule) +{ + real *nbfp; + int i, j, k, atnr; + real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj; + real c6, c12; + + atnr = idef->atnr; + snew(nbfp, 2*atnr*atnr); + for (i = 0; i < atnr; ++i) + { + for (j = 0; j < atnr; ++j) + { + c6i = idef->iparams[i*(atnr+1)].lj.c6; + c12i = idef->iparams[i*(atnr+1)].lj.c12; + c6j = idef->iparams[j*(atnr+1)].lj.c6; + c12j = idef->iparams[j*(atnr+1)].lj.c12; + c6 = sqrt(c6i * c6j); + c12 = sqrt(c12i * c12j); + if (comb_rule == eCOMB_ARITHMETIC + && !gmx_numzero(c6) && !gmx_numzero(c12)) + { + sigmai = pow(c12i / c6i, 1.0/6.0); + sigmaj = pow(c12j / c6j, 1.0/6.0); + epsi = c6i * c6i / c12i; + epsj = c6j * c6j / c12j; + c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6); + c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12); + } + C6(nbfp, atnr, i, j) = c6*6.0; + C12(nbfp, atnr, i, j) = c12*12.0; + } + } + return nbfp; +} + +/* This routine sets fr->solvent_opt to the most common solvent in the + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in + * the fr->solvent_type array with the correct type (or esolNO). + * + * Charge groups that fulfill the conditions but are not identical to the + * most common one will be marked as esolNO in the solvent_type array. + * + * TIP3P is identical to SPC for these purposes, so we call it + * SPC in the arrays (Apologies to Bill Jorgensen ;-) + * + * NOTE: QM particle should not
Not even if there is only one charge + * group in the Qm + */ + +typedef struct +{ + int model; + int count; + int vdwtype[4]; + real charge[4]; +} solvent_parameters_t; + +static void +check_solvent_cg(const gmx_moltype_t *molt, + int cg0, + int nmol, + const unsigned char *qm_grpnr, + const t_grps *qm_grps, + t_forcerec * fr, + int *n_solvent_parameters, + solvent_parameters_t **solvent_parameters_p, + int cginfo, + int *cg_sp) +{ + const t_blocka *excl; + t_atom *atom; + int j, k; + int j0, j1, nj; + gmx_bool perturbed; + gmx_bool has_vdw[4]; + gmx_bool match; + real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */ + int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */ + int tjA; + gmx_bool qm; + solvent_parameters_t *solvent_parameters; + + /* We use a list with parameters for each solvent type. + * Every time we discover a new molecule that fulfills the basic + * conditions for a solvent we compare with the previous entries + * in these lists. If the parameters are the same we just increment + * the counter for that type, and otherwise we create a new type + * based on the current molecule. + * + * Once we've finished going through all molecules we check which + * solvent is most common, and mark all those molecules while we + * clear the flag on all others. + */ + + solvent_parameters = *solvent_parameters_p; + + /* Mark the cg first as non optimized */ + *cg_sp = -1; + + /* Check if this cg has no exclusions with atoms in other charge groups + * and all atoms inside the charge group excluded. + * We only have 3 or 4 atom solvent loops. + */ + if (GET_CGINFO_EXCL_INTER(cginfo) || + !GET_CGINFO_EXCL_INTRA(cginfo)) + { + return; + } + + /* Get the indices of the first atom in this charge group */ + j0 = molt->cgs.index[cg0]; + j1 = molt->cgs.index[cg0+1]; + + /* Number of atoms in our molecule */ + nj = j1 - j0; + + if (debug) + { + fprintf(debug, + "Moltype '%s': there are %d atoms in this charge group\n", + *molt->name, nj); + } + + /* Check if it could be an SPC (3 atoms) or TIP4p (4) water, + * otherwise skip it. + */ + if (nj < 3 || nj > 4) + { + return; + } + + /* Check if we are doing QM on this group */ + qm = FALSE; + if (qm_grpnr != NULL) + { + for (j = j0; j < j1 && !qm; j++) + { + qm = (qm_grpnr[j] < qm_grps->nr - 1); + } + } + /* Cannot use solvent optimization with QM */ + if (qm) + { + return; + } + + atom = molt->atoms.atom; + + /* Still looks like a solvent, time to check parameters */ + + /* If it is perturbed (free energy) we can't use the solvent loops, + * so then we just skip to the next molecule. + */ + perturbed = FALSE; + + for (j = j0; j < j1 && !perturbed; j++) + { + perturbed = PERTURBED(atom[j]); + } + + if (perturbed) + { + return; + } + + /* Now it's only a question if the VdW and charge parameters + * are OK. Before doing the check we compare and see if they are + * identical to a possible previous solvent type. + * First we assign the current types and charges. + */ + for (j = 0; j < nj; j++) + { + tmp_vdwtype[j] = atom[j0+j].type; + tmp_charge[j] = atom[j0+j].q; + } + + /* Does it match any previous solvent type? 
*/ + for (k = 0; k < *n_solvent_parameters; k++) + { + match = TRUE; + + + /* We can only match SPC with 3 atoms and TIP4P with 4 atoms */ + if ( (solvent_parameters[k].model == esolSPC && nj != 3) || + (solvent_parameters[k].model == esolTIP4P && nj != 4) ) + { + match = FALSE; + } + + /* Check that types & charges match for all atoms in molecule */ + for (j = 0; j < nj && match == TRUE; j++) + { + if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j]) + { + match = FALSE; + } + if (tmp_charge[j] != solvent_parameters[k].charge[j]) + { + match = FALSE; + } + } + if (match == TRUE) + { + /* Congratulations! We have a matched solvent. + * Flag it with this type for later processing. + */ + *cg_sp = k; + solvent_parameters[k].count += nmol; + + /* We are done with this charge group */ + return; + } + } + + /* If we get here, we have a tentative new solvent type. + * Before we add it we must check that it fulfills the requirements + * of the solvent optimized loops. First determine which atoms have + * VdW interactions. + */ + for (j = 0; j < nj; j++) + { + has_vdw[j] = FALSE; + tjA = tmp_vdwtype[j]; + + /* Go through all other types and see if any have non-zero + * VdW parameters when combined with this one. + */ + for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++) + { + /* We already checked that the atoms weren't perturbed, + * so we only need to check state A now. + */ + if (fr->bBHAM) + { + has_vdw[j] = (has_vdw[j] || + (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0)); + } + else + { + /* Standard LJ */ + has_vdw[j] = (has_vdw[j] || + (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0)); + } + } + } + + /* Now we know all we need to make the final check and assignment. */ + if (nj == 3) + { + /* So, is it an SPC? + * For this we require that all atoms have charge, + * the charges on atom 2 & 3 should be the same, and only + * atom 1 might have VdW. + */ + if (has_vdw[1] == FALSE && + has_vdw[2] == FALSE && + tmp_charge[0] != 0 && + tmp_charge[1] != 0 && + tmp_charge[2] == tmp_charge[1]) + { + srenew(solvent_parameters, *n_solvent_parameters+1); + solvent_parameters[*n_solvent_parameters].model = esolSPC; + solvent_parameters[*n_solvent_parameters].count = nmol; + for (k = 0; k < 3; k++) + { + solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k]; + solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k]; + } + + *cg_sp = *n_solvent_parameters; + (*n_solvent_parameters)++; + } + } + else if (nj == 4) + { + /* Or could it be a TIP4P? + * For this we require that atoms 2,3,4 have charge, but not atom 1. + * Only atom 1 might have VdW.
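+ * In other words, the typical 4-site water layout: a vdW-only first + * atom (the oxygen), two identically charged atoms (the hydrogens), + * and a charged virtual site.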
+ */ + if (has_vdw[1] == FALSE && + has_vdw[2] == FALSE && + has_vdw[3] == FALSE && + tmp_charge[0] == 0 && + tmp_charge[1] != 0 && + tmp_charge[2] == tmp_charge[1] && + tmp_charge[3] != 0) + { + srenew(solvent_parameters, *n_solvent_parameters+1); + solvent_parameters[*n_solvent_parameters].model = esolTIP4P; + solvent_parameters[*n_solvent_parameters].count = nmol; + for (k = 0; k < 4; k++) + { + solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k]; + solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k]; + } + + *cg_sp = *n_solvent_parameters; + (*n_solvent_parameters)++; + } + } + + *solvent_parameters_p = solvent_parameters; +} + +static void +check_solvent(FILE * fp, + const gmx_mtop_t * mtop, + t_forcerec * fr, + cginfo_mb_t *cginfo_mb) +{ + const t_block * cgs; + const t_block * mols; + const gmx_moltype_t *molt; + int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol; + int n_solvent_parameters; + solvent_parameters_t *solvent_parameters; + int **cg_sp; + int bestsp, bestsol; + + if (debug) + { + fprintf(debug, "Going to determine what solvent types we have.\n"); + } + + mols = &mtop->mols; + + n_solvent_parameters = 0; + solvent_parameters = NULL; + /* Allocate temporary array for solvent type */ + snew(cg_sp, mtop->nmolblock); + + cg_offset = 0; + at_offset = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molt = &mtop->moltype[mtop->molblock[mb].type]; + cgs = &molt->cgs; + /* Here we have to loop over all individual molecules + * because we need to check for QMMM particles. + */ + snew(cg_sp[mb], cginfo_mb[mb].cg_mod); + nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr; + nmol = mtop->molblock[mb].nmol/nmol_ch; + for (mol = 0; mol < nmol_ch; mol++) + { + cgm = mol*cgs->nr; + am = mol*cgs->index[cgs->nr]; + for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++) + { + check_solvent_cg(molt, cg_mol, nmol, + mtop->groups.grpnr[egcQMMM] ? + mtop->groups.grpnr[egcQMMM]+at_offset+am : 0, + &mtop->groups.grps[egcQMMM], + fr, + &n_solvent_parameters, &solvent_parameters, + cginfo_mb[mb].cginfo[cgm+cg_mol], + &cg_sp[mb][cgm+cg_mol]); + } + } + cg_offset += cgs->nr; + at_offset += cgs->index[cgs->nr]; + } + + /* Puh! We finished going through all charge groups. + * Now find the most common solvent model. 
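+ * The most common model is simply the parameter entry with the highest + * count; below, charge groups flagged with that entry keep the + * optimized solvent loops, while all others are reset to esolNO.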
+ */ + + /* Most common solvent this far */ + bestsp = -2; + for (i = 0; i < n_solvent_parameters; i++) + { + if (bestsp == -2 || + solvent_parameters[i].count > solvent_parameters[bestsp].count) + { + bestsp = i; + } + } + + if (bestsp >= 0) + { + bestsol = solvent_parameters[bestsp].model; + } + else + { + bestsol = esolNO; + } + +#ifdef DISABLE_WATER_NLIST + bestsol = esolNO; +#endif + + fr->nWatMol = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + cgs = &mtop->moltype[mtop->molblock[mb].type].cgs; + nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod; + for (i = 0; i < cginfo_mb[mb].cg_mod; i++) + { + if (cg_sp[mb][i] == bestsp) + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol); + fr->nWatMol += nmol; + } + else + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO); + } + } + sfree(cg_sp[mb]); + } + sfree(cg_sp); + + if (bestsol != esolNO && fp != NULL) + { + fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n", + esol_names[bestsol], + solvent_parameters[bestsp].count); + } + + sfree(solvent_parameters); + fr->solvent_opt = bestsol; +} + +enum { + acNONE = 0, acCONSTRAINT, acSETTLE +}; + +static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop, + t_forcerec *fr, gmx_bool bNoSolvOpt, + gmx_bool *bFEP_NonBonded, + gmx_bool *bExcl_IntraCGAll_InterCGNone) +{ + const t_block *cgs; + const t_blocka *excl; + const gmx_moltype_t *molt; + const gmx_molblock_t *molb; + cginfo_mb_t *cginfo_mb; + gmx_bool *type_VDW; + int *cginfo; + int cg_offset, a_offset, cgm, am; + int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc; + int *a_con; + int ftype; + int ia; + gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms; + + ncg_tot = ncg_mtop(mtop); + snew(cginfo_mb, mtop->nmolblock); + + snew(type_VDW, fr->ntype); + for (ai = 0; ai < fr->ntype; ai++) + { + type_VDW[ai] = FALSE; + for (j = 0; j < fr->ntype; j++) + { + type_VDW[ai] = type_VDW[ai] || + fr->bBHAM || + C6(fr->nbfp, fr->ntype, ai, j) != 0 || + C12(fr->nbfp, fr->ntype, ai, j) != 0; + } + } + + *bFEP_NonBonded = FALSE; + *bExcl_IntraCGAll_InterCGNone = TRUE; + + excl_nalloc = 10; + snew(bExcl, excl_nalloc); + cg_offset = 0; + a_offset = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molb = &mtop->molblock[mb]; + molt = &mtop->moltype[molb->type]; + cgs = &molt->cgs; + excl = &molt->excls; + + /* Check if the cginfo is identical for all molecules in this block. + * If so, we only need an array of the size of one molecule. + * Otherwise we make an array of #mol times #cgs per molecule. + */ + bId = TRUE; + am = 0; + for (m = 0; m < molb->nmol; m++) + { + am = m*cgs->index[cgs->nr]; + for (cg = 0; cg < cgs->nr; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) != + ggrpnr(&mtop->groups, egcENER, a_offset +a0)) + { + bId = FALSE; + } + if (mtop->groups.grpnr[egcQMMM] != NULL) + { + for (ai = a0; ai < a1; ai++) + { + if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] != + mtop->groups.grpnr[egcQMMM][a_offset +ai]) + { + bId = FALSE; + } + } + } + } + } + + cginfo_mb[mb].cg_start = cg_offset; + cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr; + cginfo_mb[mb].cg_mod = (bId ? 
1 : molb->nmol)*cgs->nr; + snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod); + cginfo = cginfo_mb[mb].cginfo; + + /* Set constraints flags for constrained atoms */ + snew(a_con, molt->atoms.nr); + for (ftype = 0; ftype < F_NRE; ftype++) + { + if (interaction_function[ftype].flags & IF_CONSTRAINT) + { + int nral; + + nral = NRAL(ftype); + for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral) + { + int a; + + for (a = 0; a < nral; a++) + { + a_con[molt->ilist[ftype].iatoms[ia+1+a]] = + (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT); + } + } + } + } + + for (m = 0; m < (bId ? 1 : molb->nmol); m++) + { + cgm = m*cgs->nr; + am = m*cgs->index[cgs->nr]; + for (cg = 0; cg < cgs->nr; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + + /* Store the energy group in cginfo */ + gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0); + SET_CGINFO_GID(cginfo[cgm+cg], gid); + + /* Check the intra/inter charge group exclusions */ + if (a1-a0 > excl_nalloc) + { + excl_nalloc = a1 - a0; + srenew(bExcl, excl_nalloc); + } + /* bExclIntraAll: all intra cg interactions excluded + * bExclInter: any inter cg interactions excluded + */ + bExclIntraAll = TRUE; + bExclInter = FALSE; + bHaveVDW = FALSE; + bHaveQ = FALSE; + bHavePerturbedAtoms = FALSE; + for (ai = a0; ai < a1; ai++) + { + /* Check VDW and electrostatic interactions */ + bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] || + type_VDW[molt->atoms.atom[ai].typeB]); + bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 || + molt->atoms.atom[ai].qB != 0); + + bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0); + + /* Clear the exclusion list for atom ai */ + for (aj = a0; aj < a1; aj++) + { + bExcl[aj-a0] = FALSE; + } + /* Loop over all the exclusions of atom ai */ + for (j = excl->index[ai]; j < excl->index[ai+1]; j++) + { + aj = excl->a[j]; + if (aj < a0 || aj >= a1) + { + bExclInter = TRUE; + } + else + { + bExcl[aj-a0] = TRUE; + } + } + /* Check if ai excludes a0 to a1 */ + for (aj = a0; aj < a1; aj++) + { + if (!bExcl[aj-a0]) + { + bExclIntraAll = FALSE; + } + } + + switch (a_con[ai]) + { + case acCONSTRAINT: + SET_CGINFO_CONSTR(cginfo[cgm+cg]); + break; + case acSETTLE: + SET_CGINFO_SETTLE(cginfo[cgm+cg]); + break; + default: + break; + } + } + if (bExclIntraAll) + { + SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]); + } + if (bExclInter) + { + SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]); + } + if (a1 - a0 > MAX_CHARGEGROUP_SIZE) + { + /* The size in cginfo is currently only read with DD */ + gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE); + } + if (bHaveVDW) + { + SET_CGINFO_HAS_VDW(cginfo[cgm+cg]); + } + if (bHaveQ) + { + SET_CGINFO_HAS_Q(cginfo[cgm+cg]); + } + if (bHavePerturbedAtoms && fr->efep != efepNO) + { + SET_CGINFO_FEP(cginfo[cgm+cg]); + *bFEP_NonBonded = TRUE; + } + /* Store the charge group size */ + SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0); + + if (!bExclIntraAll || bExclInter) + { + *bExcl_IntraCGAll_InterCGNone = FALSE; + } + } + } + + sfree(a_con); + + cg_offset += molb->nmol*cgs->nr; + a_offset += molb->nmol*cgs->index[cgs->nr]; + } + sfree(bExcl); + + /* the solvent optimizer is called after the QM is initialized, + * because we don't want the QM subsystem to become an + * optimized solvent + */ + + check_solvent(fplog, mtop, fr, cginfo_mb); + + if (getenv("GMX_NO_SOLV_OPT")) + { + if (fplog) + { + fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n" + "Disabling all solvent optimization\n"); + } + fr->solvent_opt
= esolNO; + } + if (bNoSolvOpt) + { + fr->solvent_opt = esolNO; + } + if (!fr->solvent_opt) + { + for (mb = 0; mb < mtop->nmolblock; mb++) + { + for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++) + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO); + } + } + } + + return cginfo_mb; +} + +static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb) +{ + int ncg, mb, cg; + int *cginfo; + + ncg = cgi_mb[nmb-1].cg_end; + snew(cginfo, ncg); + mb = 0; + for (cg = 0; cg < ncg; cg++) + { + while (cg >= cgi_mb[mb].cg_end) + { + mb++; + } + cginfo[cg] = + cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod]; + } + + return cginfo; +} + +static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop) +{ + /*This now calculates sum for q and c6*/ + double qsum, q2sum, q, c6sum, c6; + int mb, nmol, i; + const t_atoms *atoms; + + qsum = 0; + q2sum = 0; + c6sum = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (i = 0; i < atoms->nr; i++) + { + q = atoms->atom[i].q; + qsum += nmol*q; + q2sum += nmol*q*q; + c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6; + c6sum += nmol*c6; + } + } + fr->qsum[0] = qsum; + fr->q2sum[0] = q2sum; + fr->c6sum[0] = c6sum; + + if (fr->efep != efepNO) + { + qsum = 0; + q2sum = 0; + c6sum = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (i = 0; i < atoms->nr; i++) + { + q = atoms->atom[i].qB; + qsum += nmol*q; + q2sum += nmol*q*q; + c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6; + c6sum += nmol*c6; + } + fr->qsum[1] = qsum; + fr->q2sum[1] = q2sum; + fr->c6sum[1] = c6sum; + } + } + else + { + fr->qsum[1] = fr->qsum[0]; + fr->q2sum[1] = fr->q2sum[0]; + fr->c6sum[1] = fr->c6sum[0]; + } + if (log) + { + if (fr->efep == efepNO) + { + fprintf(log, "System total charge: %.3f\n", fr->qsum[0]); + } + else + { + fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n", + fr->qsum[0], fr->qsum[1]); + } + } +} + +void update_forcerec(t_forcerec *fr, matrix box) +{ + if (fr->eeltype == eelGRF) + { + calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf, + fr->rcoulomb, fr->temp, fr->zsquare, box, + &fr->kappa, &fr->k_rf, &fr->c_rf); + } +} + +void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop) +{ + const t_atoms *atoms, *atoms_tpi; + const t_blocka *excl; + int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q; + gmx_int64_t npair, npair_ij, tmpi, tmpj; + double csix, ctwelve; + int ntp, *typecount; + gmx_bool bBHAM; + real *nbfp; + real *nbfp_comb = NULL; + + ntp = fr->ntype; + bBHAM = fr->bBHAM; + nbfp = fr->nbfp; + + /* For LJ-PME, we want to correct for the difference between the + * actual C6 values and the C6 values used by the LJ-PME based on + * combination rules. */ + + if (EVDW_PME(fr->vdwtype)) + { + nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams, + (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC); + for (tpi = 0; tpi < ntp; ++tpi) + { + for (tpj = 0; tpj < ntp; ++tpj) + { + C6(nbfp_comb, ntp, tpi, tpj) = + C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj); + C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj); + } + } + nbfp = nbfp_comb; + } + for (q = 0; q < (fr->efep == efepNO ? 
1 : 2); q++) + { + csix = 0; + ctwelve = 0; + npair = 0; + nexcl = 0; + if (!fr->n_tpi) + { + /* Count the types so we avoid natoms^2 operations */ + snew(typecount, ntp); + gmx_mtop_count_atomtypes(mtop, q, typecount); + + for (tpi = 0; tpi < ntp; tpi++) + { + for (tpj = tpi; tpj < ntp; tpj++) + { + tmpi = typecount[tpi]; + tmpj = typecount[tpj]; + if (tpi != tpj) + { + npair_ij = tmpi*tmpj; + } + else + { + npair_ij = tmpi*(tmpi - 1)/2; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0; + ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0; + } + npair += npair_ij; + } + } + sfree(typecount); + /* Subtract the excluded pairs. + * The main reason for subtracting exclusions is that in some cases + * some combinations might never occur and the parameters could have + * any value. These unused values should not influence the dispersion + * correction. + */ + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + excl = &mtop->moltype[mtop->molblock[mb].type].excls; + for (i = 0; (i < atoms->nr); i++) + { + if (q == 0) + { + tpi = atoms->atom[i].type; + } + else + { + tpi = atoms->atom[i].typeB; + } + j1 = excl->index[i]; + j2 = excl->index[i+1]; + for (j = j1; j < j2; j++) + { + k = excl->a[j]; + if (k > i) + { + if (q == 0) + { + tpj = atoms->atom[k].type; + } + else + { + tpj = atoms->atom[k].typeB; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0; + ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0; + } + nexcl += nmol; + } + } + } + } + } + else + { + /* Only correct for the interaction of the test particle + * with the rest of the system. + */ + atoms_tpi = + &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms; + + npair = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (j = 0; j < atoms->nr; j++) + { + nmolc = nmol; + /* Remove the interaction of the test charge group + * with itself.
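+ * For the last molecule block, which holds the inserted test + * molecule, we therefore count one molecule less (nmolc--).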
+ */ + if (mb == mtop->nmolblock-1) + { + nmolc--; + + if (mb == 0 && nmol == 1) + { + gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file"); + } + } + if (q == 0) + { + tpj = atoms->atom[j].type; + } + else + { + tpj = atoms->atom[j].typeB; + } + for (i = 0; i < fr->n_tpi; i++) + { + if (q == 0) + { + tpi = atoms_tpi->atom[i].type; + } + else + { + tpi = atoms_tpi->atom[i].typeB; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0; + ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0; + } + npair += nmolc; + } + } + } + } + if (npair - nexcl <= 0 && fplog) + { + fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n"); + csix = 0; + ctwelve = 0; + } + else + { + csix /= npair - nexcl; + ctwelve /= npair - nexcl; + } + if (debug) + { + fprintf(debug, "Counted %d exclusions\n", nexcl); + fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix); + fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve); + } + fr->avcsix[q] = csix; + fr->avctwelve[q] = ctwelve; + } + + if (EVDW_PME(fr->vdwtype)) + { + sfree(nbfp_comb); + } + + if (fplog != NULL) + { + if (fr->eDispCorr == edispcAllEner || + fr->eDispCorr == edispcAllEnerPres) + { + fprintf(fplog, "Long Range LJ corr.: %10.4e, %10.4e\n", + fr->avcsix[0], fr->avctwelve[0]); + } + else + { + fprintf(fplog, "Long Range LJ corr.: %10.4e\n", fr->avcsix[0]); + } + } +} + + +static void set_bham_b_max(FILE *fplog, t_forcerec *fr, + const gmx_mtop_t *mtop) +{ + const t_atoms *at1, *at2; + int mt1, mt2, i, j, tpi, tpj, ntypes; + real b, bmin; + real *nbfp; + + if (fplog) + { + fprintf(fplog, "Determining largest Buckingham b parameter for table\n"); + } + nbfp = fr->nbfp; + ntypes = fr->ntype; + + bmin = -1; + fr->bham_b_max = 0; + for (mt1 = 0; mt1 < mtop->nmoltype; mt1++) + { + at1 = &mtop->moltype[mt1].atoms; + for (i = 0; (i < at1->nr); i++) + { + tpi = at1->atom[i].type; + if (tpi >= ntypes) + { + gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes); + } + + for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++) + { + at2 = &mtop->moltype[mt2].atoms; + for (j = 0; (j < at2->nr); j++) + { + tpj = at2->atom[j].type; + if (tpj >= ntypes) + { + gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes); + } + b = BHAMB(nbfp, ntypes, tpi, tpj); + if (b > fr->bham_b_max) + { + fr->bham_b_max = b; + } + if ((b < bmin) || (bmin == -1)) + { + bmin = b; + } + } + } + } + } + if (fplog) + { + fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n", + bmin, fr->bham_b_max); + } +} + +static void make_nbf_tables(FILE *fp, const output_env_t oenv, + t_forcerec *fr, real rtab, + const t_commrec *cr, + const char *tabfn, char *eg1, char *eg2, + t_nblists *nbl) +{ + char buf[STRLEN]; + int i, j; + + if (tabfn == NULL) + { + if (debug) + { + fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n"); + } + return; + } + + sprintf(buf, "%s", tabfn); + if (eg1 && eg2) + { + /* Append the two energy group names */ + sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s", + eg1, eg2, ftp2ext(efXVG)); + } + nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0); + /* Copy the contents of the table to separate coulomb and LJ tables too, + * to improve cache performance. 
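+ * The combined table holds 12 reals per point: four spline
+ * coefficients each for Coulomb, dispersion and repulsion, which is
+ * the layout the copy loops below assume.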
+ */ + /* For performance reasons we want + * the table data to be aligned to 16-byte. The pointers could be freed + * but currently aren't. + */ + nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC; + nbl->table_elec.format = nbl->table_elec_vdw.format; + nbl->table_elec.r = nbl->table_elec_vdw.r; + nbl->table_elec.n = nbl->table_elec_vdw.n; + nbl->table_elec.scale = nbl->table_elec_vdw.scale; + nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp; + nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize; + nbl->table_elec.ninteractions = 1; + nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions; + snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32); + + nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP; + nbl->table_vdw.format = nbl->table_elec_vdw.format; + nbl->table_vdw.r = nbl->table_elec_vdw.r; + nbl->table_vdw.n = nbl->table_elec_vdw.n; + nbl->table_vdw.scale = nbl->table_elec_vdw.scale; + nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp; + nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize; + nbl->table_vdw.ninteractions = 2; + nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions; + snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32); + + for (i = 0; i <= nbl->table_elec_vdw.n; i++) + { + for (j = 0; j < 4; j++) + { + nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j]; + } + for (j = 0; j < 8; j++) + { + nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j]; + } + } +} + +static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop, + int *ncount, int **count) +{ + const gmx_moltype_t *molt; + const t_ilist *il; + int mt, ftype, stride, i, j, tabnr; + + for (mt = 0; mt < mtop->nmoltype; mt++) + { + molt = &mtop->moltype[mt]; + for (ftype = 0; ftype < F_NRE; ftype++) + { + if (ftype == ftype1 || ftype == ftype2) + { + il = &molt->ilist[ftype]; + stride = 1 + NRAL(ftype); + for (i = 0; i < il->nr; i += stride) + { + tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table; + if (tabnr < 0) + { + gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr); + } + if (tabnr >= *ncount) + { + srenew(*count, tabnr+1); + for (j = *ncount; j < tabnr+1; j++) + { + (*count)[j] = 0; + } + *ncount = tabnr+1; + } + (*count)[tabnr]++; + } + } + } + } +} + +static bondedtable_t *make_bonded_tables(FILE *fplog, + int ftype1, int ftype2, + const gmx_mtop_t *mtop, + const char *basefn, const char *tabext) +{ + int i, ncount, *count; + char tabfn[STRLEN]; + bondedtable_t *tab; + + tab = NULL; + + ncount = 0; + count = NULL; + count_tables(ftype1, ftype2, mtop, &ncount, &count); + + if (ncount > 0) + { + snew(tab, ncount); + for (i = 0; i < ncount; i++) + { + if (count[i] > 0) + { + sprintf(tabfn, "%s", basefn); + sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s", + tabext, i, ftp2ext(efXVG)); + tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2); + } + } + sfree(count); + } + + return tab; +} + +void forcerec_set_ranges(t_forcerec *fr, + int ncg_home, int ncg_force, + int natoms_force, + int natoms_force_constr, int natoms_f_novirsum) +{ + fr->cg0 = 0; + fr->hcg = ncg_home; + + /* fr->ncg_force is unused in the standard code, + * but it can be useful for modified code dealing with charge groups. 
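+ * The atom counts passed in, in contrast, size the force buffers that
+ * are reallocated below when they grow.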
+ */ + fr->ncg_force = ncg_force; + fr->natoms_force = natoms_force; + fr->natoms_force_constr = natoms_force_constr; + + if (fr->natoms_force_constr > fr->nalloc_force) + { + fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr); + + if (fr->bTwinRange) + { + srenew(fr->f_twin, fr->nalloc_force); + } + } + + if (fr->bF_NoVirSum) + { + fr->f_novirsum_n = natoms_f_novirsum; + if (fr->f_novirsum_n > fr->f_novirsum_nalloc) + { + fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n); + srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc); + } + } + else + { + fr->f_novirsum_n = 0; + } +} + +static real cutoff_inf(real cutoff) +{ + if (cutoff == 0) + { + cutoff = GMX_CUTOFF_INF; + } + + return cutoff; +} + +static void make_adress_tf_tables(FILE *fp, const output_env_t oenv, + t_forcerec *fr, const t_inputrec *ir, + const char *tabfn, const gmx_mtop_t *mtop, + matrix box) +{ + char buf[STRLEN]; + int i, j; + + if (tabfn == NULL) + { + gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n"); + return; + } + + snew(fr->atf_tabs, ir->adress->n_tf_grps); + + sprintf(buf, "%s", tabfn); + for (i = 0; i < ir->adress->n_tf_grps; i++) + { + j = ir->adress->tf_table_index[i]; /* get energy group index */ + sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s", + *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG)); + if (fp) + { + fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf); + } + fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box); + } + +} + +gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp) +{ + gmx_bool bAllvsAll; + + bAllvsAll = + ( + ir->rlist == 0 && + ir->rcoulomb == 0 && + ir->rvdw == 0 && + ir->ePBC == epbcNONE && + ir->vdwtype == evdwCUT && + ir->coulombtype == eelCUT && + ir->efep == efepNO && + (ir->implicit_solvent == eisNO || + (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL || + ir->gb_algorithm == egbHCT || + ir->gb_algorithm == egbOBC))) && + getenv("GMX_NO_ALLVSALL") == NULL + ); + + if (bAllvsAll && ir->opts.ngener > 1) + { + const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n"; + + if (bPrintNote) + { + if (MASTER(cr)) + { + fprintf(stderr, "\n%s\n", note); + } + if (fp != NULL) + { + fprintf(fp, "\n%s\n", note); + } + } + bAllvsAll = FALSE; + } + + if (bAllvsAll && fp && MASTER(cr)) + { + fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n"); + } + + return bAllvsAll; +} + + +static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp) +{ + int t, i; + + /* These thread local data structures are used for bondeds only */ + fr->nthreads = gmx_omp_nthreads_get(emntBonded); + + if (fr->nthreads > 1) + { + snew(fr->f_t, fr->nthreads); + /* Thread 0 uses the global force and energy arrays */ + for (t = 1; t < fr->nthreads; t++) + { + fr->f_t[t].f = NULL; + fr->f_t[t].f_nalloc = 0; + snew(fr->f_t[t].fshift, SHIFTS); + fr->f_t[t].grpp.nener = nenergrp*nenergrp; + for (i = 0; i < egNR; i++) + { + snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener); + } + } + } +} + + +gmx_bool nbnxn_acceleration_supported(FILE *fplog, + const t_commrec *cr, + const t_inputrec *ir, + gmx_bool bGPU) +{ + if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)) + { + md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not 
supported with %s, falling back to %s\n",
+                      bGPU ? "GPUs" : "SIMD kernels",
+                      bGPU ? "CPU only" : "plain-C kernels");
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+
+static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
+                                  int                         *kernel_type,
+                                  int                         *ewald_excl)
+{
+    *kernel_type = nbnxnk4x4_PlainC;
+    *ewald_excl  = ewaldexclTable;
+
+#ifdef GMX_NBNXN_SIMD
+    {
+#ifdef GMX_NBNXN_SIMD_4XN
+        *kernel_type = nbnxnk4xN_SIMD_4xN;
+#endif
+#ifdef GMX_NBNXN_SIMD_2XNN
+        *kernel_type = nbnxnk4xN_SIMD_2xNN;
+#endif
+
+#if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
+        /* We need to choose if we want 2x(N+N) or 4xN kernels.
+         * Currently this is based on the SIMD acceleration choice,
+         * but it might be better to decide this at runtime based on CPU.
+         *
+         * 4xN calculates more (zero) interactions, but has less pair-search
+         * work and much better kernel instruction scheduling.
+         *
+         * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
+         * which doesn't have FMA, both the analytical and tabulated Ewald
+         * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
+         * 2x(4+4) because it results in significantly fewer pairs.
+         * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
+         * 10% with HT, 50% without HT. As we currently don't detect the actual
+         * use of HT, use 4x8 to avoid a potential performance hit.
+         * On Intel Haswell 4x8 is always faster.
+         */
+        *kernel_type = nbnxnk4xN_SIMD_4xN;
+
+#ifndef GMX_SIMD_HAVE_FMA
+        if (EEL_PME_EWALD(ir->coulombtype) ||
+            EVDW_PME(ir->vdwtype))
+        {
+            /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
+             * There are enough instructions to make 2x(4+4) efficient.
+             */
+            *kernel_type = nbnxnk4xN_SIMD_2xNN;
+        }
+#endif
+#endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
+
+
+        if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
+        {
+#ifdef GMX_NBNXN_SIMD_4XN
+            *kernel_type = nbnxnk4xN_SIMD_4xN;
+#else
+            gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
+#endif
+        }
+        if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
+        {
+#ifdef GMX_NBNXN_SIMD_2XNN
+            *kernel_type = nbnxnk4xN_SIMD_2xNN;
+#else
+            gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
+#endif
+        }
+
+        /* Analytical Ewald exclusion correction is only an option in
+         * the SIMD kernel.
+         * Since table lookups don't parallelize with SIMD, analytical
+         * will probably always be faster for a SIMD width of 8 or more.
+         * With FMA analytical is sometimes faster for a width of 4 as well.
+         * On BlueGene/Q, this is faster regardless of precision.
+         * In single precision, this is faster on Bulldozer.
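+         * The preprocessor conditional below encodes exactly these cases:
+         * a SIMD width of 8 or more, a width of 4 or more with FMA in
+         * single precision, or IBM QPX (BlueGene/Q).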
+ */ +#if GMX_SIMD_REAL_WIDTH >= 8 || \ + (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \ + defined GMX_SIMD_IBM_QPX + *ewald_excl = ewaldexclAnalytical; +#endif + if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL) + { + *ewald_excl = ewaldexclTable; + } + if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL) + { + *ewald_excl = ewaldexclAnalytical; + } + + } +#endif /* GMX_NBNXN_SIMD */ +} + + +const char *lookup_nbnxn_kernel_name(int kernel_type) +{ + const char *returnvalue = NULL; + switch (kernel_type) + { + case nbnxnkNotSet: + returnvalue = "not set"; + break; + case nbnxnk4x4_PlainC: + returnvalue = "plain C"; + break; + case nbnxnk4xN_SIMD_4xN: + case nbnxnk4xN_SIMD_2xNN: +#ifdef GMX_NBNXN_SIMD +#if defined GMX_SIMD_X86_SSE2 + returnvalue = "SSE2"; +#elif defined GMX_SIMD_X86_SSE4_1 + returnvalue = "SSE4.1"; +#elif defined GMX_SIMD_X86_AVX_128_FMA + returnvalue = "AVX_128_FMA"; +#elif defined GMX_SIMD_X86_AVX_256 + returnvalue = "AVX_256"; +#elif defined GMX_SIMD_X86_AVX2_256 + returnvalue = "AVX2_256"; +#else + returnvalue = "SIMD"; +#endif +#else /* GMX_NBNXN_SIMD */ + returnvalue = "not available"; +#endif /* GMX_NBNXN_SIMD */ + break; + case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break; + case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break; + + case nbnxnkNR: + default: + gmx_fatal(FARGS, "Illegal kernel type selected"); + returnvalue = NULL; + break; + } + return returnvalue; +}; + +static void pick_nbnxn_kernel(FILE *fp, + const t_commrec *cr, + gmx_bool use_simd_kernels, + gmx_bool bUseGPU, + gmx_bool bEmulateGPU, + const t_inputrec *ir, + int *kernel_type, + int *ewald_excl, + gmx_bool bDoNonbonded) +{ + assert(kernel_type); + + *kernel_type = nbnxnkNotSet; + *ewald_excl = ewaldexclTable; + + if (bEmulateGPU) + { + *kernel_type = nbnxnk8x8x8_PlainC; + + if (bDoNonbonded) + { + md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)"); + } + } + else if (bUseGPU) + { + *kernel_type = nbnxnk8x8x8_CUDA; + } + + if (*kernel_type == nbnxnkNotSet) + { + /* LJ PME with LB combination rule does 7 mesh operations. + * This so slow that we don't compile SIMD non-bonded kernels for that. + */ + if (use_simd_kernels && + nbnxn_acceleration_supported(fp, cr, ir, FALSE)) + { + pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl); + } + else + { + *kernel_type = nbnxnk4x4_PlainC; + } + } + + if (bDoNonbonded && fp != NULL) + { + fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n", + lookup_nbnxn_kernel_name(*kernel_type), + nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE, + nbnxn_kernel_to_cj_size(*kernel_type)); + + if (nbnxnk4x4_PlainC == *kernel_type || + nbnxnk8x8x8_PlainC == *kernel_type) + { + md_print_warn(cr, fp, + "WARNING: Using the slow %s kernels. This should\n" + "not happen during routine usage on supported platforms.\n\n", + lookup_nbnxn_kernel_name(*kernel_type)); + } + } +} + +static void pick_nbnxn_resources(const t_commrec *cr, + const gmx_hw_info_t *hwinfo, + gmx_bool bDoNonbonded, + gmx_bool *bUseGPU, + gmx_bool *bEmulateGPU, + const gmx_gpu_opt_t *gpu_opt) +{ + gmx_bool bEmulateGPUEnvVarSet; + char gpu_err_str[STRLEN]; + + *bUseGPU = FALSE; + + bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL); + + /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. 
Because
+     * GPUs (currently) only handle non-bonded calculations, we will
+     * automatically switch to emulation if non-bonded calculations are
+     * turned off via GMX_NO_NONBONDED - this is the simple and elegant
+     * way to turn off GPU initialization, data movement, and cleanup.
+     *
+     * GPU emulation can be useful to assess the performance one can expect by
+     * adding GPU(s) to the machine. The conditional below allows this even
+     * if mdrun is compiled without GPU acceleration support.
+     * Note that you should freeze the system as otherwise it will explode.
+     */
+    *bEmulateGPU = (bEmulateGPUEnvVarSet ||
+                    (!bDoNonbonded &&
+                     gpu_opt->ncuda_dev_use > 0));
+
+    /* Enable GPU mode when GPUs are available and no GPU emulation is requested.
+     */
+    if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
+    {
+        /* Each PP node will use the intra-node id-th device from the
+         * list of detected/selected GPUs. */
+        if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
+                      &hwinfo->gpu_info, gpu_opt))
+        {
+            /* At this point the init should never fail as we made sure that
+             * we have all the GPUs we need. If it still does, we'll bail. */
+            gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
+                      cr->nodeid,
+                      get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
+                                        cr->rank_pp_intranode),
+                      gpu_err_str);
+        }
+
+        /* Here we actually turn on hardware GPU acceleration */
+        *bUseGPU = TRUE;
+    }
+}
+
+gmx_bool uses_simple_tables(int cutoff_scheme,
+                            nonbonded_verlet_t *nbv,
+                            int group)
+{
+    gmx_bool bUsesSimpleTables = TRUE;
+    int      grp_index;
+
+    switch (cutoff_scheme)
+    {
+        case ecutsGROUP:
+            bUsesSimpleTables = TRUE;
+            break;
+        case ecutsVERLET:
+            assert(NULL != nbv && NULL != nbv->grp);
+            grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
+            bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
+            break;
+        default:
+            gmx_incons("unimplemented");
+    }
+    return bUsesSimpleTables;
+}
+
+static void init_ewald_f_table(interaction_const_t *ic,
+                               gmx_bool             bUsesSimpleTables,
+                               real                 rtab)
+{
+    real maxr;
+
+    if (bUsesSimpleTables)
+    {
+        /* With a spacing of 0.0005 we are at the force summation accuracy
+         * for the SSE kernels for "normal" atomistic simulations.
+         */
+        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
+                                                   ic->rcoulomb);
+
+        maxr = (rtab > ic->rcoulomb) ?
rtab : ic->rcoulomb;
+        ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
+    }
+    else
+    {
+        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
+        /* Subtract 2 instead of 1 to avoid access out of range due to rounding */
+        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
+    }
+
+    sfree_aligned(ic->tabq_coul_FDV0);
+    sfree_aligned(ic->tabq_coul_F);
+    sfree_aligned(ic->tabq_coul_V);
+
+    sfree_aligned(ic->tabq_vdw_FDV0);
+    sfree_aligned(ic->tabq_vdw_F);
+    sfree_aligned(ic->tabq_vdw_V);
+
+    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
+    {
+        /* Create the original table data in FDV0 */
+        snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
+        snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
+        snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
+        table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
+                                    ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
+    }
+
+    if (EVDW_PME(ic->vdwtype))
+    {
+        snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
+        snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
+        snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
+        table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
+                                    ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
+    }
+}
+
+void init_interaction_const_tables(FILE                *fp,
+                                   interaction_const_t *ic,
+                                   gmx_bool             bUsesSimpleTables,
+                                   real                 rtab)
+{
+    real spacing;
+
+    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
+    {
+        init_ewald_f_table(ic, bUsesSimpleTables, rtab);
+
+        if (fp != NULL)
+        {
+            fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
+                    1/ic->tabq_scale, ic->tabq_size);
+        }
+    }
+}
+
+static void clear_force_switch_constants(shift_consts_t *sc)
+{
+    sc->c2   = 0;
+    sc->c3   = 0;
+    sc->cpot = 0;
+}
+
+static void force_switch_constants(real p,
+                                   real rsw, real rc,
+                                   shift_consts_t *sc)
+{
+    /* Here we determine the coefficients for shifting the force to zero
+     * between distance rsw and the cut-off rc.
+     * For a potential of r^-p, we have force p*r^-(p+1).
+     * But to save flops we absorb p in the coefficient.
+     * Thus we get:
+     * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
+     * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
+     */
+    sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
+    sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
+    sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
+}
+
+static void potential_switch_constants(real rsw, real rc,
+                                       switch_consts_t *sc)
+{
+    /* The switch function is 1 at rsw and 0 at rc.
+     * The derivative and second derivative are zero at both ends.
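+     * Requiring sw = 0 with vanishing first and second derivatives at the
+     * cut-off fixes the coefficients to c3 = -10/(rc-rsw)^3,
+     * c4 = 15/(rc-rsw)^4 and c5 = -6/(rc-rsw)^5, as set below.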
+ * rsw = max(r - r_switch, 0) + * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5 + * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4 + * force = force*dsw - potential*sw + * potential *= sw + */ + sc->c3 = -10*pow(rc - rsw, -3); + sc->c4 = 15*pow(rc - rsw, -4); + sc->c5 = -6*pow(rc - rsw, -5); +} + +static void +init_interaction_const(FILE *fp, + const t_commrec gmx_unused *cr, + interaction_const_t **interaction_const, + const t_forcerec *fr, + real rtab) +{ + interaction_const_t *ic; + gmx_bool bUsesSimpleTables = TRUE; + + snew(ic, 1); + + /* Just allocate something so we can free it */ + snew_aligned(ic->tabq_coul_FDV0, 16, 32); + snew_aligned(ic->tabq_coul_F, 16, 32); + snew_aligned(ic->tabq_coul_V, 16, 32); + + ic->rlist = fr->rlist; + ic->rlistlong = fr->rlistlong; + + /* Lennard-Jones */ + ic->vdwtype = fr->vdwtype; + ic->vdw_modifier = fr->vdw_modifier; + ic->rvdw = fr->rvdw; + ic->rvdw_switch = fr->rvdw_switch; + ic->ewaldcoeff_lj = fr->ewaldcoeff_lj; + ic->ljpme_comb_rule = fr->ljpme_combination_rule; + ic->sh_lj_ewald = 0; + clear_force_switch_constants(&ic->dispersion_shift); + clear_force_switch_constants(&ic->repulsion_shift); + + switch (ic->vdw_modifier) + { + case eintmodPOTSHIFT: + /* Only shift the potential, don't touch the force */ + ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0); + ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0); + if (EVDW_PME(ic->vdwtype)) + { + real crc2; + + crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw); + ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0); + } + break; + case eintmodFORCESWITCH: + /* Switch the force, switch and shift the potential */ + force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw, + &ic->dispersion_shift); + force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw, + &ic->repulsion_shift); + break; + case eintmodPOTSWITCH: + /* Switch the potential and force */ + potential_switch_constants(ic->rvdw_switch, ic->rvdw, + &ic->vdw_switch); + break; + case eintmodNONE: + case eintmodEXACTCUTOFF: + /* Nothing to do here */ + break; + default: + gmx_incons("unimplemented potential modifier"); + } + + ic->sh_invrc6 = -ic->dispersion_shift.cpot; + + /* Electrostatics */ + ic->eeltype = fr->eeltype; + ic->coulomb_modifier = fr->coulomb_modifier; + ic->rcoulomb = fr->rcoulomb; + ic->epsilon_r = fr->epsilon_r; + ic->epsfac = fr->epsfac; + ic->ewaldcoeff_q = fr->ewaldcoeff_q; + + if (fr->coulomb_modifier == eintmodPOTSHIFT) + { + ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb); + } + else + { + ic->sh_ewald = 0; + } + + /* Reaction-field */ + if (EEL_RF(ic->eeltype)) + { + ic->epsilon_rf = fr->epsilon_rf; + ic->k_rf = fr->k_rf; + ic->c_rf = fr->c_rf; + } + else + { + /* For plain cut-off we might use the reaction-field kernels */ + ic->epsilon_rf = ic->epsilon_r; + ic->k_rf = 0; + if (fr->coulomb_modifier == eintmodPOTSHIFT) + { + ic->c_rf = 1/ic->rcoulomb; + } + else + { + ic->c_rf = 0; + } + } + + if (fp != NULL) + { + real dispersion_shift; + + dispersion_shift = ic->dispersion_shift.cpot; + if (EVDW_PME(ic->vdwtype)) + { + dispersion_shift -= ic->sh_lj_ewald; + } + fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e", + ic->repulsion_shift.cpot, dispersion_shift); + + if (ic->eeltype == eelCUT) + { + fprintf(fp, ", Coulomb %.e", -ic->c_rf); + } + else if (EEL_PME(ic->eeltype)) + { + fprintf(fp, ", Ewald %.3e", -ic->sh_ewald); + } + fprintf(fp, "\n"); + } + + *interaction_const = ic; + + if (fr->nbv != NULL && fr->nbv->bUseGPU) + { + nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp); + + /* With 
tMPI + GPUs some ranks may be sharing GPU(s) and therefore + * also sharing texture references. To keep the code simple, we don't + * treat texture references as shared resources, but this means that + * the coulomb_tab and nbfp texture refs will get updated by multiple threads. + * Hence, to ensure that the non-bonded kernels don't start before all + * texture binding operations are finished, we need to wait for all ranks + * to arrive here before continuing. + * + * Note that we could omit this barrier if GPUs are not shared (or + * texture objects are used), but as this is initialization code, there + * is not point in complicating things. + */ +#ifdef GMX_THREAD_MPI + if (PAR(cr)) + { + gmx_barrier(cr); + } +#endif /* GMX_THREAD_MPI */ + } + + bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1); + init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab); +} + +static void init_nb_verlet(FILE *fp, + nonbonded_verlet_t **nb_verlet, + gmx_bool bFEP_NonBonded, + const t_inputrec *ir, + const t_forcerec *fr, + const t_commrec *cr, + const char *nbpu_opt) +{ + nonbonded_verlet_t *nbv; + int i; + char *env; + gmx_bool bEmulateGPU, bHybridGPURun = FALSE; + + nbnxn_alloc_t *nb_alloc; + nbnxn_free_t *nb_free; + + snew(nbv, 1); + + pick_nbnxn_resources(cr, fr->hwinfo, + fr->bNonbonded, + &nbv->bUseGPU, + &bEmulateGPU, + fr->gpu_opt); + + nbv->nbs = NULL; + + nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1); + for (i = 0; i < nbv->ngrp; i++) + { + nbv->grp[i].nbl_lists.nnbl = 0; + nbv->grp[i].nbat = NULL; + nbv->grp[i].kernel_type = nbnxnkNotSet; + + if (i == 0) /* local */ + { + pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels, + nbv->bUseGPU, bEmulateGPU, ir, + &nbv->grp[i].kernel_type, + &nbv->grp[i].ewald_excl, + fr->bNonbonded); + } + else /* non-local */ + { + if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0) + { + /* Use GPU for local, select a CPU kernel for non-local */ + pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels, + FALSE, FALSE, ir, + &nbv->grp[i].kernel_type, + &nbv->grp[i].ewald_excl, + fr->bNonbonded); + + bHybridGPURun = TRUE; + } + else + { + /* Use the same kernel for local and non-local interactions */ + nbv->grp[i].kernel_type = nbv->grp[0].kernel_type; + nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl; + } + } + } + + if (nbv->bUseGPU) + { + /* init the NxN GPU data; the last argument tells whether we'll have + * both local and non-local NB calculation on GPU */ + nbnxn_cuda_init(fp, &nbv->cu_nbv, + &fr->hwinfo->gpu_info, fr->gpu_opt, + cr->rank_pp_intranode, + (nbv->ngrp > 1) && !bHybridGPURun); + + if ((env = getenv("GMX_NB_MIN_CI")) != NULL) + { + char *end; + + nbv->min_ci_balanced = strtol(env, &end, 10); + if (!end || (*end != 0) || nbv->min_ci_balanced <= 0) + { + gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env); + } + + if (debug) + { + fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n", + nbv->min_ci_balanced); + } + } + else + { + nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv); + if (debug) + { + fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n", + nbv->min_ci_balanced); + } + } + } + else + { + nbv->min_ci_balanced = 0; + } + + *nb_verlet = nbv; + + nbnxn_init_search(&nbv->nbs, + DOMAINDECOMP(cr) ? &cr->dd->nc : NULL, + DOMAINDECOMP(cr) ? 
domdec_zones(cr->dd) : NULL, + bFEP_NonBonded, + gmx_omp_nthreads_get(emntNonbonded)); + + for (i = 0; i < nbv->ngrp; i++) + { + if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA) + { + nb_alloc = &pmalloc; + nb_free = &pfree; + } + else + { + nb_alloc = NULL; + nb_free = NULL; + } + + nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists, + nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type), + /* 8x8x8 "non-simple" lists are ATM always combined */ + !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type), + nb_alloc, nb_free); + + if (i == 0 || + nbv->grp[0].kernel_type != nbv->grp[i].kernel_type) + { + gmx_bool bSimpleList; + int enbnxninitcombrule; + + bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type); + + if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT))) + { + /* Plain LJ cut-off: we can optimize with combination rules */ + enbnxninitcombrule = enbnxninitcombruleDETECT; + } + else if (fr->vdwtype == evdwPME) + { + /* LJ-PME: we need to use a combination rule for the grid */ + if (fr->ljpme_combination_rule == eljpmeGEOM) + { + enbnxninitcombrule = enbnxninitcombruleGEOM; + } + else + { + enbnxninitcombrule = enbnxninitcombruleLB; + } + } + else + { + /* We use a full combination matrix: no rule required */ + enbnxninitcombrule = enbnxninitcombruleNONE; + } + + + snew(nbv->grp[i].nbat, 1); + nbnxn_atomdata_init(fp, + nbv->grp[i].nbat, + nbv->grp[i].kernel_type, + enbnxninitcombrule, + fr->ntype, fr->nbfp, + ir->opts.ngener, + bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1, + nb_alloc, nb_free); + } + else + { + nbv->grp[i].nbat = nbv->grp[0].nbat; + } + } +} + +void init_forcerec(FILE *fp, + const output_env_t oenv, + t_forcerec *fr, + t_fcdata *fcd, + const t_inputrec *ir, + const gmx_mtop_t *mtop, + const t_commrec *cr, + matrix box, + const char *tabfn, + const char *tabafn, + const char *tabpfn, + const char *tabbfn, + const char *nbpu_opt, + gmx_bool bNoSolvOpt, + real print_force) +{ + int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj; + real rtab; + char *env; + double dbl; + const t_block *cgs; + gmx_bool bGenericKernelOnly; + gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse; + gmx_bool bFEP_NonBonded; + t_nblists *nbl; + int *nm_ind, egp_flags; + + if (fr->hwinfo == NULL) + { + /* Detect hardware, gather information. + * In mdrun, hwinfo has already been set before calling init_forcerec. + * Here we ignore GPUs, as tools will not use them anyhow. + */ + fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE); + } + + /* By default we turn SIMD kernels on, but it might be turned off further down... */ + fr->use_simd_kernels = TRUE; + + fr->bDomDec = DOMAINDECOMP(cr); + + natoms = mtop->natoms; + + if (check_box(ir->ePBC, box)) + { + gmx_fatal(FARGS, check_box(ir->ePBC, box)); + } + + /* Test particle insertion ? */ + if (EI_TPI(ir->eI)) + { + /* Set to the size of the molecule to be inserted (the last one) */ + /* Because of old style topologies, we have to use the last cg + * instead of the last molecule type. 
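+ * fr->n_tpi is therefore set to the size of the last charge group, and
+ * checked against the size of the last molecule below.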
+ */ + cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs; + fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1]; + if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1]) + { + gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group."); + } + } + else + { + fr->n_tpi = 0; + } + + /* Copy AdResS parameters */ + if (ir->bAdress) + { + fr->adress_type = ir->adress->type; + fr->adress_const_wf = ir->adress->const_wf; + fr->adress_ex_width = ir->adress->ex_width; + fr->adress_hy_width = ir->adress->hy_width; + fr->adress_icor = ir->adress->icor; + fr->adress_site = ir->adress->site; + fr->adress_ex_forcecap = ir->adress->ex_forcecap; + fr->adress_do_hybridpairs = ir->adress->do_hybridpairs; + + + snew(fr->adress_group_explicit, ir->adress->n_energy_grps); + for (i = 0; i < ir->adress->n_energy_grps; i++) + { + fr->adress_group_explicit[i] = ir->adress->group_explicit[i]; + } + + fr->n_adress_tf_grps = ir->adress->n_tf_grps; + snew(fr->adress_tf_table_index, fr->n_adress_tf_grps); + for (i = 0; i < fr->n_adress_tf_grps; i++) + { + fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i]; + } + copy_rvec(ir->adress->refs, fr->adress_refs); + } + else + { + fr->adress_type = eAdressOff; + fr->adress_do_hybridpairs = FALSE; + } + + /* Copy the user determined parameters */ + fr->userint1 = ir->userint1; + fr->userint2 = ir->userint2; + fr->userint3 = ir->userint3; + fr->userint4 = ir->userint4; + fr->userreal1 = ir->userreal1; + fr->userreal2 = ir->userreal2; + fr->userreal3 = ir->userreal3; + fr->userreal4 = ir->userreal4; + + /* Shell stuff */ + fr->fc_stepsize = ir->fc_stepsize; + + /* Free energy */ + fr->efep = ir->efep; + fr->sc_alphavdw = ir->fepvals->sc_alpha; + if (ir->fepvals->bScCoul) + { + fr->sc_alphacoul = ir->fepvals->sc_alpha; + fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6); + } + else + { + fr->sc_alphacoul = 0; + fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */ + } + fr->sc_power = ir->fepvals->sc_power; + fr->sc_r_power = ir->fepvals->sc_r_power; + fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6); + + env = getenv("GMX_SCSIGMA_MIN"); + if (env != NULL) + { + dbl = 0; + sscanf(env, "%lf", &dbl); + fr->sc_sigma6_min = pow(dbl, 6); + if (fp) + { + fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl); + } + } + + fr->bNonbonded = TRUE; + if (getenv("GMX_NO_NONBONDED") != NULL) + { + /* turn off non-bonded calculations */ + fr->bNonbonded = FALSE; + md_print_warn(cr, fp, + "Found environment variable GMX_NO_NONBONDED.\n" + "Disabling nonbonded calculations.\n"); + } + + bGenericKernelOnly = FALSE; + + /* We now check in the NS code whether a particular combination of interactions + * can be used with water optimization, and disable it if that is not the case. + */ + + if (getenv("GMX_NB_GENERIC") != NULL) + { + if (fp != NULL) + { + fprintf(fp, + "Found environment variable GMX_NB_GENERIC.\n" + "Disabling all interaction-specific nonbonded kernels, will only\n" + "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n"); + } + bGenericKernelOnly = TRUE; + } + + if (bGenericKernelOnly == TRUE) + { + bNoSolvOpt = TRUE; + } + + if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) ) + { + fr->use_simd_kernels = FALSE; + if (fp != NULL) + { + fprintf(fp, + "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n" + "Disabling the usage of any SIMD-specific kernel routines (e.g. 
SSE2/SSE4.1/AVX).\n\n"); + } + } + + fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM); + + /* Check if we can/should do all-vs-all kernels */ + fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL); + fr->AllvsAll_work = NULL; + fr->AllvsAll_workgb = NULL; + + /* All-vs-all kernels have not been implemented in 4.6, and + * the SIMD group kernels are also buggy in this case. Non-SIMD + * group kernels are OK. See Redmine #1249. */ + if (fr->bAllvsAll) + { + fr->bAllvsAll = FALSE; + fr->use_simd_kernels = FALSE; + if (fp != NULL) + { + fprintf(fp, + "\nYour simulation settings would have triggered the efficient all-vs-all\n" + "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n" + "4.6. Also, we can't use the accelerated SIMD kernels here because\n" + "of an unfixed bug. The reference C kernels are correct, though, so\n" + "we are proceeding by disabling all CPU architecture-specific\n" + "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n" + "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n"); + } + } + + /* Neighbour searching stuff */ + fr->cutoff_scheme = ir->cutoff_scheme; + fr->bGrid = (ir->ns_type == ensGRID); + fr->ePBC = ir->ePBC; + + if (fr->cutoff_scheme == ecutsGROUP) + { + const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n" + "removed in a future release when 'verlet' supports all interaction forms.\n"; + + if (MASTER(cr)) + { + fprintf(stderr, "\n%s\n", note); + } + if (fp != NULL) + { + fprintf(fp, "\n%s\n", note); + } + } + + /* Determine if we will do PBC for distances in bonded interactions */ + if (fr->ePBC == epbcNONE) + { + fr->bMolPBC = FALSE; + } + else + { + if (!DOMAINDECOMP(cr)) + { + /* The group cut-off scheme and SHAKE assume charge groups + * are whole, but not using molpbc is faster in most cases. + */ + if (fr->cutoff_scheme == ecutsGROUP || + (ir->eConstrAlg == econtSHAKE && + (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 || + gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0))) + { + fr->bMolPBC = ir->bPeriodicMols; + } + else + { + fr->bMolPBC = TRUE; + if (getenv("GMX_USE_GRAPH") != NULL) + { + fr->bMolPBC = FALSE; + if (fp) + { + fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n"); + } + } + } + } + else + { + fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC); + } + } + fr->bGB = (ir->implicit_solvent == eisGBSA); + + fr->rc_scaling = ir->refcoord_scaling; + copy_rvec(ir->posres_com, fr->posres_com); + copy_rvec(ir->posres_comB, fr->posres_comB); + fr->rlist = cutoff_inf(ir->rlist); + fr->rlistlong = cutoff_inf(ir->rlistlong); + fr->eeltype = ir->coulombtype; + fr->vdwtype = ir->vdwtype; + fr->ljpme_combination_rule = ir->ljpme_combination_rule; + + fr->coulomb_modifier = ir->coulomb_modifier; + fr->vdw_modifier = ir->vdw_modifier; + + /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */ + switch (fr->eeltype) + { + case eelCUT: + fr->nbkernel_elec_interaction = (fr->bGB) ? 
GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB; + break; + + case eelRF: + case eelGRF: + case eelRF_NEC: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD; + break; + + case eelRF_ZERO: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD; + fr->coulomb_modifier = eintmodEXACTCUTOFF; + break; + + case eelSWITCH: + case eelSHIFT: + case eelUSER: + case eelENCADSHIFT: + case eelPMESWITCH: + case eelPMEUSER: + case eelPMEUSERSWITCH: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE; + break; + + case eelPME: + case eelEWALD: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD; + break; + + default: + gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]); + break; + } + + /* Vdw: Translate from mdp settings to kernel format */ + switch (fr->vdwtype) + { + case evdwCUT: + if (fr->bBHAM) + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM; + } + else + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES; + } + break; + case evdwPME: + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD; + break; + + case evdwSWITCH: + case evdwSHIFT: + case evdwUSER: + case evdwENCADSHIFT: + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE; + break; + + default: + gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]); + break; + } + + /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */ + fr->nbkernel_elec_modifier = fr->coulomb_modifier; + fr->nbkernel_vdw_modifier = fr->vdw_modifier; + ++ fr->rvdw = cutoff_inf(ir->rvdw); ++ fr->rvdw_switch = ir->rvdw_switch; ++ fr->rcoulomb = cutoff_inf(ir->rcoulomb); ++ fr->rcoulomb_switch = ir->rcoulomb_switch; ++ + fr->bTwinRange = fr->rlistlong > fr->rlist; + fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD); + + fr->reppow = mtop->ffparams.reppow; + + if (ir->cutoff_scheme == ecutsGROUP) + { + fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS)) + && !EVDW_PME(fr->vdwtype)); + /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */ + fr->bcoultab = !(fr->eeltype == eelCUT || + fr->eeltype == eelEWALD || + fr->eeltype == eelPME || + fr->eeltype == eelRF || + fr->eeltype == eelRF_ZERO); + + /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely + * going to be faster to tabulate the interaction than calling the generic kernel. ++ * However, if generic kernels have been requested we keep things analytically. + */ - if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH) ++ if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && ++ fr->nbkernel_vdw_modifier == eintmodPOTSWITCH && ++ bGenericKernelOnly == FALSE) + { + if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw)) + { + fr->bcoultab = TRUE; ++ /* Once we tabulate electrostatics, we can use the switch function for LJ, ++ * which would otherwise need two tables. 
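++ * (A table kernel for electrostatics combined with analytically
++ * switched LJ avoids generating a second table set.)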
++ */ + } + } + else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) || + ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD && + fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF && + (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT)))) + { - if (fr->rcoulomb != fr->rvdw) ++ if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE)) + { + fr->bcoultab = TRUE; + } + } + ++ if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH) ++ { ++ fr->bcoultab = TRUE; ++ } ++ if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH) ++ { ++ fr->bvdwtab = TRUE; ++ } ++ + if (getenv("GMX_REQUIRE_TABLES")) + { + fr->bvdwtab = TRUE; + fr->bcoultab = TRUE; + } + + if (fp) + { + fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]); + fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]); + } + + if (fr->bvdwtab == TRUE) + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE; + fr->nbkernel_vdw_modifier = eintmodNONE; + } + if (fr->bcoultab == TRUE) + { + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE; + fr->nbkernel_elec_modifier = eintmodNONE; + } + } + + if (ir->cutoff_scheme == ecutsVERLET) + { + if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS)) + { + gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]); + } + fr->bvdwtab = FALSE; + fr->bcoultab = FALSE; + } + + /* Tables are used for direct ewald sum */ + if (fr->bEwald) + { + if (EEL_PME(ir->coulombtype)) + { + if (fp) + { + fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n"); + } + if (ir->coulombtype == eelP3M_AD) + { + please_cite(fp, "Hockney1988"); + please_cite(fp, "Ballenegger2012"); + } + else + { + please_cite(fp, "Essmann95a"); + } + + if (ir->ewald_geometry == eewg3DC) + { + if (fp) + { + fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n"); + } + please_cite(fp, "In-Chul99a"); + } + } + fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol); + init_ewald_tab(&(fr->ewald_table), ir, fp); + if (fp) + { + fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n", + 1/fr->ewaldcoeff_q); + } + } + + if (EVDW_PME(ir->vdwtype)) + { + if (fp) + { + fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n"); + } + please_cite(fp, "Essmann95a"); + fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj); + if (fp) + { + fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n", + 1/fr->ewaldcoeff_lj); + } + } + + /* Electrostatics */ + fr->epsilon_r = ir->epsilon_r; + fr->epsilon_rf = ir->epsilon_rf; + fr->fudgeQQ = mtop->ffparams.fudgeQQ; - fr->rcoulomb_switch = ir->rcoulomb_switch; - fr->rcoulomb = cutoff_inf(ir->rcoulomb); + + /* Parameters for generalized RF */ + fr->zsquare = 0.0; + fr->temp = 0.0; + + if (fr->eeltype == eelGRF) + { + init_generalized_rf(fp, mtop, ir, fr); + } + + fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) || + gmx_mtop_ftype_count(mtop, F_POSRES) > 0 || + gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 || + IR_ELEC_FIELD(*ir) || + (fr->adress_icor != eAdressICOff) + ); + + if (fr->cutoff_scheme == ecutsGROUP && + ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) + { + /* Count the total number of charge groups */ + fr->cg_nalloc = ncg_mtop(mtop); + srenew(fr->cg_cm, fr->cg_nalloc); + } + if (fr->shift_vec == NULL) 
+ { + snew(fr->shift_vec, SHIFTS); + } + + if (fr->fshift == NULL) + { + snew(fr->fshift, SHIFTS); + } + + if (fr->nbfp == NULL) + { + fr->ntype = mtop->ffparams.atnr; + fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM); + if (EVDW_PME(fr->vdwtype)) + { + fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr); + } + } + + /* Copy the energy group exclusions */ + fr->egp_flags = ir->opts.egp_flags; + + /* Van der Waals stuff */ - fr->rvdw = cutoff_inf(ir->rvdw); - fr->rvdw_switch = ir->rvdw_switch; + if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM) + { + if (fr->rvdw_switch >= fr->rvdw) + { + gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)", + fr->rvdw_switch, fr->rvdw); + } + if (fp) + { + fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n", + (fr->eeltype == eelSWITCH) ? "switched" : "shifted", + fr->rvdw_switch, fr->rvdw); + } + } + + if (fr->bBHAM && EVDW_PME(fr->vdwtype)) + { + gmx_fatal(FARGS, "LJ PME not supported with Buckingham"); + } + + if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH)) + { + gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham"); + } + + if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET) + { + gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham"); + } + + if (fp) + { + fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n", + fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw); + } + + fr->eDispCorr = ir->eDispCorr; + if (ir->eDispCorr != edispcNO) + { + set_avcsixtwelve(fp, fr, mtop); + } + + if (fr->bBHAM) + { + set_bham_b_max(fp, fr, mtop); + } + + fr->gb_epsilon_solvent = ir->gb_epsilon_solvent; + + /* Copy the GBSA data (radius, volume and surftens for each + * atomtype) from the topology atomtype section to forcerec. + */ + snew(fr->atype_radius, fr->ntype); + snew(fr->atype_vol, fr->ntype); + snew(fr->atype_surftens, fr->ntype); + snew(fr->atype_gb_radius, fr->ntype); + snew(fr->atype_S_hct, fr->ntype); + + if (mtop->atomtypes.nr > 0) + { + for (i = 0; i < fr->ntype; i++) + { + fr->atype_radius[i] = mtop->atomtypes.radius[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_vol[i] = mtop->atomtypes.vol[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_surftens[i] = mtop->atomtypes.surftens[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i]; + } + } + + /* Generate the GB table if needed */ + if (fr->bGB) + { +#ifdef GMX_DOUBLE + fr->gbtabscale = 2000; +#else + fr->gbtabscale = 500; +#endif + + fr->gbtabr = 100; + fr->gbtab = make_gb_table(oenv, fr); + + init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm); + + /* Copy local gb data (for dd, this is done in dd_partition_system) */ + if (!DOMAINDECOMP(cr)) + { + make_local_gb(cr, fr->born, ir->gb_algorithm); + } + } + + /* Set the charge scaling */ + if (fr->epsilon_r != 0) + { + fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r; + } + else + { + /* eps = 0 is infinite dieletric: no coulomb interactions */ + fr->epsfac = 0; + } + + /* Reaction field constants */ + if (EEL_RF(fr->eeltype)) + { + calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf, + fr->rcoulomb, fr->temp, fr->zsquare, box, + &fr->kappa, &fr->k_rf, &fr->c_rf); + } + + /*This now calculates sum for q and c6*/ + set_chargesum(fp, fr, mtop); + + /* if we are using LR electrostatics, and they are tabulated, + * the tables will contain modified coulomb interactions. 
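+ * For example, with PME the short-range table holds erfc(beta*r)/r
+ * rather than plain 1/r.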
+ * Since we want to use the non-shifted ones for 1-4 + * coulombic interactions, we must have an extra set of tables. + */ + + /* Construct tables. + * A little unnecessary to make both vdw and coul tables sometimes, + * but what the heck... */ + + bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald || + (ir->eDispCorr != edispcNO && ir_vdw_switched(ir)); + + bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT || ++ fr->coulomb_modifier != eintmodNONE || ++ fr->vdw_modifier != eintmodNONE || + fr->bBHAM || fr->bEwald) && + (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 || + gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 || + gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0)); + + negp_pp = ir->opts.ngener - ir->nwall; + negptable = 0; + if (!bMakeTables) + { + bSomeNormalNbListsAreInUse = TRUE; + fr->nnblists = 1; + } + else + { + bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO); + for (egi = 0; egi < negp_pp; egi++) + { + for (egj = egi; egj < negp_pp; egj++) + { + egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)]; + if (!(egp_flags & EGP_EXCL)) + { + if (egp_flags & EGP_TABLE) + { + negptable++; + } + else + { + bSomeNormalNbListsAreInUse = TRUE; + } + } + } + } + if (bSomeNormalNbListsAreInUse) + { + fr->nnblists = negptable + 1; + } + else + { + fr->nnblists = negptable; + } + if (fr->nnblists > 1) + { + snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener); + } + } + + if (ir->adress) + { + fr->nnblists *= 2; + } + + snew(fr->nblists, fr->nnblists); + + /* This code automatically gives table length tabext without cut-off's, + * in that case grompp should already have checked that we do not need + * normal tables and we only generate tables for 1-4 interactions. + */ + rtab = ir->rlistlong + ir->tabext; + + if (bMakeTables) + { + /* make tables for ordinary interactions */ + if (bSomeNormalNbListsAreInUse) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]); + if (ir->adress) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]); + } + if (!bMakeSeparate14Table) + { + fr->tab14 = fr->nblists[0].table_elec_vdw; + } + m = 1; + } + else + { + m = 0; + } + if (negptable > 0) + { + /* Read the special tables for certain energy group pairs */ + nm_ind = mtop->groups.grps[egcENER].nm_ind; + for (egi = 0; egi < negp_pp; egi++) + { + for (egj = egi; egj < negp_pp; egj++) + { + egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)]; + if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL)) + { + nbl = &(fr->nblists[m]); + if (fr->nnblists > 1) + { + fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m; + } + /* Read the table file with the two energy groups names appended */ + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, + *mtop->groups.grpname[nm_ind[egi]], + *mtop->groups.grpname[nm_ind[egj]], + &fr->nblists[m]); + if (ir->adress) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, + *mtop->groups.grpname[nm_ind[egi]], + *mtop->groups.grpname[nm_ind[egj]], + &fr->nblists[fr->nnblists/2+m]); + } + m++; + } + else if (fr->nnblists > 1) + { + fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0; + } + } + } + } + } ++ else if ((fr->eDispCorr != edispcNO) && ++ ((fr->vdw_modifier == eintmodPOTSWITCH) || ++ (fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdw_modifier == eintmodPOTSHIFT))) ++ { ++ /* Tables might not be used for the potential modifier interactions per se, but ++ * we still need them to evaluate switch/shift dispersion corrections in this case. 
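++ * The correction integrates the difference between the modified and
++ * the plain r^-6 interaction over the cut-off region, which requires
++ * the tabulated form of the modified interaction.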
++ */ ++ make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]); ++ } ++ + if (bMakeSeparate14Table) + { + /* generate extra tables with plain Coulomb for 1-4 interactions only */ + fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab, + GMX_MAKETABLES_14ONLY); + } + + /* Read AdResS Thermo Force table if needed */ + if (fr->adress_icor == eAdressICThermoForce) + { + /* old todo replace */ + + if (ir->adress->n_tf_grps > 0) + { + make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box); + + } + else + { + /* load the default table */ + snew(fr->atf_tabs, 1); + fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box); + } + } + + /* Wall stuff */ + fr->nwall = ir->nwall; + if (ir->nwall && ir->wall_type == ewtTABLE) + { + make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr); + } + + if (fcd && tabbfn) + { + fcd->bondtab = make_bonded_tables(fp, + F_TABBONDS, F_TABBONDSNC, + mtop, tabbfn, "b"); + fcd->angletab = make_bonded_tables(fp, + F_TABANGLES, -1, + mtop, tabbfn, "a"); + fcd->dihtab = make_bonded_tables(fp, + F_TABDIHS, -1, + mtop, tabbfn, "d"); + } + else + { + if (debug) + { + fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n"); + } + } + + /* QM/MM initialization if requested + */ + if (ir->bQMMM) + { + fprintf(stderr, "QM/MM calculation requested.\n"); + } + + fr->bQMMM = ir->bQMMM; + fr->qr = mk_QMMMrec(); + + /* Set all the static charge group info */ + fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt, + &bFEP_NonBonded, + &fr->bExcl_IntraCGAll_InterCGNone); + if (DOMAINDECOMP(cr)) + { + fr->cginfo = NULL; + } + else + { + fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb); + } + + if (!DOMAINDECOMP(cr)) + { + forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop), + mtop->natoms, mtop->natoms, mtop->natoms); + } + + fr->print_force = print_force; + + + /* coarse load balancing vars */ + fr->t_fnbf = 0.; + fr->t_wait = 0.; + fr->timesteps = 0; + + /* Initialize neighbor search */ + init_ns(fp, cr, &fr->ns, fr, mtop); + + if (cr->duty & DUTY_PP) + { + gmx_nonbonded_setup(fr, bGenericKernelOnly); + /* + if (ir->bAdress) + { + gmx_setup_adress_kernels(fp,bGenericKernelOnly); + } + */ + } + + /* Initialize the thread working data for bonded interactions */ + init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr); + + snew(fr->excl_load, fr->nthreads+1); + + if (fr->cutoff_scheme == ecutsVERLET) + { + if (ir->rcoulomb != ir->rvdw) + { + gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical"); + } + + init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt); + } + + /* fr->ic is used both by verlet and group kernels (to some extent) now */ + init_interaction_const(fp, cr, &fr->ic, fr, rtab); + + if (ir->eDispCorr != edispcNO) + { + calc_enervirdiff(fp, ir->eDispCorr, fr); + } +} + +#define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r) +#define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i) +#define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b]) + +void pr_forcerec(FILE *fp, t_forcerec *fr) +{ + int i; + + pr_real(fp, fr->rlist); + pr_real(fp, fr->rcoulomb); + pr_real(fp, fr->fudgeQQ); + pr_bool(fp, fr->bGrid); + pr_bool(fp, fr->bTwinRange); + /*pr_int(fp,fr->cg0); + pr_int(fp,fr->hcg);*/ + for (i = 0; i < fr->nnblists; i++) + { + pr_int(fp, fr->nblists[i].table_elec_vdw.n); + } + pr_real(fp, fr->rcoulomb_switch); + pr_real(fp, fr->rcoulomb); + + fflush(fp); +} + +void forcerec_set_excl_load(t_forcerec *fr, + const 
gmx_localtop_t *top) +{ + const int *ind, *a; + int t, i, j, ntot, n, ntarget; + + ind = top->excls.index; + a = top->excls.a; + + ntot = 0; + for (i = 0; i < top->excls.nr; i++) + { + for (j = ind[i]; j < ind[i+1]; j++) + { + if (a[j] > i) + { + ntot++; + } + } + } + + fr->excl_load[0] = 0; + n = 0; + i = 0; + for (t = 1; t <= fr->nthreads; t++) + { + ntarget = (ntot*t)/fr->nthreads; + while (i < top->excls.nr && n < ntarget) + { + for (j = ind[i]; j < ind[i+1]; j++) + { + if (a[j] > i) + { + n++; + } + } + i++; + } + fr->excl_load[t] = i; + } +} diff --cc src/gromacs/mdlib/ns.c index 5208983dec,0000000000..496f605300 mode 100644,000000..100644 --- a/src/gromacs/mdlib/ns.c +++ b/src/gromacs/mdlib/ns.c @@@ -1,2978 -1,0 +1,2970 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include "sysstuff.h" +#include "gromacs/utility/smalloc.h" +#include "macros.h" +#include "gromacs/math/utilities.h" +#include "vec.h" +#include "types/commrec.h" +#include "network.h" +#include "nsgrid.h" +#include "force.h" +#include "nonbonded.h" +#include "ns.h" +#include "pbc.h" +#include "names.h" +#include "gmx_fatal.h" +#include "nrnb.h" +#include "txtdump.h" +#include "mtop_util.h" + +#include "domdec.h" +#include "adress.h" + + +/* + * E X C L U S I O N H A N D L I N G + */ + +#ifdef DEBUG +static void SETEXCL_(t_excl e[], atom_id i, atom_id j) +{ + e[j] = e[j] | (1< 0) ? length % simd_width : 0; + + return (offset == 0) ? 
length : length-offset+simd_width; +} +/************************************************ + * + * U T I L I T I E S F O R N S + * + ************************************************/ + +void reallocate_nblist(t_nblist *nl) +{ + if (gmx_debug_at) + { + fprintf(debug, "reallocating neighborlist (ielec=%d, ivdw=%d, igeometry=%d, type=%d), maxnri=%d\n", + nl->ielec, nl->ivdw, nl->igeometry, nl->type, nl->maxnri); + } + srenew(nl->iinr, nl->maxnri); + if (nl->igeometry == GMX_NBLIST_GEOMETRY_CG_CG) + { + srenew(nl->iinr_end, nl->maxnri); + } + srenew(nl->gid, nl->maxnri); + srenew(nl->shift, nl->maxnri); + srenew(nl->jindex, nl->maxnri+1); +} + + +static void init_nblist(FILE *log, t_nblist *nl_sr, t_nblist *nl_lr, + int maxsr, int maxlr, + int ivdw, int ivdwmod, + int ielec, int ielecmod, - int igeometry, int type) ++ int igeometry, int type, ++ gmx_bool bElecAndVdwSwitchDiffers) +{ + t_nblist *nl; + int homenr; + int i, nn; + + for (i = 0; (i < 2); i++) + { + nl = (i == 0) ? nl_sr : nl_lr; + homenr = (i == 0) ? maxsr : maxlr; + + if (nl == NULL) + { + continue; + } + + + /* Set coul/vdw in neighborlist, and for the normal loops we determine + * an index of which one to call. + */ + nl->ivdw = ivdw; + nl->ivdwmod = ivdwmod; + nl->ielec = ielec; + nl->ielecmod = ielecmod; + nl->type = type; + nl->igeometry = igeometry; + + if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY) + { + nl->igeometry = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE; + } + + /* This will also set the simd_padding_width field */ - gmx_nonbonded_set_kernel_pointers( (i == 0) ? log : NULL, nl); ++ gmx_nonbonded_set_kernel_pointers( (i == 0) ? log : NULL, nl, bElecAndVdwSwitchDiffers); + + /* maxnri is influenced by the number of shifts (maximum is 8) + * and the number of energy groups. + * If it is not enough, nl memory will be reallocated during the run. + * 4 seems to be a reasonable factor, which only causes reallocation + * during runs with tiny systems and many energy groups. + */ + nl->maxnri = homenr*4; + nl->maxnrj = 0; + nl->nri = -1; + nl->nrj = 0; + nl->iinr = NULL; + nl->gid = NULL; + nl->shift = NULL; + nl->jindex = NULL; + nl->jjnr = NULL; + nl->excl_fep = NULL; + reallocate_nblist(nl); + nl->jindex[0] = 0; + + if (debug) + { + fprintf(debug, "Initiating neighbourlist (ielec=%d, ivdw=%d, type=%d) for %s interactions,\nwith %d SR, %d LR atoms.\n", + nl->ielec, nl->ivdw, nl->type, gmx_nblist_geometry_names[nl->igeometry], maxsr, maxlr); + } + } +} + +void init_neighbor_list(FILE *log, t_forcerec *fr, int homenr) +{ + /* Make maxlr tunable! (does not seem to be a big difference though) + * This parameter determines the number of i particles in a long range + * neighbourlist. Too few means many function calls, too many means + * cache thrashing. + */ + int maxsr, maxsr_wat, maxlr, maxlr_wat; - int ielec, ielecf, ivdw, ielecmod, ielecmodf, ivdwmod, type; ++ int ielec, ivdw, ielecmod, ivdwmod, type; + int solvent; + int igeometry_def, igeometry_w, igeometry_ww; + int i; ++ gmx_bool bElecAndVdwSwitchDiffers; + t_nblists *nbl; + + /* maxsr = homenr-fr->nWatMol*3; */ + maxsr = homenr; + + if (maxsr < 0) + { + gmx_fatal(FARGS, "%s, %d: Negative number of short range atoms.\n" + "Call your Gromacs dealer for assistance.", __FILE__, __LINE__); + } + /* This is just for initial allocation, so we do not reallocate + * all the nlist arrays many times in a row. + * The numbers seem very accurate, but they are uncritical. 
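+ * For example, homenr = 4000 atoms of which 1000 are SPC waters gives + * maxsr = 4000 and maxsr_wat = min(1000, 1334) = 1000, while twin-range + * runs start from only maxlr = 50 long-range i-particles.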
+ */ + maxsr_wat = min(fr->nWatMol, (homenr+2)/3); + if (fr->bTwinRange) + { + maxlr = 50; + maxlr_wat = min(maxsr_wat, maxlr); + } + else + { + maxlr = maxlr_wat = 0; + } + + /* Determine the values for ielec/ivdw. */ - ielec = fr->nbkernel_elec_interaction; - ivdw = fr->nbkernel_vdw_interaction; - ielecmod = fr->nbkernel_elec_modifier; - ivdwmod = fr->nbkernel_vdw_modifier; - type = GMX_NBLIST_INTERACTION_STANDARD; ++ ielec = fr->nbkernel_elec_interaction; ++ ivdw = fr->nbkernel_vdw_interaction; ++ ielecmod = fr->nbkernel_elec_modifier; ++ ivdwmod = fr->nbkernel_vdw_modifier; ++ type = GMX_NBLIST_INTERACTION_STANDARD; ++ bElecAndVdwSwitchDiffers = ( (fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw)); + + fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0); + if (!fr->ns.bCGlist) + { + igeometry_def = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE; + } + else + { + igeometry_def = GMX_NBLIST_GEOMETRY_CG_CG; + if (log != NULL) + { + fprintf(log, "\nUsing charge-group - charge-group neighbor lists and kernels\n\n"); + } + } + + if (fr->solvent_opt == esolTIP4P) + { + igeometry_w = GMX_NBLIST_GEOMETRY_WATER4_PARTICLE; + igeometry_ww = GMX_NBLIST_GEOMETRY_WATER4_WATER4; + } + else + { + igeometry_w = GMX_NBLIST_GEOMETRY_WATER3_PARTICLE; + igeometry_ww = GMX_NBLIST_GEOMETRY_WATER3_WATER3; + } + + for (i = 0; i < fr->nnblists; i++) + { + nbl = &(fr->nblists[i]); + + if ((fr->adress_type != eAdressOff) && (i >= fr->nnblists/2)) + { + type = GMX_NBLIST_INTERACTION_ADRESS; + } + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ], &nbl->nlist_lr[eNL_VDWQQ], - maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, igeometry_def, type); ++ maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, igeometry_def, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDW], &nbl->nlist_lr[eNL_VDW], - maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, igeometry_def, type); ++ maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, igeometry_def, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ], &nbl->nlist_lr[eNL_QQ], - maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_def, type); ++ maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_def, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_WATER], &nbl->nlist_lr[eNL_VDWQQ_WATER], - maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_w, type); ++ maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_w, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ_WATER], &nbl->nlist_lr[eNL_QQ_WATER], - maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_w, type); ++ maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_w, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_WATERWATER], &nbl->nlist_lr[eNL_VDWQQ_WATERWATER], - maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_ww, type); ++ maxsr_wat, maxlr_wat, ivdw, ivdwmod, ielec, ielecmod, igeometry_ww, type, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ_WATERWATER], &nbl->nlist_lr[eNL_QQ_WATERWATER], - maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_ww, type); ++ maxsr_wat, maxlr_wat, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, igeometry_ww, type, bElecAndVdwSwitchDiffers); + + /* Did we get the solvent loops so we can use optimized water kernels? 
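+ * If any of them is missing we fall back to esolNO below, so that all + * interactions go through the plain particle-particle kernels.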
*/ + if (nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf == NULL + || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf == NULL +#ifndef DISABLE_WATERWATER_NLIST + || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf == NULL + || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf == NULL +#endif + ) + { + fr->solvent_opt = esolNO; + if (log != NULL) + { + fprintf(log, "Note: The available nonbonded kernels do not support water optimization - disabling.\n"); + } + } + + if (fr->efep != efepNO) + { - if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */ - { - ielecf = GMX_NBKERNEL_ELEC_EWALD; - ielecmodf = eintmodNONE; - } - else - { - ielecf = ielec; - ielecmodf = ielecmod; - } - + init_nblist(log, &nbl->nlist_sr[eNL_VDWQQ_FREE], &nbl->nlist_lr[eNL_VDWQQ_FREE], - maxsr, maxlr, ivdw, ivdwmod, ielecf, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY); ++ maxsr, maxlr, ivdw, ivdwmod, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_VDW_FREE], &nbl->nlist_lr[eNL_VDW_FREE], - maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY); ++ maxsr, maxlr, ivdw, ivdwmod, GMX_NBKERNEL_ELEC_NONE, eintmodNONE, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers); + init_nblist(log, &nbl->nlist_sr[eNL_QQ_FREE], &nbl->nlist_lr[eNL_QQ_FREE], - maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielecf, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY); ++ maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers); + } + } + /* QMMM MM list */ + if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom) + { + init_nblist(log, &fr->QMMMlist, NULL, - maxsr, maxlr, 0, 0, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_STANDARD); ++ maxsr, maxlr, 0, 0, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_STANDARD, bElecAndVdwSwitchDiffers); + } + + if (log != NULL) + { + fprintf(log, "\n"); + } + + fr->ns.nblist_initialized = TRUE; +} + +static void reset_nblist(t_nblist *nl) +{ + nl->nri = -1; + nl->nrj = 0; + if (nl->jindex) + { + nl->jindex[0] = 0; + } +} + +static void reset_neighbor_lists(t_forcerec *fr, gmx_bool bResetSR, gmx_bool bResetLR) +{ + int n, i; + + if (fr->bQMMM) + { + /* only reset the short-range nblist */ + reset_nblist(&(fr->QMMMlist)); + } + + for (n = 0; n < fr->nnblists; n++) + { + for (i = 0; i < eNL_NR; i++) + { + if (bResetSR) + { + reset_nblist( &(fr->nblists[n].nlist_sr[i]) ); + } + if (bResetLR) + { + reset_nblist( &(fr->nblists[n].nlist_lr[i]) ); + } + } + } +} + + + + +static gmx_inline void new_i_nblist(t_nblist *nlist, atom_id i_atom, int shift, int gid) +{ + int i, k, nri, nshift; + + nri = nlist->nri; + + /* Check whether we have to increase the i counter */ + if ((nri == -1) || + (nlist->iinr[nri] != i_atom) || + (nlist->shift[nri] != shift) || + (nlist->gid[nri] != gid)) + { + /* This is something else. Now see if any entries have + * been added in the list of the previous atom. 
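+ * An i entry is only committed once it holds at least one j particle + * and has a valid energy-group id; otherwise it is overwritten in + * place, so empty i entries do not accumulate.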
+ */ + if ((nri == -1) || + ((nlist->jindex[nri+1] > nlist->jindex[nri]) && + (nlist->gid[nri] != -1))) + { + /* If so increase the counter */ + nlist->nri++; + nri++; + if (nlist->nri >= nlist->maxnri) + { + nlist->maxnri += over_alloc_large(nlist->nri); + reallocate_nblist(nlist); + } + } + /* Set the number of neighbours and the atom number */ + nlist->jindex[nri+1] = nlist->jindex[nri]; + nlist->iinr[nri] = i_atom; + nlist->gid[nri] = gid; + nlist->shift[nri] = shift; + } + else + { + /* Adding to previous list. First remove possible previous padding */ + if (nlist->simd_padding_width > 1) + { + while (nlist->nrj > 0 && nlist->jjnr[nlist->nrj-1] < 0) + { + nlist->nrj--; + } + } + } +} + +static gmx_inline void close_i_nblist(t_nblist *nlist) +{ + int nri = nlist->nri; + int len; + + if (nri >= 0) + { + /* Add elements up to padding. Since we allocate memory in units + * of the simd_padding width, we do not have to check for possible + * list reallocation here. + */ + while ((nlist->nrj % nlist->simd_padding_width) != 0) + { + /* Use -4 here, so we can write forces for 4 atoms before real data */ + nlist->jjnr[nlist->nrj++] = -4; + } + nlist->jindex[nri+1] = nlist->nrj; + + len = nlist->nrj - nlist->jindex[nri]; + } +} + +static gmx_inline void close_nblist(t_nblist *nlist) +{ + /* Only close this nblist when it has been initialized. + * Avoid the creation of i-lists with no j-particles. + */ + if (nlist->nrj == 0) + { + /* Some assembly kernels do not support empty lists, + * make sure here that we don't generate any empty lists. + * With the current ns code this branch is taken in two cases: + * No i-particles at all: nri=-1 here + * There are i-particles, but no j-particles; nri=0 here + */ + nlist->nri = 0; + } + else + { + /* Close list number nri by incrementing the count */ + nlist->nri++; + } +} + +static gmx_inline void close_neighbor_lists(t_forcerec *fr, gmx_bool bMakeQMMMnblist) +{ + int n, i; + + if (bMakeQMMMnblist) + { + close_nblist(&(fr->QMMMlist)); + } + + for (n = 0; n < fr->nnblists; n++) + { + for (i = 0; (i < eNL_NR); i++) + { + close_nblist(&(fr->nblists[n].nlist_sr[i])); + close_nblist(&(fr->nblists[n].nlist_lr[i])); + } + } +} + + +static gmx_inline void add_j_to_nblist(t_nblist *nlist, atom_id j_atom, gmx_bool bLR) +{ + int nrj = nlist->nrj; + + if (nlist->nrj >= nlist->maxnrj) + { + nlist->maxnrj = round_up_to_simd_width(over_alloc_small(nlist->nrj + 1), nlist->simd_padding_width); + + if (gmx_debug_at) + { + fprintf(debug, "Increasing %s nblist (ielec=%d,ivdw=%d,type=%d,igeometry=%d) j size to %d\n", + bLR ? "LR" : "SR", nlist->ielec, nlist->ivdw, nlist->type, nlist->igeometry, nlist->maxnrj); + } + + srenew(nlist->jjnr, nlist->maxnrj); + } + + nlist->jjnr[nrj] = j_atom; + nlist->nrj++; +} + +static gmx_inline void add_j_to_nblist_cg(t_nblist *nlist, + atom_id j_start, int j_end, + t_excl *bexcl, gmx_bool i_is_j, + gmx_bool bLR) +{ + int nrj = nlist->nrj; + int j; + + if (nlist->nrj >= nlist->maxnrj) + { + nlist->maxnrj = over_alloc_small(nlist->nrj + 1); + if (gmx_debug_at) + { + fprintf(debug, "Increasing %s nblist (ielec=%d,ivdw=%d,type=%d,igeometry=%d) j size to %d\n", + bLR ? "LR" : "SR", nlist->ielec, nlist->ivdw, nlist->type, nlist->igeometry, nlist->maxnrj); + } + + srenew(nlist->jjnr, nlist->maxnrj); + srenew(nlist->jjnr_end, nlist->maxnrj); + srenew(nlist->excl, nlist->maxnrj*MAX_CGCGSIZE); + } + + nlist->jjnr[nrj] = j_start; + nlist->jjnr_end[nrj] = j_end; + + if (j_end - j_start > MAX_CGCGSIZE) + { + gmx_fatal(FARGS, "The charge-group - charge-group neighborlist does not support charge groups larger than %d, found a charge group of size %d", MAX_CGCGSIZE, j_end-j_start); + } + + /* Set the exclusions */ + for (j = j_start; j < j_end; j++) + { + nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j]; + } + if (i_is_j) + { + /* Avoid double counting of intra-cg interactions */ + for (j = 1; j < j_end-j_start; j++) + { + nlist->excl[nrj*MAX_CGCGSIZE + j] |= (1<<j) - 1; + } + } + + nlist->nrj++; +} + +typedef void + put_in_list_t (gmx_bool bHaveVdW[], + int ngid, + t_mdatoms * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool bDoVdW, + gmx_bool bDoCoul, + int solvent_opt); + +static void +put_in_list_at(gmx_bool bHaveVdW[], + int ngid, + t_mdatoms * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool bDoVdW, + gmx_bool bDoCoul, + int solvent_opt) +{ + /* The a[] index has been removed, + * to put it back in i_atom should be a[i0] and jj should be a[jj]. + */ + t_nblist * vdwc; + t_nblist * vdw; + t_nblist * coul; + t_nblist * vdwc_free = NULL; + t_nblist * vdw_free = NULL; + t_nblist * coul_free = NULL; + t_nblist * vdwc_ww = NULL; + t_nblist * coul_ww = NULL; + + int i, j, jcg, igid, gid, nbl_ind, ind_ij; + atom_id jj, jj0, jj1, i_atom; + int i0, nicg, len; + + int *cginfo; + int *type, *typeB; + real *charge, *chargeB; + real qi, qiB, qq, rlj; + gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert; + gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol; + int iwater, jwater; + t_nblist *nlist; + + /* Copy some pointers */ + cginfo = fr->cginfo; + charge = md->chargeA; + chargeB = md->chargeB; + type = md->typeA; + typeB = md->typeB; + bPert = md->bPerturbed; + + /* Get atom range */ + i0 = index[icg]; + nicg = index[icg+1]-i0; + + /* Get the i charge group info */ + igid = GET_CGINFO_GID(cginfo[icg]); + + iwater = (solvent_opt != esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO; + + bFreeEnergy = FALSE; + if (md->nPerturbed) + { + /* Check if any of the particles involved are perturbed. + * If not we can do the cheaper normal put_in_list + * and use more solvent optimization. 
+ */ + for (i = 0; i < nicg; i++) + { + bFreeEnergy |= bPert[i0+i]; + } + /* Loop over the j charge groups */ + for (j = 0; (j < nj && !bFreeEnergy); j++) + { + jcg = jjcg[j]; + jj0 = index[jcg]; + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bFreeEnergy |= bPert[jj]; + } + } + } + + /* Unpack pointers to neighbourlist structs */ + if (fr->nnblists == 1) + { + nbl_ind = 0; + } + else + { + nbl_ind = fr->gid2nblists[GID(igid, jgid, ngid)]; + } + if (bLR) + { + nlist = fr->nblists[nbl_ind].nlist_lr; + } + else + { + nlist = fr->nblists[nbl_ind].nlist_sr; + } + + if (iwater != esolNO) + { + vdwc = &nlist[eNL_VDWQQ_WATER]; + vdw = &nlist[eNL_VDW]; + coul = &nlist[eNL_QQ_WATER]; +#ifndef DISABLE_WATERWATER_NLIST + vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER]; + coul_ww = &nlist[eNL_QQ_WATERWATER]; +#endif + } + else + { + vdwc = &nlist[eNL_VDWQQ]; + vdw = &nlist[eNL_VDW]; + coul = &nlist[eNL_QQ]; + } + + if (!bFreeEnergy) + { + if (iwater != esolNO) + { + /* Loop over the atoms in the i charge group */ + i_atom = i0; + gid = GID(igid, jgid, ngid); + /* Create new i_atom for each energy group */ + if (bDoCoul && bDoVdW) + { + new_i_nblist(vdwc, i_atom, shift, gid); +#ifndef DISABLE_WATERWATER_NLIST + new_i_nblist(vdwc_ww, i_atom, shift, gid); +#endif + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); +#ifndef DISABLE_WATERWATER_NLIST + new_i_nblist(coul_ww, i_atom, shift, gid); +#endif + } + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + if (jcg == icg) + { + continue; + } + + jj0 = index[jcg]; + jwater = GET_CGINFO_SOLOPT(cginfo[jcg]); + + if (iwater == esolSPC && jwater == esolSPC) + { + /* Interaction between two SPC molecules */ + if (!bDoCoul) + { + /* VdW only - only first atoms in each water interact */ + add_j_to_nblist(vdw, jj0, bLR); + } + else + { +#ifdef DISABLE_WATERWATER_NLIST + /* Add entries for the three atoms - only do VdW if we need to */ + if (!bDoVdW) + { + add_j_to_nblist(coul, jj0, bLR); + } + else + { + add_j_to_nblist(vdwc, jj0, bLR); + } + add_j_to_nblist(coul, jj0+1, bLR); + add_j_to_nblist(coul, jj0+2, bLR); +#else + /* One entry for the entire water-water interaction */ + if (!bDoVdW) + { + add_j_to_nblist(coul_ww, jj0, bLR); + } + else + { + add_j_to_nblist(vdwc_ww, jj0, bLR); + } +#endif + } + } + else if (iwater == esolTIP4P && jwater == esolTIP4P) + { + /* Interaction between two TIP4p molecules */ + if (!bDoCoul) + { + /* VdW only - only first atoms in each water interact */ + add_j_to_nblist(vdw, jj0, bLR); + } + else + { +#ifdef DISABLE_WATERWATER_NLIST + /* Add entries for the four atoms - only do VdW if we need to */ + if (bDoVdW) + { + add_j_to_nblist(vdw, jj0, bLR); + } + add_j_to_nblist(coul, jj0+1, bLR); + add_j_to_nblist(coul, jj0+2, bLR); + add_j_to_nblist(coul, jj0+3, bLR); +#else + /* One entry for the entire water-water interaction */ + if (!bDoVdW) + { + add_j_to_nblist(coul_ww, jj0, bLR); + } + else + { + add_j_to_nblist(vdwc_ww, jj0, bLR); + } +#endif + } + } + else + { + /* j charge group is not water, but i is. + * Add entries to the water-other_atom lists; the geometry of the water + * molecule doesn't matter - that is taken care of in the nonbonded kernel, + * so we don't care if it is SPC or TIP4P... 
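+ * Each j atom is routed purely on its parameters: charge only goes to + * the coul list, LJ only to the vdw list, and both to vdwc.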
+ */ + + jj1 = index[jcg+1]; + + if (!bDoVdW) + { + for (jj = jj0; (jj < jj1); jj++) + { + if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + else if (!bDoCoul) + { + for (jj = jj0; (jj < jj1); jj++) + { + if (bHaveVdW[type[jj]]) + { + add_j_to_nblist(vdw, jj, bLR); + } + } + } + else + { + /* _charge_ _groups_ interact with both coulomb and LJ */ + /* Check which atoms we should add to the lists! */ + for (jj = jj0; (jj < jj1); jj++) + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + } + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); +#ifndef DISABLE_WATERWATER_NLIST + close_i_nblist(coul_ww); + close_i_nblist(vdwc_ww); +#endif + } + else + { + /* no solvent as i charge group */ + /* Loop over the atoms in the i charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + qi = charge[i_atom]; + + /* Create new i_atom for each energy group */ + if (bDoVdW && bDoCoul) + { + new_i_nblist(vdwc, i_atom, shift, gid); + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); + } + bDoVdW_i = (bDoVdW && bHaveVdW[type[i_atom]]); + bDoCoul_i = (bDoCoul && qi != 0); + + if (bDoVdW_i || bDoCoul_i) + { + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + /* Check for large charge groups */ + if (jcg == icg) + { + jj0 = i0 + i + 1; + } + else + { + jj0 = index[jcg]; + } + + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bNotEx = NOTEXCL(bExcl, i, jj); + + if (bNotEx) + { + if (!bDoVdW_i) + { + if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + else if (!bDoCoul_i) + { + if (bHaveVdW[type[jj]]) + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + } + } + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); + } + } + } + else + { + /* we are doing free energy */ + vdwc_free = &nlist[eNL_VDWQQ_FREE]; + vdw_free = &nlist[eNL_VDW_FREE]; + coul_free = &nlist[eNL_QQ_FREE]; + /* Loop over the atoms in the i charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + qi = charge[i_atom]; + qiB = chargeB[i_atom]; + + /* Create new i_atom for each energy group */ + if (bDoVdW && bDoCoul) + { + new_i_nblist(vdwc, i_atom, shift, gid); + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); + } + + new_i_nblist(vdw_free, i_atom, shift, gid); + new_i_nblist(coul_free, i_atom, shift, gid); + new_i_nblist(vdwc_free, i_atom, shift, gid); + + bDoVdW_i = (bDoVdW && + (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]])); + bDoCoul_i = (bDoCoul && (qi != 0 || qiB != 0)); + /* For TIP4P the first atom does not have a charge, + * but the last three do. So we should still put an atom + * without LJ but with charge in the water-atom neighborlist + * for a TIP4p i charge group. + * For SPC type water the first atom has LJ and charge, + * so there is no such problem. 
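+ * Hence for solvent i charge groups we use the plain bDoCoul flag, so + * charged dummy sites still end up in the coulomb lists.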
+ */ + if (iwater == esolNO) + { + bDoCoul_i_sol = bDoCoul_i; + } + else + { + bDoCoul_i_sol = bDoCoul; + } + + if (bDoVdW_i || bDoCoul_i_sol) + { + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + /* Check for large charge groups */ + if (jcg == icg) + { + jj0 = i0 + i + 1; + } + else + { + jj0 = index[jcg]; + } + + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + bFree = bPert[i_atom]; + for (jj = jj0; (jj < jj1); jj++) + { + bFreeJ = bFree || bPert[jj]; + /* Complicated if, because the water H's should also + * see perturbed j-particles + */ + if (iwater == esolNO || i == 0 || bFreeJ) + { + bNotEx = NOTEXCL(bExcl, i, jj); + + if (bNotEx) + { + if (bFreeJ) + { + if (!bDoVdW_i) + { + if (charge[jj] != 0 || chargeB[jj] != 0) + { + add_j_to_nblist(coul_free, jj, bLR); + } + } + else if (!bDoCoul_i) + { + if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) + { + add_j_to_nblist(vdw_free, jj, bLR); + } + } + else + { + if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) + { + if (charge[jj] != 0 || chargeB[jj] != 0) + { + add_j_to_nblist(vdwc_free, jj, bLR); + } + else + { + add_j_to_nblist(vdw_free, jj, bLR); + } + } + else if (charge[jj] != 0 || chargeB[jj] != 0) + { + add_j_to_nblist(coul_free, jj, bLR); + } + } + } + else if (!bDoVdW_i) + { + /* This is done whether or not bWater is set */ + if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + else if (!bDoCoul_i_sol) + { + if (bHaveVdW[type[jj]]) + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdw, jj, bLR); + } + } + else if (charge[jj] != 0) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + } + } + } + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); + close_i_nblist(vdw_free); + close_i_nblist(coul_free); + close_i_nblist(vdwc_free); + } + } +} + +static void +put_in_list_adress(gmx_bool bHaveVdW[], + int ngid, + t_mdatoms * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool bDoVdW, + gmx_bool bDoCoul, + int solvent_opt) +{ + /* The a[] index has been removed, + * to put it back in i_atom should be a[i0] and jj should be a[jj]. + */ + t_nblist * vdwc; + t_nblist * vdw; + t_nblist * coul; + t_nblist * vdwc_adress = NULL; + t_nblist * vdw_adress = NULL; + t_nblist * coul_adress = NULL; + t_nblist * vdwc_ww = NULL; + t_nblist * coul_ww = NULL; + + int i, j, jcg, igid, gid, nbl_ind, nbl_ind_adress; + atom_id jj, jj0, jj1, i_atom; + int i0, nicg, len; + + int *cginfo; + int *type, *typeB; + real *charge, *chargeB; + real *wf; + real qi, qiB, qq, rlj; + gmx_bool bFreeEnergy, bFree, bFreeJ, bNotEx, *bPert; + gmx_bool bDoVdW_i, bDoCoul_i, bDoCoul_i_sol; + gmx_bool b_hybrid; + gmx_bool j_all_atom; + int iwater, jwater; + t_nblist *nlist, *nlist_adress; + gmx_bool bEnergyGroupCG; + + /* Copy some pointers */ + cginfo = fr->cginfo; + charge = md->chargeA; + chargeB = md->chargeB; + type = md->typeA; + typeB = md->typeB; + bPert = md->bPerturbed; + wf = md->wf; + + /* Get atom range */ + i0 = index[icg]; + nicg = index[icg+1]-i0; + + /* Get the i charge group info */ + igid = GET_CGINFO_GID(cginfo[icg]); + + iwater = (solvent_opt != esolNO) ? 
GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO; + + if (md->nPerturbed) + { + gmx_fatal(FARGS, "AdResS does not support free energy perturbation\n"); + } + + /* Unpack pointers to neighbourlist structs */ + if (fr->nnblists == 2) + { + nbl_ind = 0; + nbl_ind_adress = 1; + } + else + { + nbl_ind = fr->gid2nblists[GID(igid, jgid, ngid)]; + nbl_ind_adress = nbl_ind+fr->nnblists/2; + } + if (bLR) + { + nlist = fr->nblists[nbl_ind].nlist_lr; + nlist_adress = fr->nblists[nbl_ind_adress].nlist_lr; + } + else + { + nlist = fr->nblists[nbl_ind].nlist_sr; + nlist_adress = fr->nblists[nbl_ind_adress].nlist_sr; + } + + + vdwc = &nlist[eNL_VDWQQ]; + vdw = &nlist[eNL_VDW]; + coul = &nlist[eNL_QQ]; + + vdwc_adress = &nlist_adress[eNL_VDWQQ]; + vdw_adress = &nlist_adress[eNL_VDW]; + coul_adress = &nlist_adress[eNL_QQ]; + + /* We do not support solvent optimization with AdResS for now. + For this we would need hybrid solvent-other kernels */ + + /* no solvent as i charge group */ + /* Loop over the atoms in the i charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + qi = charge[i_atom]; + + /* Create new i_atom for each energy group */ + if (bDoVdW && bDoCoul) + { + new_i_nblist(vdwc, i_atom, shift, gid); + new_i_nblist(vdwc_adress, i_atom, shift, gid); + + } + if (bDoVdW) + { + new_i_nblist(vdw, i_atom, shift, gid); + new_i_nblist(vdw_adress, i_atom, shift, gid); + + } + if (bDoCoul) + { + new_i_nblist(coul, i_atom, shift, gid); + new_i_nblist(coul_adress, i_atom, shift, gid); + } + bDoVdW_i = (bDoVdW && bHaveVdW[type[i_atom]]); + bDoCoul_i = (bDoCoul && qi != 0); + + /* Here we find out whether the energy-group interaction belongs to a + * coarse-grained (vsite) or atomistic interaction. Note that, because + * interactions between coarse-grained and other (atomistic) energy groups + * are excluded automatically by grompp, it is sufficient to check for + * the group id of atom i (igid) */ + bEnergyGroupCG = !egp_explicit(fr, igid); + + if (bDoVdW_i || bDoCoul_i) + { + /* Loop over the j charge groups */ + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + + /* Check for large charge groups */ + if (jcg == icg) + { + jj0 = i0 + i + 1; + } + else + { + jj0 = index[jcg]; + } + + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bNotEx = NOTEXCL(bExcl, i, jj); + + /* Now we have to exclude interactions which will be zero + * anyway due to the AdResS weights (in previous implementations + * this was done in the force kernel). This is necessary as + * pure interactions (those with b_hybrid=false, i.e. w_i*w_j==1 or 0) + * are put into neighbour lists which will be passed to the + * standard (optimized) kernels for speed. The interactions with + * b_hybrid=true are placed into the _adress neighbour lists and + * processed by the generic AdResS kernel. 
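+ * Consequently b_hybrid below is FALSE only when both weights are + * (within GMX_REAL_EPS) 0 or both are 1; every mixed pair goes to the + * _adress lists.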
+ */ + if ( (bEnergyGroupCG && + wf[i_atom] >= 1-GMX_REAL_EPS && wf[jj] >= 1-GMX_REAL_EPS ) || + ( !bEnergyGroupCG && wf[jj] <= GMX_REAL_EPS ) ) + { + continue; + } + + b_hybrid = !((wf[i_atom] >= 1-GMX_REAL_EPS && wf[jj] >= 1-GMX_REAL_EPS) || + (wf[i_atom] <= GMX_REAL_EPS && wf[jj] <= GMX_REAL_EPS)); + + if (bNotEx) + { + if (!bDoVdW_i) + { + if (charge[jj] != 0) + { + if (!b_hybrid) + { + add_j_to_nblist(coul, jj, bLR); + } + else + { + add_j_to_nblist(coul_adress, jj, bLR); + } + } + } + else if (!bDoCoul_i) + { + if (bHaveVdW[type[jj]]) + { + if (!b_hybrid) + { + add_j_to_nblist(vdw, jj, bLR); + } + else + { + add_j_to_nblist(vdw_adress, jj, bLR); + } + } + } + else + { + if (bHaveVdW[type[jj]]) + { + if (charge[jj] != 0) + { + if (!b_hybrid) + { + add_j_to_nblist(vdwc, jj, bLR); + } + else + { + add_j_to_nblist(vdwc_adress, jj, bLR); + } + } + else + { + if (!b_hybrid) + { + add_j_to_nblist(vdw, jj, bLR); + } + else + { + add_j_to_nblist(vdw_adress, jj, bLR); + } + + } + } + else if (charge[jj] != 0) + { + if (!b_hybrid) + { + add_j_to_nblist(coul, jj, bLR); + } + else + { + add_j_to_nblist(coul_adress, jj, bLR); + } + + } + } + } + } + } + + close_i_nblist(vdw); + close_i_nblist(coul); + close_i_nblist(vdwc); + close_i_nblist(vdw_adress); + close_i_nblist(coul_adress); + close_i_nblist(vdwc_adress); + } + } +} + +static void +put_in_list_qmmm(gmx_bool gmx_unused bHaveVdW[], + int ngid, + t_mdatoms gmx_unused * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool gmx_unused bDoVdW, + gmx_bool gmx_unused bDoCoul, + int gmx_unused solvent_opt) +{ + t_nblist * coul; + int i, j, jcg, igid, gid; + atom_id jj, jj0, jj1, i_atom; + int i0, nicg; + gmx_bool bNotEx; + + /* Get atom range */ + i0 = index[icg]; + nicg = index[icg+1]-i0; + + /* Get the i charge group info */ + igid = GET_CGINFO_GID(fr->cginfo[icg]); + + coul = &fr->QMMMlist; + + /* Loop over atoms in the ith charge group */ + for (i = 0; i < nicg; i++) + { + i_atom = i0+i; + gid = GID(igid, jgid, ngid); + /* Create new i_atom for each energy group */ + new_i_nblist(coul, i_atom, shift, gid); + + /* Loop over the j charge groups */ + for (j = 0; j < nj; j++) + { + jcg = jjcg[j]; + + /* Charge groups cannot have QM and MM atoms simultaneously */ + if (jcg != icg) + { + jj0 = index[jcg]; + jj1 = index[jcg+1]; + /* Finally loop over the atoms in the j-charge group */ + for (jj = jj0; jj < jj1; jj++) + { + bNotEx = NOTEXCL(bExcl, i, jj); + if (bNotEx) + { + add_j_to_nblist(coul, jj, bLR); + } + } + } + } + close_i_nblist(coul); + } +} + +static void +put_in_list_cg(gmx_bool gmx_unused bHaveVdW[], + int ngid, + t_mdatoms gmx_unused * md, + int icg, + int jgid, + int nj, + atom_id jjcg[], + atom_id index[], + t_excl bExcl[], + int shift, + t_forcerec * fr, + gmx_bool bLR, + gmx_bool gmx_unused bDoVdW, + gmx_bool gmx_unused bDoCoul, + int gmx_unused solvent_opt) +{ + int cginfo; + int igid, gid, nbl_ind; + t_nblist * vdwc; + int j, jcg; + + cginfo = fr->cginfo[icg]; + + igid = GET_CGINFO_GID(cginfo); + gid = GID(igid, jgid, ngid); + + /* Unpack pointers to neighbourlist structs */ + if (fr->nnblists == 1) + { + nbl_ind = 0; + } + else + { + nbl_ind = fr->gid2nblists[gid]; + } + if (bLR) + { + vdwc = &fr->nblists[nbl_ind].nlist_lr[eNL_VDWQQ]; + } + else + { + vdwc = &fr->nblists[nbl_ind].nlist_sr[eNL_VDWQQ]; + } + + /* Make a new neighbor list for charge group icg. + * Currently simply one neighbor list is made with LJ and Coulomb. 
+ * If required, zero interactions could be removed here + * or in the force loop. + */ + new_i_nblist(vdwc, index[icg], shift, gid); + vdwc->iinr_end[vdwc->nri] = index[icg+1]; + + for (j = 0; (j < nj); j++) + { + jcg = jjcg[j]; + /* Skip the icg-icg pairs if all self interactions are excluded */ + if (!(jcg == icg && GET_CGINFO_EXCL_INTRA(cginfo))) + { + /* Here we add the j charge group jcg to the list, + * exclusions are also added to the list. + */ + add_j_to_nblist_cg(vdwc, index[jcg], index[jcg+1], bExcl, icg == jcg, bLR); + } + } + + close_i_nblist(vdwc); +} + +static void setexcl(atom_id start, atom_id end, t_blocka *excl, gmx_bool b, + t_excl bexcl[]) +{ + atom_id i, k; + + if (b) + { + for (i = start; i < end; i++) + { + for (k = excl->index[i]; k < excl->index[i+1]; k++) + { + SETEXCL(bexcl, i-start, excl->a[k]); + } + } + } + else + { + for (i = start; i < end; i++) + { + for (k = excl->index[i]; k < excl->index[i+1]; k++) + { + RMEXCL(bexcl, i-start, excl->a[k]); + } + } + } +} + +int calc_naaj(int icg, int cgtot) +{ + int naaj; + + if ((cgtot % 2) == 1) + { + /* Odd number of charge groups, easy */ + naaj = 1 + (cgtot/2); + } + else if ((cgtot % 4) == 0) + { + /* Multiple of four is hard */ + if (icg < cgtot/2) + { + if ((icg % 2) == 0) + { + naaj = 1+(cgtot/2); + } + else + { + naaj = cgtot/2; + } + } + else + { + if ((icg % 2) == 1) + { + naaj = 1+(cgtot/2); + } + else + { + naaj = cgtot/2; + } + } + } + else + { + /* cgtot/2 = odd */ + if ((icg % 2) == 0) + { + naaj = 1+(cgtot/2); + } + else + { + naaj = cgtot/2; + } + } +#ifdef DEBUG + fprintf(log, "naaj=%d\n", naaj); +#endif + + return naaj; +} + +/************************************************ + * + * S I M P L E C O R E S T U F F + * + ************************************************/ + +static real calc_image_tric(rvec xi, rvec xj, matrix box, + rvec b_inv, int *shift) +{ + /* This code assumes that the cut-off is smaller than + * a half times the smallest diagonal element of the box. 
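+ * Example of the NINT-via-trunc trick below: dz*b_inv[ZZ] = -0.4 gives + * tz = (int)(2.1) - 2 = 0, while 0.6 gives tz = (int)(3.1) - 2 = 1, + * i.e. rounding to the nearest integer without calling rint().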
+ */ + const real h25 = 2.5; + real dx, dy, dz; + real r2; + int tx, ty, tz; + + /* Compute diff vector */ + dz = xj[ZZ] - xi[ZZ]; + dy = xj[YY] - xi[YY]; + dx = xj[XX] - xi[XX]; + + /* Perform NINT operation, using trunc operation, therefore + * we first add 2.5 then subtract 2 again + */ + tz = dz*b_inv[ZZ] + h25; + tz -= 2; + dz -= tz*box[ZZ][ZZ]; + dy -= tz*box[ZZ][YY]; + dx -= tz*box[ZZ][XX]; + + ty = dy*b_inv[YY] + h25; + ty -= 2; + dy -= ty*box[YY][YY]; + dx -= ty*box[YY][XX]; + + tx = dx*b_inv[XX]+h25; + tx -= 2; + dx -= tx*box[XX][XX]; + + /* Distance squared */ + r2 = (dx*dx) + (dy*dy) + (dz*dz); + + *shift = XYZ2IS(tx, ty, tz); + + return r2; +} + +static real calc_image_rect(rvec xi, rvec xj, rvec box_size, + rvec b_inv, int *shift) +{ + const real h15 = 1.5; + real ddx, ddy, ddz; + real dx, dy, dz; + real r2; + int tx, ty, tz; + + /* Compute diff vector */ + dx = xj[XX] - xi[XX]; + dy = xj[YY] - xi[YY]; + dz = xj[ZZ] - xi[ZZ]; + + /* Perform NINT operation, using trunc operation, therefore + * we first add 1.5 then subtract 1 again + */ + tx = dx*b_inv[XX] + h15; + ty = dy*b_inv[YY] + h15; + tz = dz*b_inv[ZZ] + h15; + tx--; + ty--; + tz--; + + /* Correct diff vector for translation */ + ddx = tx*box_size[XX] - dx; + ddy = ty*box_size[YY] - dy; + ddz = tz*box_size[ZZ] - dz; + + /* Distance squared */ + r2 = (ddx*ddx) + (ddy*ddy) + (ddz*ddz); + + *shift = XYZ2IS(tx, ty, tz); + + return r2; +} + +static void add_simple(t_ns_buf *nsbuf, int nrj, atom_id cg_j, + gmx_bool bHaveVdW[], int ngid, t_mdatoms *md, + int icg, int jgid, t_block *cgs, t_excl bexcl[], + int shift, t_forcerec *fr, put_in_list_t *put_in_list) +{ + if (nsbuf->nj + nrj > MAX_CG) + { + put_in_list(bHaveVdW, ngid, md, icg, jgid, nsbuf->ncg, nsbuf->jcg, + cgs->index, bexcl, shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + /* Reset buffer contents */ + nsbuf->ncg = nsbuf->nj = 0; + } + nsbuf->jcg[nsbuf->ncg++] = cg_j; + nsbuf->nj += nrj; +} + +static void ns_inner_tric(rvec x[], int icg, int *i_egp_flags, + int njcg, atom_id jcg[], + matrix box, rvec b_inv, real rcut2, + t_block *cgs, t_ns_buf **ns_buf, + gmx_bool bHaveVdW[], int ngid, t_mdatoms *md, + t_excl bexcl[], t_forcerec *fr, + put_in_list_t *put_in_list) +{ + int shift; + int j, nrj, jgid; + int *cginfo = fr->cginfo; + atom_id cg_j, *cgindex; + t_ns_buf *nsbuf; + + cgindex = cgs->index; + shift = CENTRAL; + for (j = 0; (j < njcg); j++) + { + cg_j = jcg[j]; + nrj = cgindex[cg_j+1]-cgindex[cg_j]; + if (calc_image_tric(x[icg], x[cg_j], box, b_inv, &shift) < rcut2) + { + jgid = GET_CGINFO_GID(cginfo[cg_j]); + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + add_simple(&ns_buf[jgid][shift], nrj, cg_j, + bHaveVdW, ngid, md, icg, jgid, cgs, bexcl, shift, fr, + put_in_list); + } + } + } +} + +static void ns_inner_rect(rvec x[], int icg, int *i_egp_flags, + int njcg, atom_id jcg[], + gmx_bool bBox, rvec box_size, rvec b_inv, real rcut2, + t_block *cgs, t_ns_buf **ns_buf, + gmx_bool bHaveVdW[], int ngid, t_mdatoms *md, + t_excl bexcl[], t_forcerec *fr, + put_in_list_t *put_in_list) +{ + int shift; + int j, nrj, jgid; + int *cginfo = fr->cginfo; + atom_id cg_j, *cgindex; + t_ns_buf *nsbuf; + + cgindex = cgs->index; + if (bBox) + { + shift = CENTRAL; + for (j = 0; (j < njcg); j++) + { + cg_j = jcg[j]; + nrj = cgindex[cg_j+1]-cgindex[cg_j]; + if (calc_image_rect(x[icg], x[cg_j], box_size, b_inv, &shift) < rcut2) + { + jgid = GET_CGINFO_GID(cginfo[cg_j]); + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + add_simple(&ns_buf[jgid][shift], nrj, cg_j, + bHaveVdW, ngid, md, icg, jgid, 
cgs, bexcl, shift, fr, + put_in_list); + } + } + } + } + else + { + for (j = 0; (j < njcg); j++) + { + cg_j = jcg[j]; + nrj = cgindex[cg_j+1]-cgindex[cg_j]; + if ((rcut2 == 0) || (distance2(x[icg], x[cg_j]) < rcut2)) + { + jgid = GET_CGINFO_GID(cginfo[cg_j]); + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + add_simple(&ns_buf[jgid][CENTRAL], nrj, cg_j, + bHaveVdW, ngid, md, icg, jgid, cgs, bexcl, CENTRAL, fr, + put_in_list); + } + } + } + } +} + +/* ns_simple_core needs to be adapted for QMMM still 2005 */ + +static int ns_simple_core(t_forcerec *fr, + gmx_localtop_t *top, + t_mdatoms *md, + matrix box, rvec box_size, + t_excl bexcl[], atom_id *aaj, + int ngid, t_ns_buf **ns_buf, + put_in_list_t *put_in_list, gmx_bool bHaveVdW[]) +{ + int naaj, k; + real rlist2; + int nsearch, icg, jcg, igid, i0, nri, nn; + int *cginfo; + t_ns_buf *nsbuf; + /* atom_id *i_atoms; */ + t_block *cgs = &(top->cgs); + t_blocka *excl = &(top->excls); + rvec b_inv; + int m; + gmx_bool bBox, bTriclinic; + int *i_egp_flags; + + rlist2 = sqr(fr->rlist); + + bBox = (fr->ePBC != epbcNONE); + if (bBox) + { + for (m = 0; (m < DIM); m++) + { + b_inv[m] = divide_err(1.0, box_size[m]); + } + bTriclinic = TRICLINIC(box); + } + else + { + bTriclinic = FALSE; + } + + cginfo = fr->cginfo; + + nsearch = 0; + for (icg = fr->cg0; (icg < fr->hcg); icg++) + { + /* + i0 = cgs->index[icg]; + nri = cgs->index[icg+1]-i0; + i_atoms = &(cgs->a[i0]); + i_eg_excl = fr->eg_excl + ngid*md->cENER[*i_atoms]; + setexcl(nri,i_atoms,excl,TRUE,bexcl); + */ + igid = GET_CGINFO_GID(cginfo[icg]); + i_egp_flags = fr->egp_flags + ngid*igid; + setexcl(cgs->index[icg], cgs->index[icg+1], excl, TRUE, bexcl); + + naaj = calc_naaj(icg, cgs->nr); + if (bTriclinic) + { + ns_inner_tric(fr->cg_cm, icg, i_egp_flags, naaj, &(aaj[icg]), + box, b_inv, rlist2, cgs, ns_buf, + bHaveVdW, ngid, md, bexcl, fr, put_in_list); + } + else + { + ns_inner_rect(fr->cg_cm, icg, i_egp_flags, naaj, &(aaj[icg]), + bBox, box_size, b_inv, rlist2, cgs, ns_buf, + bHaveVdW, ngid, md, bexcl, fr, put_in_list); + } + nsearch += naaj; + + for (nn = 0; (nn < ngid); nn++) + { + for (k = 0; (k < SHIFTS); k++) + { + nsbuf = &(ns_buf[nn][k]); + if (nsbuf->ncg > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nsbuf->ncg, nsbuf->jcg, + cgs->index, bexcl, k, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + nsbuf->ncg = nsbuf->nj = 0; + } + } + } + /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */ + setexcl(cgs->index[icg], cgs->index[icg+1], excl, FALSE, bexcl); + } + close_neighbor_lists(fr, FALSE); + + return nsearch; +} + +/************************************************ + * + * N S 5 G R I D S T U F F + * + ************************************************/ + +static gmx_inline void get_dx(int Nx, real gridx, real rc2, int xgi, real x, + int *dx0, int *dx1, real *dcx2) +{ + real dcx, tmp; + int xgi0, xgi1, i; + + if (xgi < 0) + { + *dx0 = 0; + xgi0 = -1; + *dx1 = -1; + xgi1 = 0; + } + else if (xgi >= Nx) + { + *dx0 = Nx; + xgi0 = Nx-1; + *dx1 = Nx-1; + xgi1 = Nx; + } + else + { + dcx2[xgi] = 0; + *dx0 = xgi; + xgi0 = xgi-1; + *dx1 = xgi; + xgi1 = xgi+1; + } + + for (i = xgi0; i >= 0; i--) + { + dcx = (i+1)*gridx-x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + *dx0 = i; + dcx2[i] = tmp; + } + for (i = xgi1; i < Nx; i++) + { + dcx = i*gridx-x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + *dx1 = i; + dcx2[i] = tmp; + } +} + +static gmx_inline void get_dx_dd(int Nx, real gridx, real rc2, int xgi, real x, + int ncpddc, int shift_min, int shift_max, + int *g0, int *g1, real *dcx2) +{ + real dcx, 
tmp; + int g_min, g_max, shift_home; + + if (xgi < 0) + { + g_min = 0; + g_max = Nx - 1; + *g0 = 0; + *g1 = -1; + } + else if (xgi >= Nx) + { + g_min = 0; + g_max = Nx - 1; + *g0 = Nx; + *g1 = Nx - 1; + } + else + { + if (ncpddc == 0) + { + g_min = 0; + g_max = Nx - 1; + } + else + { + if (xgi < ncpddc) + { + shift_home = 0; + } + else + { + shift_home = -1; + } + g_min = (shift_min == shift_home ? 0 : ncpddc); + g_max = (shift_max == shift_home ? ncpddc - 1 : Nx - 1); + } + if (shift_min > 0) + { + *g0 = g_min; + *g1 = g_min - 1; + } + else if (shift_max < 0) + { + *g0 = g_max + 1; + *g1 = g_max; + } + else + { + *g0 = xgi; + *g1 = xgi; + dcx2[xgi] = 0; + } + } + + while (*g0 > g_min) + { + /* Check one grid cell down */ + dcx = ((*g0 - 1) + 1)*gridx - x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + (*g0)--; + dcx2[*g0] = tmp; + } + + while (*g1 < g_max) + { + /* Check one grid cell up */ + dcx = (*g1 + 1)*gridx - x; + tmp = dcx*dcx; + if (tmp >= rc2) + { + break; + } + (*g1)++; + dcx2[*g1] = tmp; + } +} + + +#define sqr(x) ((x)*(x)) +#define calc_dx2(XI, YI, ZI, y) (sqr(XI-y[XX]) + sqr(YI-y[YY]) + sqr(ZI-y[ZZ])) +#define calc_cyl_dx2(XI, YI, y) (sqr(XI-y[XX]) + sqr(YI-y[YY])) +/**************************************************** + * + * F A S T N E I G H B O R S E A R C H I N G + * + * Optimized neighboursearching routine using grid + * at least 1x1x1, see GROMACS manual + * + ****************************************************/ + + +static void get_cutoff2(t_forcerec *fr, gmx_bool bDoLongRange, + real *rvdw2, real *rcoul2, + real *rs2, real *rm2, real *rl2) +{ + *rs2 = sqr(fr->rlist); + + if (bDoLongRange && fr->bTwinRange) + { + /* With plain cut-off or RF we need to make the list exactly + * up to the cut-off and the cut-off's can be different, + * so we can not simply set them to rlistlong. + * To keep this code compatible with (exotic) old cases, + * we also create lists up to rvdw/rcoulomb for PME and Ewald. + * The interaction check should correspond to: + * !ir_vdw/coulomb_might_be_zero_at_cutoff from inputrec.c. 
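+ * In short: potentials without a modifier can be non-zero at the + * cut-off, so their lists must reach exactly rvdw/rcoulomb, while + * modified potentials vanish there and can safely use rlistlong.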
+ */ + if (((fr->vdwtype == evdwCUT || fr->vdwtype == evdwPME) && + fr->vdw_modifier == eintmodNONE) || + fr->rvdw <= fr->rlist) + { + *rvdw2 = sqr(fr->rvdw); + } + else + { + *rvdw2 = sqr(fr->rlistlong); + } + if (((fr->eeltype == eelCUT || + (EEL_RF(fr->eeltype) && fr->eeltype != eelRF_ZERO) || + fr->eeltype == eelPME || + fr->eeltype == eelEWALD) && + fr->coulomb_modifier == eintmodNONE) || + fr->rcoulomb <= fr->rlist) + { + *rcoul2 = sqr(fr->rcoulomb); + } + else + { + *rcoul2 = sqr(fr->rlistlong); + } + } + else + { + /* Workaround for a gcc -O3 or -ffast-math problem */ + *rvdw2 = *rs2; + *rcoul2 = *rs2; + } + *rm2 = min(*rvdw2, *rcoul2); + *rl2 = max(*rvdw2, *rcoul2); +} + +static void init_nsgrid_lists(t_forcerec *fr, int ngid, gmx_ns_t *ns) +{ + real rvdw2, rcoul2, rs2, rm2, rl2; + int j; + + get_cutoff2(fr, TRUE, &rvdw2, &rcoul2, &rs2, &rm2, &rl2); + + /* Short range buffers */ + snew(ns->nl_sr, ngid); + /* Counters */ + snew(ns->nsr, ngid); + snew(ns->nlr_ljc, ngid); + snew(ns->nlr_one, ngid); + + /* Always allocate both list types, since rcoulomb might now change with PME load balancing */ + /* Long range VdW and Coul buffers */ + snew(ns->nl_lr_ljc, ngid); + /* Long range VdW or Coul only buffers */ + snew(ns->nl_lr_one, ngid); + + for (j = 0; (j < ngid); j++) + { + snew(ns->nl_sr[j], MAX_CG); + snew(ns->nl_lr_ljc[j], MAX_CG); + snew(ns->nl_lr_one[j], MAX_CG); + } + if (debug) + { + fprintf(debug, + "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n", + rs2, rm2, rl2); + } +} + +static int nsgrid_core(t_commrec *cr, t_forcerec *fr, + matrix box, int ngid, + gmx_localtop_t *top, + t_grid *grid, + t_excl bexcl[], gmx_bool *bExcludeAlleg, + t_mdatoms *md, + put_in_list_t *put_in_list, + gmx_bool bHaveVdW[], + gmx_bool bDoLongRange, gmx_bool bMakeQMMMnblist) +{ + gmx_ns_t *ns; + atom_id **nl_lr_ljc, **nl_lr_one, **nl_sr; + int *nlr_ljc, *nlr_one, *nsr; + gmx_domdec_t *dd = NULL; + t_block *cgs = &(top->cgs); + int *cginfo = fr->cginfo; + /* atom_id *i_atoms,*cgsindex=cgs->index; */ + ivec sh0, sh1, shp; + int cell_x, cell_y, cell_z; + int d, tx, ty, tz, dx, dy, dz, cj; +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG + int zsh_ty, zsh_tx, ysh_tx; +#endif + int dx0, dx1, dy0, dy1, dz0, dz1; + int Nx, Ny, Nz, shift = -1, j, nrj, nns, nn = -1; + real gridx, gridy, gridz, grid_x, grid_y, grid_z; + real *dcx2, *dcy2, *dcz2; + int zgi, ygi, xgi; + int cg0, cg1, icg = -1, cgsnr, i0, igid, nri, naaj, max_jcg; + int jcg0, jcg1, jjcg, cgj0, jgid; + int *grida, *gridnra, *gridind; + gmx_bool rvdw_lt_rcoul, rcoul_lt_rvdw; + rvec xi, *cgcm, grid_offset; + real r2, rs2, rvdw2, rcoul2, rm2, rl2, XI, YI, ZI, dcx, dcy, dcz, tmp1, tmp2; + int *i_egp_flags; + gmx_bool bDomDec, bTriclinicX, bTriclinicY; + ivec ncpddc; + + ns = &fr->ns; + + bDomDec = DOMAINDECOMP(cr); + if (bDomDec) + { + dd = cr->dd; + } + + bTriclinicX = ((YY < grid->npbcdim && + (!bDomDec || dd->nc[YY] == 1) && box[YY][XX] != 0) || + (ZZ < grid->npbcdim && + (!bDomDec || dd->nc[ZZ] == 1) && box[ZZ][XX] != 0)); + bTriclinicY = (ZZ < grid->npbcdim && + (!bDomDec || dd->nc[ZZ] == 1) && box[ZZ][YY] != 0); + + cgsnr = cgs->nr; + + get_cutoff2(fr, bDoLongRange, &rvdw2, &rcoul2, &rs2, &rm2, &rl2); + + rvdw_lt_rcoul = (rvdw2 >= rcoul2); + rcoul_lt_rvdw = (rcoul2 >= rvdw2); + + if (bMakeQMMMnblist) + { + rm2 = rl2; + rs2 = rl2; + } + + nl_sr = ns->nl_sr; + nsr = ns->nsr; + nl_lr_ljc = ns->nl_lr_ljc; + nl_lr_one = ns->nl_lr_one; + nlr_ljc = ns->nlr_ljc; + nlr_one = ns->nlr_one; + + /* Unpack arrays */ + cgcm = fr->cg_cm; + Nx = grid->n[XX]; + Ny = 
grid->n[YY]; + Nz = grid->n[ZZ]; + grida = grid->a; + gridind = grid->index; + gridnra = grid->nra; + nns = 0; + + gridx = grid->cell_size[XX]; + gridy = grid->cell_size[YY]; + gridz = grid->cell_size[ZZ]; + grid_x = 1/gridx; + grid_y = 1/gridy; + grid_z = 1/gridz; + copy_rvec(grid->cell_offset, grid_offset); + copy_ivec(grid->ncpddc, ncpddc); + dcx2 = grid->dcx2; + dcy2 = grid->dcy2; + dcz2 = grid->dcz2; + +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG + zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5); + zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5); + ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5); + if (zsh_tx != 0 && ysh_tx != 0) + { + /* This could happen due to rounding, when both ratios are 0.5 */ + ysh_tx = 0; + } +#endif + + debug_gmx(); + + if (fr->n_tpi) + { + /* We only want a list for the test particle */ + cg0 = cgsnr - 1; + } + else + { + cg0 = grid->icg0; + } + cg1 = grid->icg1; + + /* Set the shift range */ + for (d = 0; d < DIM; d++) + { + sh0[d] = -1; + sh1[d] = 1; + /* Check if we need periodicity shifts. + * Without PBC or with domain decomposition we don't need them. + */ + if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1)) + { + shp[d] = 0; + } + else + { + if (d == XX && + box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2)) + { + shp[d] = 2; + } + else + { + shp[d] = 1; + } + } + } + + /* Loop over charge groups */ + for (icg = cg0; (icg < cg1); icg++) + { + igid = GET_CGINFO_GID(cginfo[icg]); + /* Skip this charge group if all energy groups are excluded! */ + if (bExcludeAlleg[igid]) + { + continue; + } + + i0 = cgs->index[icg]; + + if (bMakeQMMMnblist) + { + /* Skip this charge group if it is not a QM atom while making a + * QM/MM neighbourlist + */ + if (md->bQM[i0] == FALSE) + { + continue; /* MM particle, go to next particle */ + } + + /* Compute the number of charge groups that fall within the control + * of this one (icg) + */ + naaj = calc_naaj(icg, cgsnr); + jcg0 = icg; + jcg1 = icg + naaj; + max_jcg = cgsnr; + } + else + { + /* make a normal neighbourlist */ + + if (bDomDec) + { + /* Get the j charge-group and dd cell shift ranges */ + dd_get_ns_ranges(cr->dd, icg, &jcg0, &jcg1, sh0, sh1); + max_jcg = 0; + } + else + { + /* Compute the number of charge groups that fall within the control + * of this one (icg) + */ + naaj = calc_naaj(icg, cgsnr); + jcg0 = icg; + jcg1 = icg + naaj; + + if (fr->n_tpi) + { + /* The i-particle is always the test particle, + * so we want all j-particles + */ + max_jcg = cgsnr - 1; + } + else + { + max_jcg = jcg1 - cgsnr; + } + } + } + + i_egp_flags = fr->egp_flags + igid*ngid; + + /* Set the exclusions for the atoms in charge group icg using a bitmask */ + setexcl(i0, cgs->index[icg+1], &top->excls, TRUE, bexcl); + + ci2xyz(grid, icg, &cell_x, &cell_y, &cell_z); + + /* Changed iicg to icg, DvdS 990115 + * (but see consistency check above, DvdS 990330) + */ +#ifdef NS5DB + fprintf(log, "icg=%5d, naaj=%5d, cell %d %d %d\n", + icg, naaj, cell_x, cell_y, cell_z); +#endif + /* Loop over shift vectors in three dimensions */ + for (tz = -shp[ZZ]; tz <= shp[ZZ]; tz++) + { + ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ]; + /* Calculate range of cells in Z direction that have the shift tz */ + zgi = cell_z + tz*Nz; +#define FAST_DD_NS +#ifndef FAST_DD_NS + get_dx(Nz, gridz, rl2, zgi, ZI, &dz0, &dz1, dcz2); +#else + get_dx_dd(Nz, gridz, rl2, zgi, ZI-grid_offset[ZZ], + ncpddc[ZZ], sh0[ZZ], sh1[ZZ], &dz0, &dz1, dcz2); +#endif + if (dz0 > dz1) + { + continue; + } + for (ty = -shp[YY]; ty <= shp[YY]; ty++) + { + YI = 
cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY]; + /* Calculate range of cells in Y direction that have the shift ty */ + if (bTriclinicY) + { + ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny; + } + else + { + ygi = cell_y + ty*Ny; + } +#ifndef FAST_DD_NS + get_dx(Ny, gridy, rl2, ygi, YI, &dy0, &dy1, dcy2); +#else + get_dx_dd(Ny, gridy, rl2, ygi, YI-grid_offset[YY], + ncpddc[YY], sh0[YY], sh1[YY], &dy0, &dy1, dcy2); +#endif + if (dy0 > dy1) + { + continue; + } + for (tx = -shp[XX]; tx <= shp[XX]; tx++) + { + XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX]; + /* Calculate range of cells in X direction that have the shift tx */ + if (bTriclinicX) + { + xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx; + } + else + { + xgi = cell_x + tx*Nx; + } +#ifndef FAST_DD_NS + get_dx(Nx, gridx, rl2, xgi*Nx, XI, &dx0, &dx1, dcx2); +#else + get_dx_dd(Nx, gridx, rl2, xgi, XI-grid_offset[XX], + ncpddc[XX], sh0[XX], sh1[XX], &dx0, &dx1, dcx2); +#endif + if (dx0 > dx1) + { + continue; + } + /* AdResS: an explicit cg that has a weighting function of 0 is excluded + * from the neighbour list as it will not interact */ + if (fr->adress_type != eAdressOff) + { + if (md->wf[cgs->index[icg]] <= GMX_REAL_EPS && egp_explicit(fr, igid)) + { + continue; + } + } + /* Get shift vector */ + shift = XYZ2IS(tx, ty, tz); +#ifdef NS5DB + range_check(shift, 0, SHIFTS); +#endif + for (nn = 0; (nn < ngid); nn++) + { + nsr[nn] = 0; + nlr_ljc[nn] = 0; + nlr_one[nn] = 0; + } +#ifdef NS5DB + fprintf(log, "shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n", + shift, dx0, dx1, dy0, dy1, dz0, dz1); + fprintf(log, "cgcm: %8.3f %8.3f %8.3f\n", cgcm[icg][XX], + cgcm[icg][YY], cgcm[icg][ZZ]); + fprintf(log, "xi: %8.3f %8.3f %8.3f\n", XI, YI, ZI); +#endif + for (dx = dx0; (dx <= dx1); dx++) + { + tmp1 = rl2 - dcx2[dx]; + for (dy = dy0; (dy <= dy1); dy++) + { + tmp2 = tmp1 - dcy2[dy]; + if (tmp2 > 0) + { + for (dz = dz0; (dz <= dz1); dz++) + { + if (tmp2 > dcz2[dz]) + { + /* Find grid-cell cj in which possible neighbours are */ + cj = xyz2ci(Ny, Nz, dx, dy, dz); + + /* Check out how many cgs (nrj) there are in this cell */ + nrj = gridnra[cj]; + + /* Find the offset in the cg list */ + cgj0 = gridind[cj]; + + /* Check if all j's are out of range so we + * can skip the whole cell. + * Should save some time, especially with DD. + */ + if (nrj == 0 || + (grida[cgj0] >= max_jcg && + (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0))) + { + continue; + } + + /* Loop over cgs */ + for (j = 0; (j < nrj); j++) + { + jjcg = grida[cgj0+j]; + + /* check whether this guy is in range! 
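+ * i.e. either within the home range jcg0..jcg1 assigned to this + * i charge group, or among the wrapped-around groups below max_jcg.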
*/ + if ((jjcg >= jcg0 && jjcg < jcg1) || + (jjcg < max_jcg)) + { + r2 = calc_dx2(XI, YI, ZI, cgcm[jjcg]); + if (r2 < rl2) + { + /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */ + jgid = GET_CGINFO_GID(cginfo[jjcg]); + /* check energy group exclusions */ + if (!(i_egp_flags[jgid] & EGP_EXCL)) + { + if (r2 < rs2) + { + if (nsr[jgid] >= MAX_CG) + { + /* Add to short-range list */ + put_in_list(bHaveVdW, ngid, md, icg, jgid, + nsr[jgid], nl_sr[jgid], + cgs->index, /* cgsatoms, */ bexcl, + shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + nsr[jgid] = 0; + } + nl_sr[jgid][nsr[jgid]++] = jjcg; + } + else if (r2 < rm2) + { + if (nlr_ljc[jgid] >= MAX_CG) + { + /* Add to LJ+coulomb long-range list */ + put_in_list(bHaveVdW, ngid, md, icg, jgid, + nlr_ljc[jgid], nl_lr_ljc[jgid], top->cgs.index, + bexcl, shift, fr, TRUE, TRUE, TRUE, fr->solvent_opt); + nlr_ljc[jgid] = 0; + } + nl_lr_ljc[jgid][nlr_ljc[jgid]++] = jjcg; + } + else + { + if (nlr_one[jgid] >= MAX_CG) + { + /* Add to long-range list with only coul, or only LJ */ + put_in_list(bHaveVdW, ngid, md, icg, jgid, + nlr_one[jgid], nl_lr_one[jgid], top->cgs.index, + bexcl, shift, fr, TRUE, rvdw_lt_rcoul, rcoul_lt_rvdw, fr->solvent_opt); + nlr_one[jgid] = 0; + } + nl_lr_one[jgid][nlr_one[jgid]++] = jjcg; + } + } + } + nns++; + } + } + } + } + } + } + } + /* CHECK whether there is anything left in the buffers */ + for (nn = 0; (nn < ngid); nn++) + { + if (nsr[nn] > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nsr[nn], nl_sr[nn], + cgs->index, /* cgsatoms, */ bexcl, + shift, fr, FALSE, TRUE, TRUE, fr->solvent_opt); + } + + if (nlr_ljc[nn] > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nlr_ljc[nn], + nl_lr_ljc[nn], top->cgs.index, + bexcl, shift, fr, TRUE, TRUE, TRUE, fr->solvent_opt); + } + + if (nlr_one[nn] > 0) + { + put_in_list(bHaveVdW, ngid, md, icg, nn, nlr_one[nn], + nl_lr_one[nn], top->cgs.index, + bexcl, shift, fr, TRUE, rvdw_lt_rcoul, rcoul_lt_rvdw, fr->solvent_opt); + } + } + } + } + } + /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */ + setexcl(cgs->index[icg], cgs->index[icg+1], &top->excls, FALSE, bexcl); + } + /* No need to perform any left-over force calculations anymore (as we used to do here) + * since we now save the proper long-range lists for later evaluation. + */ + + debug_gmx(); + + /* Close neighbourlists */ + close_neighbor_lists(fr, bMakeQMMMnblist); + + return nns; +} + +void ns_realloc_natoms(gmx_ns_t *ns, int natoms) +{ + int i; + + if (natoms > ns->nra_alloc) + { + ns->nra_alloc = over_alloc_dd(natoms); + srenew(ns->bexcl, ns->nra_alloc); + for (i = 0; i < ns->nra_alloc; i++) + { + ns->bexcl[i] = 0; + } + } +} + +void init_ns(FILE *fplog, const t_commrec *cr, + gmx_ns_t *ns, t_forcerec *fr, + const gmx_mtop_t *mtop) +{ + int mt, icg, nr_in_cg, maxcg, i, j, jcg, ngid, ncg; + t_block *cgs; + char *ptr; + + /* Compute largest charge groups size (# atoms) */ + nr_in_cg = 1; + for (mt = 0; mt < mtop->nmoltype; mt++) + { + cgs = &mtop->moltype[mt].cgs; + for (icg = 0; (icg < cgs->nr); icg++) + { + nr_in_cg = max(nr_in_cg, (int)(cgs->index[icg+1]-cgs->index[icg])); + } + } + + /* Verify whether largest charge group is <= max cg. + * This is determined by the type of the local exclusion type + * Exclusions are stored in bits. 
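+ * Each j atom gets one t_excl bitmask holding one bit per i atom of + * the charge group, which caps the group size at sizeof(t_excl)*8.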
(If the type is not large + * enough, enlarge it, unsigned char -> unsigned short -> unsigned long) + */ + maxcg = sizeof(t_excl)*8; + if (nr_in_cg > maxcg) + { + gmx_fatal(FARGS, "Max #atoms in a charge group: %d > %d\n", + nr_in_cg, maxcg); + } + + ngid = mtop->groups.grps[egcENER].nr; + snew(ns->bExcludeAlleg, ngid); + for (i = 0; i < ngid; i++) + { + ns->bExcludeAlleg[i] = TRUE; + for (j = 0; j < ngid; j++) + { + if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL)) + { + ns->bExcludeAlleg[i] = FALSE; + } + } + } + + if (fr->bGrid) + { + /* Grid search */ + ns->grid = init_grid(fplog, fr); + init_nsgrid_lists(fr, ngid, ns); + } + else + { + /* Simple search */ + snew(ns->ns_buf, ngid); + for (i = 0; (i < ngid); i++) + { + snew(ns->ns_buf[i], SHIFTS); + } + ncg = ncg_mtop(mtop); + snew(ns->simple_aaj, 2*ncg); + for (jcg = 0; (jcg < ncg); jcg++) + { + ns->simple_aaj[jcg] = jcg; + ns->simple_aaj[jcg+ncg] = jcg; + } + } + + /* Create array that determines whether or not atoms have VdW */ + snew(ns->bHaveVdW, fr->ntype); + for (i = 0; (i < fr->ntype); i++) + { + for (j = 0; (j < fr->ntype); j++) + { + ns->bHaveVdW[i] = (ns->bHaveVdW[i] || + (fr->bBHAM ? + ((BHAMA(fr->nbfp, fr->ntype, i, j) != 0) || + (BHAMB(fr->nbfp, fr->ntype, i, j) != 0) || + (BHAMC(fr->nbfp, fr->ntype, i, j) != 0)) : + ((C6(fr->nbfp, fr->ntype, i, j) != 0) || + (C12(fr->nbfp, fr->ntype, i, j) != 0)))); + } + } + if (debug) + { + pr_bvec(debug, 0, "bHaveVdW", ns->bHaveVdW, fr->ntype, TRUE); + } + + ns->nra_alloc = 0; + ns->bexcl = NULL; + if (!DOMAINDECOMP(cr)) + { + ns_realloc_natoms(ns, mtop->natoms); + } + + ns->nblist_initialized = FALSE; + + /* nbr list debug dump */ + { + char *ptr = getenv("GMX_DUMP_NL"); + if (ptr) + { + ns->dump_nl = strtol(ptr, NULL, 10); + if (fplog) + { + fprintf(fplog, "GMX_DUMP_NL = %d", ns->dump_nl); + } + } + else + { + ns->dump_nl = 0; + } + } +} + + +int search_neighbours(FILE *log, t_forcerec *fr, + matrix box, + gmx_localtop_t *top, + gmx_groups_t *groups, + t_commrec *cr, + t_nrnb *nrnb, t_mdatoms *md, + gmx_bool bFillGrid, + gmx_bool bDoLongRangeNS) +{ + t_block *cgs = &(top->cgs); + rvec box_size, grid_x0, grid_x1; + int i, j, m, ngid; + real min_size, grid_dens; + int nsearch; + gmx_bool bGrid; + char *ptr; + gmx_bool *i_egp_flags; + int cg_start, cg_end, start, end; + gmx_ns_t *ns; + t_grid *grid; + gmx_domdec_zones_t *dd_zones; + put_in_list_t *put_in_list; + + ns = &fr->ns; + + /* Set some local variables */ + bGrid = fr->bGrid; + ngid = groups->grps[egcENER].nr; + + for (m = 0; (m < DIM); m++) + { + box_size[m] = box[m][m]; + } + + if (fr->ePBC != epbcNONE) + { + if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC, box)) + { + gmx_fatal(FARGS, "One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off."); + } + if (!bGrid) + { + min_size = min(box_size[XX], min(box_size[YY], box_size[ZZ])); + if (2*fr->rlistlong >= min_size) + { + gmx_fatal(FARGS, "One of the box diagonal elements has become smaller than twice the cut-off length."); + } + } + } + + if (DOMAINDECOMP(cr)) + { + ns_realloc_natoms(ns, cgs->index[cgs->nr]); + } + debug_gmx(); + + /* Reset the neighbourlists */ + reset_neighbor_lists(fr, TRUE, TRUE); + + if (bGrid && bFillGrid) + { + + grid = ns->grid; + if (DOMAINDECOMP(cr)) + { + dd_zones = domdec_zones(cr->dd); + } + else + { + dd_zones = NULL; + + get_nsgrid_boundaries(grid->nboundeddim, box, NULL, NULL, NULL, NULL, + cgs->nr, fr->cg_cm, grid_x0, grid_x1, &grid_dens); + + grid_first(log, grid, NULL, 
NULL, box, grid_x0, grid_x1, + fr->rlistlong, grid_dens); + } + debug_gmx(); + + start = 0; + end = cgs->nr; + + if (DOMAINDECOMP(cr)) + { + end = cgs->nr; + fill_grid(dd_zones, grid, end, -1, end, fr->cg_cm); + grid->icg0 = 0; + grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1; + } + else + { + fill_grid(NULL, grid, cgs->nr, fr->cg0, fr->hcg, fr->cg_cm); + grid->icg0 = fr->cg0; + grid->icg1 = fr->hcg; + debug_gmx(); + } + + calc_elemnr(grid, start, end, cgs->nr); + calc_ptrs(grid); + grid_last(grid, start, end, cgs->nr); + + if (gmx_debug_at) + { + check_grid(grid); + print_grid(debug, grid); + } + } + else if (fr->n_tpi) + { + /* Set the grid cell index for the test particle only. + * The cell to cg index is not corrected, but that does not matter. + */ + fill_grid(NULL, ns->grid, fr->hcg, fr->hcg-1, fr->hcg, fr->cg_cm); + } + debug_gmx(); + + if (fr->adress_type == eAdressOff) + { + if (!fr->ns.bCGlist) + { + put_in_list = put_in_list_at; + } + else + { + put_in_list = put_in_list_cg; + } + } + else + { + put_in_list = put_in_list_adress; + } + + /* Do the core! */ + if (bGrid) + { + grid = ns->grid; + nsearch = nsgrid_core(cr, fr, box, ngid, top, + grid, ns->bexcl, ns->bExcludeAlleg, + md, put_in_list, ns->bHaveVdW, + bDoLongRangeNS, FALSE); + + /* neighbour searching withouth QMMM! QM atoms have zero charge in + * the classical calculation. The charge-charge interaction + * between QM and MM atoms is handled in the QMMM core calculation + * (see QMMM.c). The VDW however, we'd like to compute classically + * and the QM MM atom pairs have just been put in the + * corresponding neighbourlists. in case of QMMM we still need to + * fill a special QMMM neighbourlist that contains all neighbours + * of the QM atoms. If bQMMM is true, this list will now be made: + */ + if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom) + { + nsearch += nsgrid_core(cr, fr, box, ngid, top, + grid, ns->bexcl, ns->bExcludeAlleg, + md, put_in_list_qmmm, ns->bHaveVdW, + bDoLongRangeNS, TRUE); + } + } + else + { + nsearch = ns_simple_core(fr, top, md, box, box_size, + ns->bexcl, ns->simple_aaj, + ngid, ns->ns_buf, put_in_list, ns->bHaveVdW); + } + debug_gmx(); + +#ifdef DEBUG + pr_nsblock(log); +#endif + + inc_nrnb(nrnb, eNR_NS, nsearch); + /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */ + + return nsearch; +} + +int natoms_beyond_ns_buffer(t_inputrec *ir, t_forcerec *fr, t_block *cgs, + matrix scale_tot, rvec *x) +{ + int cg0, cg1, cg, a0, a1, a, i, j; + real rint, hbuf2, scale; + rvec *cg_cm, cgsc; + gmx_bool bIsotropic; + int nBeyond; + + nBeyond = 0; + + rint = max(ir->rcoulomb, ir->rvdw); + if (ir->rlist < rint) + { + gmx_fatal(FARGS, "The neighbor search buffer has negative size: %f nm", + ir->rlist - rint); + } + cg_cm = fr->cg_cm; + + cg0 = fr->cg0; + cg1 = fr->hcg; + + if (!EI_DYNAMICS(ir->eI) || !DYNAMIC_BOX(*ir)) + { + hbuf2 = sqr(0.5*(ir->rlist - rint)); + for (cg = cg0; cg < cg1; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + for (a = a0; a < a1; a++) + { + if (distance2(cg_cm[cg], x[a]) > hbuf2) + { + nBeyond++; + } + } + } + } + else + { + bIsotropic = TRUE; + scale = scale_tot[0][0]; + for (i = 1; i < DIM; i++) + { + /* With anisotropic scaling, the original spherical ns volumes become + * ellipsoids. To avoid costly transformations we use the minimum + * eigenvalue of the scaling matrix for determining the buffer size. + * Since the lower half is 0, the eigenvalues are the diagonal elements. 
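+ */
+
+/* A short sketch of that argument in plain C (not project code): the
+ * eigenvalues of a triangular matrix are exactly its diagonal entries,
+ * so the most conservative isotropic buffer scale is their minimum.
+ * Kept under #if 0 so the merged file is unaffected.
+ */
+#if 0
+static double min_diagonal_sketch(const double m[3][3])
+{
+    double s = m[0][0];
+    int    i;
+
+    for (i = 1; i < 3; i++)
+    {
+        if (m[i][i] < s)
+        {
+            s = m[i][i];   /* the smallest eigenvalue */
+        }
+    }
+    return s;
+}
+#endif
+
+/*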
+ */ + scale = min(scale, scale_tot[i][i]); + if (scale_tot[i][i] != scale_tot[i-1][i-1]) + { + bIsotropic = FALSE; + } + for (j = 0; j < i; j++) + { + if (scale_tot[i][j] != 0) + { + bIsotropic = FALSE; + } + } + } + hbuf2 = sqr(0.5*(scale*ir->rlist - rint)); + if (bIsotropic) + { + for (cg = cg0; cg < cg1; cg++) + { + svmul(scale, cg_cm[cg], cgsc); + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + for (a = a0; a < a1; a++) + { + if (distance2(cgsc, x[a]) > hbuf2) + { + nBeyond++; + } + } + } + } + else + { + /* Anistropic scaling */ + for (cg = cg0; cg < cg1; cg++) + { + /* Since scale_tot contains the transpose of the scaling matrix, + * we need to multiply with the transpose. + */ + tmvmul_ur0(scale_tot, cg_cm[cg], cgsc); + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + for (a = a0; a < a1; a++) + { + if (distance2(cgsc, x[a]) > hbuf2) + { + nBeyond++; + } + } + } + } + } + + return nBeyond; +} diff --cc src/gromacs/mdlib/sim_util.c index 8155c99ee7,0000000000..02e1248b2c mode 100644,000000..100644 --- a/src/gromacs/mdlib/sim_util.c +++ b/src/gromacs/mdlib/sim_util.c @@@ -1,2856 -1,0 +1,2894 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include "typedefs.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/smalloc.h"
+#include "names.h"
+#include "txtdump.h"
+#include "pbc.h"
+#include "chargegroup.h"
+#include "vec.h"
+#include "nrnb.h"
+#include "mshift.h"
+#include "mdrun.h"
+#include "sim_util.h"
+#include "update.h"
+#include "physics.h"
+#include "main.h"
+#include "mdatoms.h"
+#include "force.h"
+#include "bondf.h"
+#include "pme.h"
+#include "disre.h"
+#include "orires.h"
+#include "network.h"
+#include "calcmu.h"
+#include "constr.h"
+#include "xvgr.h"
+#include "copyrite.h"
+#include "domdec.h"
+#include "genborn.h"
+#include "nbnxn_atomdata.h"
+#include "nbnxn_search.h"
+#include "nbnxn_kernels/nbnxn_kernel_ref.h"
+#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
+#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
+#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
+#include "nonbonded.h"
+#include "../gmxlib/nonbonded/nb_kernel.h"
+#include "../gmxlib/nonbonded/nb_free_energy.h"
+
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/essentialdynamics/edsam.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/pulling/pull_rotation.h"
+#include "gromacs/imd/imd.h"
+#include "adress.h"
+#include "qmmm.h"
+
+#include "gmx_omp_nthreads.h"
+
+#include "nbnxn_cuda_data_mgmt.h"
+#include "nbnxn_cuda/nbnxn_cuda.h"
+
+void print_time(FILE *out,
+                gmx_walltime_accounting_t walltime_accounting,
+                gmx_int64_t step,
+                t_inputrec *ir,
+                t_commrec gmx_unused *cr)
+{
+    time_t finish;
+    char   timebuf[STRLEN];
+    double dt, elapsed_seconds, time_per_step;
+    char   buf[48];
+
+#ifndef GMX_THREAD_MPI
+    if (!PAR(cr))
+#endif
+    {
+        fprintf(out, "\r");
+    }
+    fprintf(out, "step %s", gmx_step_str(step, buf));
+    if ((step >= ir->nstlist))
+    {
+        double seconds_since_epoch = gmx_gettime();
+        elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
+        time_per_step   = elapsed_seconds/(step - ir->init_step + 1);
+        dt              = (ir->nsteps + ir->init_step - step) * time_per_step;
+
+        if (ir->nsteps >= 0)
+        {
+            if (dt >= 300)
+            {
+                finish = (time_t) (seconds_since_epoch + dt);
+                gmx_ctime_r(&finish, timebuf, STRLEN);
+                sprintf(buf, "%s", timebuf);
+                buf[strlen(buf)-1] = '\0';
+                fprintf(out, ", will finish %s", buf);
+            }
+            else
+            {
+                fprintf(out, ", remaining wall clock time: %5d s ", (int)dt);
+            }
+        }
+        else
+        {
+            fprintf(out, " performance: %.1f ns/day ",
+                    ir->delta_t/1000*24*60*60/time_per_step);
+        }
+    }
+#ifndef GMX_THREAD_MPI
+    if (PAR(cr))
+    {
+        fprintf(out, "\n");
+    }
+#endif
+
+    fflush(out);
+}
+
+void print_date_and_time(FILE *fplog, int nodeid, const char *title,
+                         double the_time)
+{
+    char time_string[STRLEN];
+
+    if (!fplog)
+    {
+        return;
+    }
+
+    {
+        int    i;
+        char   timebuf[STRLEN];
+        time_t temp_time = (time_t) the_time;
+
+        gmx_ctime_r(&temp_time, timebuf, STRLEN);
+        for (i = 0; timebuf[i] >= ' '; i++)
+        {
+            time_string[i] = timebuf[i];
+        }
+        time_string[i] = '\0';
+    }
+
+    fprintf(fplog, "%s on node %d %s\n", title, nodeid, time_string);
+}
+
+void print_start(FILE *fplog, t_commrec *cr,
+                 gmx_walltime_accounting_t walltime_accounting,
+                 const char *name)
+{
+    char buf[STRLEN];
+
+    sprintf(buf, "Started %s", name);
+    print_date_and_time(fplog, cr->nodeid, buf,
+                        walltime_accounting_get_start_time_stamp(walltime_accounting));
+}
+
+static
void sum_forces(int start, int end, rvec f[], rvec flr[]) +{ + int i; + + if (gmx_debug_at) + { + pr_rvecs(debug, 0, "fsr", f+start, end-start); + pr_rvecs(debug, 0, "flr", flr+start, end-start); + } + for (i = start; (i < end); i++) + { + rvec_inc(f[i], flr[i]); + } +} + +/* + * calc_f_el calculates forces due to an electric field. + * + * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e + * + * Et[] contains the parameters for the time dependent + * part of the field (not yet used). + * Ex[] contains the parameters for + * the spatial dependent part of the field. You can have cool periodic + * fields in principle, but only a constant field is supported + * now. + * The function should return the energy due to the electric field + * (if any) but for now returns 0. + * + * WARNING: + * There can be problems with the virial. + * Since the field is not self-consistent this is unavoidable. + * For neutral molecules the virial is correct within this approximation. + * For neutral systems with many charged molecules the error is small. + * But for systems with a net charge or a few charged molecules + * the error can be significant when the field is high. + * Solution: implement a self-consitent electric field into PME. + */ +static void calc_f_el(FILE *fp, int start, int homenr, + real charge[], rvec f[], + t_cosines Ex[], t_cosines Et[], double t) +{ + rvec Ext; + real t0; + int i, m; + + for (m = 0; (m < DIM); m++) + { + if (Et[m].n > 0) + { + if (Et[m].n == 3) + { + t0 = Et[m].a[1]; + Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2]))); + } + else + { + Ext[m] = cos(Et[m].a[0]*t); + } + } + else + { + Ext[m] = 1.0; + } + if (Ex[m].n > 0) + { + /* Convert the field strength from V/nm to MD-units */ + Ext[m] *= Ex[m].a[0]*FIELDFAC; + for (i = start; (i < start+homenr); i++) + { + f[i][m] += charge[i]*Ext[m]; + } + } + else + { + Ext[m] = 0; + } + } + if (fp != NULL) + { + fprintf(fp, "%10g %10g %10g %10g #FIELD\n", t, + Ext[XX]/FIELDFAC, Ext[YY]/FIELDFAC, Ext[ZZ]/FIELDFAC); + } +} + +static void calc_virial(int start, int homenr, rvec x[], rvec f[], + tensor vir_part, t_graph *graph, matrix box, + t_nrnb *nrnb, const t_forcerec *fr, int ePBC) +{ + int i, j; + tensor virtest; + + /* The short-range virial from surrounding boxes */ + clear_mat(vir_part); + calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box); + inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS); + + /* Calculate partial virial, for local atoms only, based on short range. + * Total virial is computed in global_stat, called from do_md + */ + f_calc_vir(start, start+homenr, x, f, vir_part, graph, box); + inc_nrnb(nrnb, eNR_VIRIAL, homenr); + + /* Add position restraint contribution */ + for (i = 0; i < DIM; i++) + { + vir_part[i][i] += fr->vir_diag_posres[i]; + } + + /* Add wall contribution */ + for (i = 0; i < DIM; i++) + { + vir_part[i][ZZ] += fr->vir_wall_z[i]; + } + + if (debug) + { + pr_rvecs(debug, 0, "vir_part", vir_part, DIM); + } +} + +static void posres_wrapper(FILE *fplog, + int flags, + gmx_bool bSepDVDL, + t_inputrec *ir, + t_nrnb *nrnb, + gmx_localtop_t *top, + matrix box, rvec x[], + gmx_enerdata_t *enerd, + real *lambda, + t_forcerec *fr) +{ + t_pbc pbc; + real v, dvdl; + int i; + + /* Position restraints always require full pbc */ + set_pbc(&pbc, ir->ePBC, box); + dvdl = 0; + v = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms, + top->idef.iparams_posres, + (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres, + ir->ePBC == epbcNONE ? 
NULL : &pbc, + lambda[efptRESTRAINT], &dvdl, + fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB); + if (bSepDVDL) + { + gmx_print_sepdvdl(fplog, interaction_function[F_POSRES].longname, v, dvdl); + } + enerd->term[F_POSRES] += v; + /* If just the force constant changes, the FEP term is linear, + * but if k changes, it is not. + */ + enerd->dvdl_nonlin[efptRESTRAINT] += dvdl; + inc_nrnb(nrnb, eNR_POSRES, top->idef.il[F_POSRES].nr/2); + + if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL)) + { + for (i = 0; i < enerd->n_lambda; i++) + { + real dvdl_dum, lambda_dum; + + lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]); + v = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms, + top->idef.iparams_posres, + (const rvec*)x, NULL, NULL, + ir->ePBC == epbcNONE ? NULL : &pbc, lambda_dum, &dvdl, + fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB); + enerd->enerpart_lambda[i] += v; + } + } +} + +static void fbposres_wrapper(t_inputrec *ir, + t_nrnb *nrnb, + gmx_localtop_t *top, + matrix box, rvec x[], + gmx_enerdata_t *enerd, + t_forcerec *fr) +{ + t_pbc pbc; + real v; + + /* Flat-bottomed position restraints always require full pbc */ + set_pbc(&pbc, ir->ePBC, box); + v = fbposres(top->idef.il[F_FBPOSRES].nr, top->idef.il[F_FBPOSRES].iatoms, + top->idef.iparams_fbposres, + (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres, + ir->ePBC == epbcNONE ? NULL : &pbc, + fr->rc_scaling, fr->ePBC, fr->posres_com); + enerd->term[F_FBPOSRES] += v; + inc_nrnb(nrnb, eNR_FBPOSRES, top->idef.il[F_FBPOSRES].nr/2); +} + +static void pull_potential_wrapper(FILE *fplog, + gmx_bool bSepDVDL, + t_commrec *cr, + t_inputrec *ir, + matrix box, rvec x[], + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, + real *lambda, + double t) +{ + t_pbc pbc; + real dvdl; + + /* Calculate the center of mass forces, this requires communication, + * which is why pull_potential is called close to other communication. + * The virial contribution is calculated directly, + * which is why we call pull_potential after calc_virial. + */ + set_pbc(&pbc, ir->ePBC, box); + dvdl = 0; + enerd->term[F_COM_PULL] += + pull_potential(ir->ePull, ir->pull, mdatoms, &pbc, + cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl); + if (bSepDVDL) + { + gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl); + } + enerd->dvdl_lin[efptRESTRAINT] += dvdl; +} + +static void pme_receive_force_ener(FILE *fplog, + gmx_bool bSepDVDL, + t_commrec *cr, + gmx_wallcycle_t wcycle, + gmx_enerdata_t *enerd, + t_forcerec *fr) +{ + real e_q, e_lj, v, dvdl_q, dvdl_lj; + float cycles_ppdpme, cycles_seppme; + + cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME); + dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME); + + /* In case of node-splitting, the PP nodes receive the long-range + * forces, virial and energy from the PME nodes here. 
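+ */
+
+/* A schematic sketch (hypothetical names, not the real communication
+ * layer) of the receive step below: the mesh terms computed on the
+ * separate PME rank arrive as energies plus free-energy derivatives
+ * and are accumulated on the PP rank, just as gmx_pme_receive_f()
+ * feeds enerd a few lines down. Kept under #if 0, never compiled.
+ */
+#if 0
+typedef struct {
+    double e_q, e_lj;        /* Coulomb and LJ-PME mesh energies */
+    double dvdl_q, dvdl_lj;  /* their free-energy derivatives    */
+} mesh_result_sketch;
+
+static void accumulate_mesh_sketch(const mesh_result_sketch *r,
+                                   double *e_coul_recip, double *e_lj_recip,
+                                   double *dvdl_coul,    double *dvdl_vdw)
+{
+    *e_coul_recip += r->e_q;
+    *e_lj_recip   += r->e_lj;
+    *dvdl_coul    += r->dvdl_q;
+    *dvdl_vdw     += r->dvdl_lj;
+}
+#endif
+
+/*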
+ */ + wallcycle_start(wcycle, ewcPP_PMEWAITRECVF); + dvdl_q = 0; + dvdl_lj = 0; + gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q, + fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj, + &cycles_seppme); + if (bSepDVDL) + { + gmx_print_sepdvdl(fplog, "Electrostatic PME mesh", e_q, dvdl_q); + gmx_print_sepdvdl(fplog, "Lennard-Jones PME mesh", e_lj, dvdl_lj); + } + enerd->term[F_COUL_RECIP] += e_q; + enerd->term[F_LJ_RECIP] += e_lj; + enerd->dvdl_lin[efptCOUL] += dvdl_q; + enerd->dvdl_lin[efptVDW] += dvdl_lj; + + if (wcycle) + { + dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME); + } + wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF); +} + +static void print_large_forces(FILE *fp, t_mdatoms *md, t_commrec *cr, + gmx_int64_t step, real pforce, rvec *x, rvec *f) +{ + int i; + real pf2, fn2; + char buf[STEPSTRSIZE]; + + pf2 = sqr(pforce); + for (i = 0; i < md->homenr; i++) + { + fn2 = norm2(f[i]); + /* We also catch NAN, if the compiler does not optimize this away. */ + if (fn2 >= pf2 || fn2 != fn2) + { + fprintf(fp, "step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n", + gmx_step_str(step, buf), + ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], sqrt(fn2)); + } + } +} + +static void post_process_forces(t_commrec *cr, + gmx_int64_t step, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + matrix box, rvec x[], + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + t_graph *graph, + t_forcerec *fr, gmx_vsite_t *vsite, + int flags) +{ + if (fr->bF_NoVirSum) + { + if (vsite) + { + /* Spread the mesh force on virtual sites to the other particles... + * This is parallellized. MPI communication is performed + * if the constructing atoms aren't local. + */ + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, fr->f_novirsum, NULL, + (flags & GMX_FORCE_VIRIAL), fr->vir_el_recip, + nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + } + if (flags & GMX_FORCE_VIRIAL) + { + /* Now add the forces, this is local */ + if (fr->bDomDec) + { + sum_forces(0, fr->f_novirsum_n, f, fr->f_novirsum); + } + else + { + sum_forces(0, mdatoms->homenr, + f, fr->f_novirsum); + } + if (EEL_FULL(fr->eeltype)) + { + /* Add the mesh contribution to the virial */ + m_add(vir_force, fr->vir_el_recip, vir_force); + } + if (EVDW_PME(fr->vdwtype)) + { + /* Add the mesh contribution to the virial */ + m_add(vir_force, fr->vir_lj_recip, vir_force); + } + if (debug) + { + pr_rvecs(debug, 0, "vir_force", vir_force, DIM); + } + } + } + + if (fr->print_force >= 0) + { + print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f); + } +} + +static void do_nb_verlet(t_forcerec *fr, + interaction_const_t *ic, + gmx_enerdata_t *enerd, + int flags, int ilocality, + int clearF, + t_nrnb *nrnb, + gmx_wallcycle_t wcycle) +{ + int nnbl, kernel_type, enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj; + char *env; + nonbonded_verlet_group_t *nbvg; + gmx_bool bCUDA; + + if (!(flags & GMX_FORCE_NONBONDED)) + { + /* skip non-bonded calculation */ + return; + } + + nbvg = &fr->nbv->grp[ilocality]; + + /* CUDA kernel launch overhead is already timed separately */ + if (fr->cutoff_scheme != ecutsVERLET) + { + gmx_incons("Invalid cut-off scheme passed!"); + } + + bCUDA = (nbvg->kernel_type == nbnxnk8x8x8_CUDA); + + if (!bCUDA) + { + wallcycle_sub_start(wcycle, ewcsNONBONDED); + } + switch (nbvg->kernel_type) + { + case nbnxnk4x4_PlainC: + nbnxn_kernel_ref(&nbvg->nbl_lists, + nbvg->nbat, ic, + fr->shift_vec, + flags, + clearF, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], 
+ fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + + case nbnxnk4xN_SIMD_4xN: + nbnxn_kernel_simd_4xn(&nbvg->nbl_lists, + nbvg->nbat, ic, + nbvg->ewald_excl, + fr->shift_vec, + flags, + clearF, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], + fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + case nbnxnk4xN_SIMD_2xNN: + nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists, + nbvg->nbat, ic, + nbvg->ewald_excl, + fr->shift_vec, + flags, + clearF, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], + fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + + case nbnxnk8x8x8_CUDA: + nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, nbvg->nbat, flags, ilocality); + break; + + case nbnxnk8x8x8_PlainC: + nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0], + nbvg->nbat, ic, + fr->shift_vec, + flags, + clearF, + nbvg->nbat->out[0].f, + fr->fshift[0], + enerd->grpp.ener[egCOULSR], + fr->bBHAM ? + enerd->grpp.ener[egBHAMSR] : + enerd->grpp.ener[egLJSR]); + break; + + default: + gmx_incons("Invalid nonbonded kernel type passed!"); + + } + if (!bCUDA) + { + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + + if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) + { + enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF; + } + else if ((!bCUDA && nbvg->ewald_excl == ewaldexclAnalytical) || + (bCUDA && nbnxn_cuda_is_kernel_ewald_analytical(fr->nbv->cu_nbv))) + { + enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD; + } + else + { + enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB; + } + enr_nbnxn_kernel_lj = eNR_NBNXN_LJ; + if (flags & GMX_FORCE_ENERGY) + { + /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */ + enr_nbnxn_kernel_ljc += 1; + enr_nbnxn_kernel_lj += 1; + } + + inc_nrnb(nrnb, enr_nbnxn_kernel_ljc, + nbvg->nbl_lists.natpair_ljq); + inc_nrnb(nrnb, enr_nbnxn_kernel_lj, + nbvg->nbl_lists.natpair_lj); + /* The Coulomb-only kernels are offset -eNR_NBNXN_LJ_RF+eNR_NBNXN_RF */ + inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF, + nbvg->nbl_lists.natpair_q); + + if (ic->vdw_modifier == eintmodFORCESWITCH) + { + /* We add up the switch cost separately */ + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_FSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0), + nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj); + } + if (ic->vdw_modifier == eintmodPOTSWITCH) + { + /* We add up the switch cost separately */ + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_PSW+((flags & GMX_FORCE_ENERGY) ? 1 : 0), + nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj); + } + if (ic->vdwtype == evdwPME) + { + /* We add up the LJ Ewald cost separately */ + inc_nrnb(nrnb, eNR_NBNXN_ADD_LJ_EWALD+((flags & GMX_FORCE_ENERGY) ? 
1 : 0), + nbvg->nbl_lists.natpair_ljq + nbvg->nbl_lists.natpair_lj); + } +} + +static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists, + t_forcerec *fr, + rvec x[], + rvec f[], + t_mdatoms *mdatoms, + t_lambda *fepvals, + real *lambda, + gmx_enerdata_t *enerd, + int flags, + t_nrnb *nrnb, + gmx_wallcycle_t wcycle) +{ + int donb_flags; + nb_kernel_data_t kernel_data; + real lam_i[efptNR]; + real dvdl_nb[efptNR]; + int th; + int i, j; + + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + /* Currently all group scheme kernels always calculate (shift-)forces */ + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_VIRIAL) + { + donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + if (flags & GMX_FORCE_DO_LR) + { + donb_flags |= GMX_NONBONDED_DO_LR; + } + + kernel_data.flags = donb_flags; + kernel_data.lambda = lambda; + kernel_data.dvdl = dvdl_nb; + + kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR]; + kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR]; + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + } + + assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl); + + wallcycle_sub_start(wcycle, ewcsNONBONDED); +#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl) + for (th = 0; th < nbl_lists->nnbl; th++) + { + gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], + x, f, fr, mdatoms, &kernel_data, nrnb); + } + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA; + kernel_data.lambda = lam_i; + kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR]; + kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR]; + /* Note that we add to kernel_data.dvdl, but ignore the result */ + + for (i = 0; i < enerd->n_lambda; i++) + { + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? 
lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); +#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl) + for (th = 0; th < nbl_lists->nnbl; th++) + { + gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], + x, f, fr, mdatoms, &kernel_data, nrnb); + } + + sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + + wallcycle_sub_stop(wcycle, ewcsNONBONDED); +} + +void do_force_cutsVERLET(FILE *fplog, t_commrec *cr, + t_inputrec *inputrec, + gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + gmx_groups_t gmx_unused *groups, + matrix box, rvec x[], history_t *hist, + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, t_fcdata *fcd, + real *lambda, t_graph *graph, + t_forcerec *fr, interaction_const_t *ic, + gmx_vsite_t *vsite, rvec mu_tot, + double t, FILE *field, gmx_edsam_t ed, + gmx_bool bBornRadii, + int flags) +{ + int cg0, cg1, i, j; + int start, homenr; + int nb_kernel_type; + double mu[2*DIM]; + gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; + gmx_bool bDoLongRange, bDoForces, bSepLRF, bUseGPU, bUseOrEmulGPU; + gmx_bool bDiffKernels = FALSE; + matrix boxs; + rvec vzero, box_diag; + real e, v, dvdl; + float cycles_pme, cycles_force, cycles_wait_gpu; + nonbonded_verlet_t *nbv; + + cycles_force = 0; + cycles_wait_gpu = 0; + nbv = fr->nbv; + nb_kernel_type = fr->nbv->grp[0].kernel_type; + + start = 0; + homenr = mdatoms->homenr; + + bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog)); + + clear_mat(vir_force); + + cg0 = 0; + if (DOMAINDECOMP(cr)) + { + cg1 = cr->dd->ncg_tot; + } + else + { + cg1 = top->cgs.nr; + } + if (fr->n_tpi > 0) + { + cg1--; + } + + bStateChanged = (flags & GMX_FORCE_STATECHANGED); + bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE); + bFillGrid = (bNS && bStateChanged); + bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); + bDoLongRange = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR)); + bDoForces = (flags & GMX_FORCE_FORCES); + bSepLRF = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF)); + bUseGPU = fr->nbv->bUseGPU; + bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbnxnk8x8x8_PlainC); + + if (bStateChanged) + { + update_forcerec(fr, box); + + if (NEED_MUTOT(*inputrec)) + { + /* Calculate total (local) dipole moment in a temporary common array. + * This makes it possible to sum them over nodes faster. + */ + calc_mu(start, homenr, + x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed, + mu, mu+DIM); + } + } + + if (fr->ePBC != epbcNONE) + { + /* Compute shift vectors every step, + * because of pressure coupling or box deformation! + */ + if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged) + { + calc_shifts(box, fr->shift_vec); + } + + if (bCalcCGCM) + { + put_atoms_in_box_omp(fr->ePBC, box, homenr, x); + inc_nrnb(nrnb, eNR_SHIFTX, homenr); + } + else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) + { + unshift_self(graph, box, x); + } + } + + nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX, + fr->shift_vec, nbv->grp[0].nbat); + +#ifdef GMX_MPI + if (!(cr->duty & DUTY_PME)) + { + /* Send particle coordinates to the pme nodes. + * Since this is only implemented for domain decomposition + * and domain decomposition does not use the graph, + * we do not need to worry about shifting. 
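+ */
+
+/* Illustrative sketch of the flag word assembled just below (the enum
+ * values here are hypothetical; the real GMX_PME_* bits are defined
+ * elsewhere): independent mesh tasks are requested by OR-ing bits
+ * into a single int. Kept under #if 0, never compiled.
+ */
+#if 0
+enum {
+    PME_SKETCH_DO_COULOMB = 1 << 0,
+    PME_SKETCH_DO_LJ      = 1 << 1
+};
+
+static int make_pme_flags_sketch(int bCoulombPME, int bLJPME)
+{
+    int flags = 0;
+
+    if (bCoulombPME)
+    {
+        flags |= PME_SKETCH_DO_COULOMB;
+    }
+    if (bLJPME)
+    {
+        flags |= PME_SKETCH_DO_LJ;
+    }
+    return flags;
+}
+#endif
+
+/*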
+ */ + + int pme_flags = 0; + + wallcycle_start(wcycle, ewcPP_PMESENDX); + + bBS = (inputrec->nwall == 2); + if (bBS) + { + copy_mat(box, boxs); + svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + } + + if (EEL_PME(fr->eeltype)) + { + pme_flags |= GMX_PME_DO_COULOMB; + } + + if (EVDW_PME(fr->vdwtype)) + { + pme_flags |= GMX_PME_DO_LJ; + } + + gmx_pme_send_coordinates(cr, bBS ? boxs : box, x, + mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW], + (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)), + pme_flags, step); + + wallcycle_stop(wcycle, ewcPP_PMESENDX); + } +#endif /* GMX_MPI */ + + /* do gridding for pair search */ + if (bNS) + { + if (graph && bStateChanged) + { + /* Calculate intramolecular shift vectors to make molecules whole */ + mk_mshift(fplog, graph, fr->ePBC, box, x); + } + + clear_rvec(vzero); + box_diag[XX] = box[XX][XX]; + box_diag[YY] = box[YY][YY]; + box_diag[ZZ] = box[ZZ][ZZ]; + + wallcycle_start(wcycle, ewcNS); + if (!fr->bDomDec) + { + wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL); + nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box, + 0, vzero, box_diag, + 0, mdatoms->homenr, -1, fr->cginfo, x, + 0, NULL, + nbv->grp[eintLocal].kernel_type, + nbv->grp[eintLocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL); + } + else + { + wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL); + nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd), + fr->cginfo, x, + nbv->grp[eintNonlocal].kernel_type, + nbv->grp[eintNonlocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL); + } + + if (nbv->ngrp == 1 || + nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat) + { + nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatAll, + nbv->nbs, mdatoms, fr->cginfo); + } + else + { + nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatLocal, + nbv->nbs, mdatoms, fr->cginfo); + nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat, eatAll, + nbv->nbs, mdatoms, fr->cginfo); + } + wallcycle_stop(wcycle, ewcNS); + } + + /* initialize the GPU atom data and copy shift vector */ + if (bUseGPU) + { + if (bNS) + { + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + /* do local pair search */ + if (bNS) + { + wallcycle_start_nocount(wcycle, ewcNS); + wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL); + nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintLocal].nbat, + &top->excls, + ic->rlist, + nbv->min_ci_balanced, + &nbv->grp[eintLocal].nbl_lists, + eintLocal, + nbv->grp[eintLocal].kernel_type, + nrnb); + wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL); + + if (bUseGPU) + { + /* initialize local pair-list on the GPU */ + nbnxn_cuda_init_pairlist(nbv->cu_nbv, + nbv->grp[eintLocal].nbl_lists.nbl[0], + eintLocal); + } + wallcycle_stop(wcycle, ewcNS); + } + else + { + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS); + nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, x, + nbv->grp[eintLocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS); + wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (bUseGPU) + { + wallcycle_start(wcycle, ewcLAUNCH_GPU_NB); + /* launch local nonbonded F on GPU */ + do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo, + nrnb, wcycle); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + /* 
Communicate coordinates and sum dipole if necessary + + do non-local pair search */ + if (DOMAINDECOMP(cr)) + { + bDiffKernels = (nbv->grp[eintNonlocal].kernel_type != + nbv->grp[eintLocal].kernel_type); + + if (bDiffKernels) + { + /* With GPU+CPU non-bonded calculations we need to copy + * the local coordinates to the non-local nbat struct + * (in CPU format) as the non-local kernel call also + * calculates the local - non-local interactions. + */ + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS); + nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, TRUE, x, + nbv->grp[eintNonlocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS); + wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (bNS) + { + wallcycle_start_nocount(wcycle, ewcNS); + wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL); + + if (bDiffKernels) + { + nbnxn_grid_add_simple(nbv->nbs, nbv->grp[eintNonlocal].nbat); + } + + nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintNonlocal].nbat, + &top->excls, + ic->rlist, + nbv->min_ci_balanced, + &nbv->grp[eintNonlocal].nbl_lists, + eintNonlocal, + nbv->grp[eintNonlocal].kernel_type, + nrnb); + + wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL); + + if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_CUDA) + { + /* initialize non-local pair-list on the GPU */ + nbnxn_cuda_init_pairlist(nbv->cu_nbv, + nbv->grp[eintNonlocal].nbl_lists.nbl[0], + eintNonlocal); + } + wallcycle_stop(wcycle, ewcNS); + } + else + { + wallcycle_start(wcycle, ewcMOVEX); + dd_move_x(cr->dd, box, x); + + /* When we don't need the total dipole we sum it in global_stat */ + if (bStateChanged && NEED_MUTOT(*inputrec)) + { + gmx_sumd(2*DIM, mu, cr); + } + wallcycle_stop(wcycle, ewcMOVEX); + + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS); + nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, x, + nbv->grp[eintNonlocal].nbat); + wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS); + cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (bUseGPU && !bDiffKernels) + { + wallcycle_start(wcycle, ewcLAUNCH_GPU_NB); + /* launch non-local nonbonded F on GPU */ + do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo, + nrnb, wcycle); + cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + } + + if (bUseGPU) + { + /* launch D2H copy-back F */ + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + if (DOMAINDECOMP(cr) && !bDiffKernels) + { + nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat, + flags, eatNonlocal); + } + nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat, + flags, eatLocal); + cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + + if (bStateChanged && NEED_MUTOT(*inputrec)) + { + if (PAR(cr)) + { + gmx_sumd(2*DIM, mu, cr); + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < DIM; j++) + { + fr->mu_tot[i][j] = mu[i*DIM + j]; + } + } + } + if (fr->efep == efepNO) + { + copy_rvec(fr->mu_tot[0], mu_tot); + } + else + { + for (j = 0; j < DIM; j++) + { + mu_tot[j] = + (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + + lambda[efptCOUL]*fr->mu_tot[1][j]; + } + } + + /* Reset energies */ + reset_enerdata(fr, bNS, enerd, MASTER(cr)); + clear_rvecs(SHIFTS, fr->fshift); + + if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME)) + { + wallcycle_start(wcycle, ewcPPDURINGPME); + dd_force_flop_start(cr->dd, nrnb); + } + + if (inputrec->bRot) + { + /* Enforced rotation has its own cycle counter that starts after the collective + * coordinates have been communicated. 
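+ */
+
+/* A minimal sketch (hypothetical names) of the cycle bracketing used
+ * for such regions: read the counter around the work and hand the
+ * elapsed amount to the load balancer, as done for ddCyclF here.
+ * Kept under #if 0, never compiled.
+ */
+#if 0
+static double timed_region_sketch(void (*do_work)(void),
+                                  double (*read_cycles)(void))
+{
+    double c0 = read_cycles();
+
+    do_work();                  /* e.g. the enforced rotation below  */
+    return read_cycles() - c0;  /* the contribution added to ddCyclF */
+}
+#endif
+
+/*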
It is added to ddCyclF to allow + * for proper load-balancing */ + wallcycle_start(wcycle, ewcROT); + do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS); + wallcycle_stop(wcycle, ewcROT); + } + + /* Start the force cycle counter. + * This counter is stopped in do_forcelow_level. + * No parallel communication should occur while this counter is running, + * since that will interfere with the dynamic load balancing. + */ + wallcycle_start(wcycle, ewcFORCE); + if (bDoForces) + { + /* Reset forces for which the virial is calculated separately: + * PME/Ewald forces if necessary */ + if (fr->bF_NoVirSum) + { + if (flags & GMX_FORCE_VIRIAL) + { + fr->f_novirsum = fr->f_novirsum_alloc; + if (fr->bDomDec) + { + clear_rvecs(fr->f_novirsum_n, fr->f_novirsum); + } + else + { + clear_rvecs(homenr, fr->f_novirsum+start); + } + } + else + { + /* We are not calculating the pressure so we do not need + * a separate array for forces that do not contribute + * to the pressure. + */ + fr->f_novirsum = f; + } + } + + /* Clear the short- and long-range forces */ + clear_rvecs(fr->natoms_force_constr, f); + if (bSepLRF && do_per_step(step, inputrec->nstcalclr)) + { + clear_rvecs(fr->natoms_force_constr, fr->f_twin); + } + + clear_rvec(fr->vir_diag_posres); + } + + if (inputrec->ePull == epullCONSTRAINT) + { + clear_pull_forces(inputrec->pull); + } + + /* We calculate the non-bonded forces, when done on the CPU, here. + * We do this before calling do_force_lowlevel, as in there bondeds + * forces are calculated before PME, which does communication. + * With this order, non-bonded and bonded force calculation imbalance + * can be balanced out by the domain decomposition load balancing. + */ + + if (!bUseOrEmulGPU) + { + /* Maybe we should move this into do_force_lowlevel */ + do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes, + nrnb, wcycle); + } + + if (fr->efep != efepNO) + { + /* Calculate the local and non-local free energy interactions here. + * Happens here on the CPU both with and without GPU. + */ + if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0) + { + do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists, + fr, x, f, mdatoms, + inputrec->fepvals, lambda, + enerd, flags, nrnb, wcycle); + } + + if (DOMAINDECOMP(cr) && + fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0) + { + do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists, + fr, x, f, mdatoms, + inputrec->fepvals, lambda, + enerd, flags, nrnb, wcycle); + } + } + + if (!bUseOrEmulGPU || bDiffKernels) + { + int aloc; + + if (DOMAINDECOMP(cr)) + { + do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, + bDiffKernels ? enbvClearFYes : enbvClearFNo, + nrnb, wcycle); + } + + if (!bUseOrEmulGPU) + { + aloc = eintLocal; + } + else + { + aloc = eintNonlocal; + } + + /* Add all the non-bonded force to the normal force array. + * This can be split into a local a non-local part when overlapping + * communication with calculation with domain decomposition. 
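+ */
+
+/* A condensed sketch (not the real nbnxn buffer layout) of the
+ * reduction performed by nbnxn_atomdata_add_nbat_f_to_f() a few lines
+ * below: forces accumulated in the kernel's packed output buffer are
+ * added back into the plain rvec force array. Uses the rvec/real
+ * types already available in this file; kept under #if 0.
+ */
+#if 0
+static void add_nbat_f_sketch(int natoms, const real *fnbat, rvec *f)
+{
+    int i;
+
+    for (i = 0; i < natoms; i++)
+    {
+        f[i][XX] += fnbat[3*i + 0];   /* assumes simple xyz packing */
+        f[i][YY] += fnbat[3*i + 1];
+        f[i][ZZ] += fnbat[3*i + 2];
+    }
+}
+#endif
+
+/*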
+ */ + cycles_force += wallcycle_stop(wcycle, ewcFORCE); + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS); + nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->grp[aloc].nbat, f); + wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS); + cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_start_nocount(wcycle, ewcFORCE); + + /* if there are multiple fshift output buffers reduce them */ + if ((flags & GMX_FORCE_VIRIAL) && + nbv->grp[aloc].nbl_lists.nnbl > 1) + { + nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat, + fr->fshift); + } + } + + /* update QMMMrec, if necessary */ + if (fr->bQMMM) + { + update_QMMMrec(cr, fr, x, mdatoms, box, top); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) + { + posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x, + enerd, lambda, fr); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0) + { + fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr); + } + + /* Compute the bonded and non-bonded energies and optionally forces */ + do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef), + cr, nrnb, wcycle, mdatoms, + x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born, + &(top->atomtypes), bBornRadii, box, + inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot, + flags, &cycles_pme); + + if (bSepLRF) + { + if (do_per_step(step, inputrec->nstcalclr)) + { + /* Add the long range forces to the short range forces */ + for (i = 0; i < fr->natoms_force_constr; i++) + { + rvec_add(fr->f_twin[i], f[i], f[i]); + } + } + } + + cycles_force += wallcycle_stop(wcycle, ewcFORCE); + + if (ed) + { + do_flood(cr, inputrec, x, f, ed, box, step, bNS); + } + + if (bUseOrEmulGPU && !bDiffKernels) + { + /* wait for non-local forces (or calculate in emulation mode) */ + if (DOMAINDECOMP(cr)) + { + if (bUseGPU) + { + float cycles_tmp; + + wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL); + nbnxn_cuda_wait_gpu(nbv->cu_nbv, + nbv->grp[eintNonlocal].nbat, + flags, eatNonlocal, + enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR], + fr->fshift); + cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL); + cycles_wait_gpu += cycles_tmp; + cycles_force += cycles_tmp; + } + else + { + wallcycle_start_nocount(wcycle, ewcFORCE); + do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes, + nrnb, wcycle); + cycles_force += wallcycle_stop(wcycle, ewcFORCE); + } + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS); + /* skip the reduction if there was no non-local work to do */ + if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0) + { + nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal, + nbv->grp[eintNonlocal].nbat, f); + } + wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS); + cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + } + + if (bDoForces && DOMAINDECOMP(cr)) + { + /* Communicate the forces */ + wallcycle_start(wcycle, ewcMOVEF); + dd_move_f(cr->dd, f, fr->fshift); + /* Do we need to communicate the separate force array + * for terms that do not contribute to the single sum virial? + * Position restraints and electric fields do not introduce + * inter-cg forces, only full electrostatics methods do. + * When we do not calculate the virial, fr->f_novirsum = f, + * so we have already communicated these forces. 
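+ */
+
+/* The decision just below, condensed into a sketch (the helper is
+ * hypothetical; the flags and fields are the ones used here): a second
+ * dd_move_f() is needed only when a distinct no-virial-sum buffer was
+ * actually filled by a full-electrostatics method. Kept under #if 0.
+ */
+#if 0
+static gmx_bool need_separate_f_comm_sketch(gmx_bool bFullElec,
+                                            int      n_intercg_excl,
+                                            int      flags)
+{
+    return bFullElec && n_intercg_excl > 0 && (flags & GMX_FORCE_VIRIAL);
+}
+#endif
+
+/*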
+ */ + if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl && + (flags & GMX_FORCE_VIRIAL)) + { + dd_move_f(cr->dd, fr->f_novirsum, NULL); + } + if (bSepLRF) + { + /* We should not update the shift forces here, + * since f_twin is already included in f. + */ + dd_move_f(cr->dd, fr->f_twin, NULL); + } + wallcycle_stop(wcycle, ewcMOVEF); + } + + if (bUseOrEmulGPU) + { + /* wait for local forces (or calculate in emulation mode) */ + if (bUseGPU) + { + wallcycle_start(wcycle, ewcWAIT_GPU_NB_L); + nbnxn_cuda_wait_gpu(nbv->cu_nbv, + nbv->grp[eintLocal].nbat, + flags, eatLocal, + enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR], + fr->fshift); + cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L); + + /* now clear the GPU outputs while we finish the step on the CPU */ + + wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB); + nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags); + wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB); + } + else + { + wallcycle_start_nocount(wcycle, ewcFORCE); + do_nb_verlet(fr, ic, enerd, flags, eintLocal, + DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes, + nrnb, wcycle); + wallcycle_stop(wcycle, ewcFORCE); + } + wallcycle_start(wcycle, ewcNB_XF_BUF_OPS); + wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS); + if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0) + { + /* skip the reduction if there was no non-local work to do */ + nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal, + nbv->grp[eintLocal].nbat, f); + } + wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS); + wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS); + } + + if (DOMAINDECOMP(cr)) + { + dd_force_flop_stop(cr->dd, nrnb); + if (wcycle) + { + dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF); + if (bUseGPU) + { + dd_cycles_add(cr->dd, cycles_wait_gpu, ddCyclWaitGPU); + } + } + } + + if (bDoForces) + { + if (IR_ELEC_FIELD(*inputrec)) + { + /* Compute forces due to electric field */ + calc_f_el(MASTER(cr) ? field : NULL, + start, homenr, mdatoms->chargeA, fr->f_novirsum, + inputrec->ex, inputrec->et, t); + } + + /* If we have NoVirSum forces, but we do not calculate the virial, + * we sum fr->f_novirum=f later. + */ + if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL))) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + + if (bSepLRF) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL, + nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + } + } + + if (flags & GMX_FORCE_VIRIAL) + { + /* Calculation of the virial must be done after vsites! */ + calc_virial(0, mdatoms->homenr, x, f, + vir_force, graph, box, nrnb, fr, inputrec->ePBC); + } + } + + if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) + { + pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, + f, vir_force, mdatoms, enerd, lambda, t); + } + + /* Add the forces from enforced rotation potentials (if any) */ + if (inputrec->bRot) + { + wallcycle_start(wcycle, ewcROTadd); + enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t); + wallcycle_stop(wcycle, ewcROTadd); + } + + /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. 
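+ */
+
+/* A toy sketch of the vsite spreading performed above, reduced to the
+ * simplest two-atom linear site x_s = (1-a)*x_i + a*x_j (illustrative
+ * only, not the general spread_vsite_f() machinery): the force on the
+ * massless site is redistributed with the construction weights, which
+ * preserves the total force and torque. Kept under #if 0.
+ */
+#if 0
+static void spread_linear_vsite_f_sketch(real a, const rvec f_site,
+                                         rvec f_i, rvec f_j)
+{
+    int m;
+
+    for (m = 0; m < DIM; m++)
+    {
+        f_i[m] += (1 - a)*f_site[m];
+        f_j[m] += a*f_site[m];
+    }
+}
+#endif
+
+/*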
*/ + IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle); + + if (PAR(cr) && !(cr->duty & DUTY_PME)) + { + /* In case of node-splitting, the PP nodes receive the long-range + * forces, virial and energy from the PME nodes here. + */ + pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr); + } + + if (bDoForces) + { + post_process_forces(cr, step, nrnb, wcycle, + top, box, x, f, vir_force, mdatoms, graph, fr, vsite, + flags); + } + + /* Sum the potential energy terms from group contributions */ + sum_epot(&(enerd->grpp), enerd->term); +} + +void do_force_cutsGROUP(FILE *fplog, t_commrec *cr, + t_inputrec *inputrec, + gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + gmx_groups_t *groups, + matrix box, rvec x[], history_t *hist, + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, t_fcdata *fcd, + real *lambda, t_graph *graph, + t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot, + double t, FILE *field, gmx_edsam_t ed, + gmx_bool bBornRadii, + int flags) +{ + int cg0, cg1, i, j; + int start, homenr; + double mu[2*DIM]; + gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; + gmx_bool bDoLongRangeNS, bDoForces, bDoPotential, bSepLRF; + gmx_bool bDoAdressWF; + matrix boxs; + rvec vzero, box_diag; + real e, v, dvdlambda[efptNR]; + t_pbc pbc; + float cycles_pme, cycles_force; + + start = 0; + homenr = mdatoms->homenr; + + bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog)); + + clear_mat(vir_force); + + cg0 = 0; + if (DOMAINDECOMP(cr)) + { + cg1 = cr->dd->ncg_tot; + } + else + { + cg1 = top->cgs.nr; + } + if (fr->n_tpi > 0) + { + cg1--; + } + + bStateChanged = (flags & GMX_FORCE_STATECHANGED); + bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE); + /* Should we update the long-range neighborlists at this step? */ + bDoLongRangeNS = fr->bTwinRange && bNS; + /* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */ + bFillGrid = (bNS && bStateChanged); + bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr)); + bDoForces = (flags & GMX_FORCE_FORCES); + bDoPotential = (flags & GMX_FORCE_ENERGY); + bSepLRF = ((inputrec->nstcalclr > 1) && bDoForces && + (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR)); + + /* should probably move this to the forcerec since it doesn't change */ + bDoAdressWF = ((fr->adress_type != eAdressOff)); + + if (bStateChanged) + { + update_forcerec(fr, box); + + if (NEED_MUTOT(*inputrec)) + { + /* Calculate total (local) dipole moment in a temporary common array. + * This makes it possible to sum them over nodes faster. + */ + calc_mu(start, homenr, + x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed, + mu, mu+DIM); + } + } + + if (fr->ePBC != epbcNONE) + { + /* Compute shift vectors every step, + * because of pressure coupling or box deformation! 
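+ */
+
+/* Sketch of what calc_shifts() below produces, simplified to a
+ * rectangular box and the 27 nearest images (the real table also
+ * holds extra x images for triclinic boxes): every shift vector is an
+ * integer combination of box vectors, which is why it must be rebuilt
+ * whenever pressure coupling deforms the box. Kept under #if 0.
+ */
+#if 0
+static void calc_shifts_sketch(const matrix box, rvec shift_vec[27])
+{
+    int i, j, k, n = 0;
+
+    for (k = -1; k <= 1; k++)
+    {
+        for (j = -1; j <= 1; j++)
+        {
+            for (i = -1; i <= 1; i++)
+            {
+                shift_vec[n][XX] = i*box[XX][XX];
+                shift_vec[n][YY] = j*box[YY][YY];
+                shift_vec[n][ZZ] = k*box[ZZ][ZZ];
+                n++;
+            }
+        }
+    }
+}
+#endif
+
+/*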
+ */ + if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged) + { + calc_shifts(box, fr->shift_vec); + } + + if (bCalcCGCM) + { + put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box, + &(top->cgs), x, fr->cg_cm); + inc_nrnb(nrnb, eNR_CGCM, homenr); + inc_nrnb(nrnb, eNR_RESETX, cg1-cg0); + } + else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) + { + unshift_self(graph, box, x); + } + } + else if (bCalcCGCM) + { + calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm); + inc_nrnb(nrnb, eNR_CGCM, homenr); + } + + if (bCalcCGCM && gmx_debug_at) + { + pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr); + } + +#ifdef GMX_MPI + if (!(cr->duty & DUTY_PME)) + { + /* Send particle coordinates to the pme nodes. + * Since this is only implemented for domain decomposition + * and domain decomposition does not use the graph, + * we do not need to worry about shifting. + */ + + int pme_flags = 0; + + wallcycle_start(wcycle, ewcPP_PMESENDX); + + bBS = (inputrec->nwall == 2); + if (bBS) + { + copy_mat(box, boxs); + svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + } + + if (EEL_PME(fr->eeltype)) + { + pme_flags |= GMX_PME_DO_COULOMB; + } + + if (EVDW_PME(fr->vdwtype)) + { + pme_flags |= GMX_PME_DO_LJ; + } + + gmx_pme_send_coordinates(cr, bBS ? boxs : box, x, + mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW], + (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)), + pme_flags, step); + + wallcycle_stop(wcycle, ewcPP_PMESENDX); + } +#endif /* GMX_MPI */ + + /* Communicate coordinates and sum dipole if necessary */ + if (DOMAINDECOMP(cr)) + { + wallcycle_start(wcycle, ewcMOVEX); + dd_move_x(cr->dd, box, x); + wallcycle_stop(wcycle, ewcMOVEX); + } + + /* update adress weight beforehand */ + if (bStateChanged && bDoAdressWF) + { + /* need pbc for adress weight calculation with pbc_dx */ + set_pbc(&pbc, inputrec->ePBC, box); + if (fr->adress_site == eAdressSITEcog) + { + update_adress_weights_cog(top->idef.iparams, top->idef.il, x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + else if (fr->adress_site == eAdressSITEcom) + { + update_adress_weights_com(fplog, cg0, cg1, &(top->cgs), x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + else if (fr->adress_site == eAdressSITEatomatom) + { + update_adress_weights_atom_per_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + else + { + update_adress_weights_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms, + inputrec->ePBC == epbcNONE ? 
NULL : &pbc); + } + } + + if (NEED_MUTOT(*inputrec)) + { + + if (bStateChanged) + { + if (PAR(cr)) + { + gmx_sumd(2*DIM, mu, cr); + } + for (i = 0; i < 2; i++) + { + for (j = 0; j < DIM; j++) + { + fr->mu_tot[i][j] = mu[i*DIM + j]; + } + } + } + if (fr->efep == efepNO) + { + copy_rvec(fr->mu_tot[0], mu_tot); + } + else + { + for (j = 0; j < DIM; j++) + { + mu_tot[j] = + (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j]; + } + } + } + + /* Reset energies */ + reset_enerdata(fr, bNS, enerd, MASTER(cr)); + clear_rvecs(SHIFTS, fr->fshift); + + if (bNS) + { + wallcycle_start(wcycle, ewcNS); + + if (graph && bStateChanged) + { + /* Calculate intramolecular shift vectors to make molecules whole */ + mk_mshift(fplog, graph, fr->ePBC, box, x); + } + + /* Do the actual neighbour searching */ + ns(fplog, fr, box, + groups, top, mdatoms, + cr, nrnb, bFillGrid, + bDoLongRangeNS); + + wallcycle_stop(wcycle, ewcNS); + } + + if (inputrec->implicit_solvent && bNS) + { + make_gb_nblist(cr, inputrec->gb_algorithm, + x, box, fr, &top->idef, graph, fr->born); + } + + if (DOMAINDECOMP(cr) && !(cr->duty & DUTY_PME)) + { + wallcycle_start(wcycle, ewcPPDURINGPME); + dd_force_flop_start(cr->dd, nrnb); + } + + if (inputrec->bRot) + { + /* Enforced rotation has its own cycle counter that starts after the collective + * coordinates have been communicated. It is added to ddCyclF to allow + * for proper load-balancing */ + wallcycle_start(wcycle, ewcROT); + do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS); + wallcycle_stop(wcycle, ewcROT); + } + + /* Start the force cycle counter. + * This counter is stopped in do_forcelow_level. + * No parallel communication should occur while this counter is running, + * since that will interfere with the dynamic load balancing. + */ + wallcycle_start(wcycle, ewcFORCE); + + if (bDoForces) + { + /* Reset forces for which the virial is calculated separately: + * PME/Ewald forces if necessary */ + if (fr->bF_NoVirSum) + { + if (flags & GMX_FORCE_VIRIAL) + { + fr->f_novirsum = fr->f_novirsum_alloc; + if (fr->bDomDec) + { + clear_rvecs(fr->f_novirsum_n, fr->f_novirsum); + } + else + { + clear_rvecs(homenr, fr->f_novirsum+start); + } + } + else + { + /* We are not calculating the pressure so we do not need + * a separate array for forces that do not contribute + * to the pressure. + */ + fr->f_novirsum = f; + } + } + + /* Clear the short- and long-range forces */ + clear_rvecs(fr->natoms_force_constr, f); + if (bSepLRF && do_per_step(step, inputrec->nstcalclr)) + { + clear_rvecs(fr->natoms_force_constr, fr->f_twin); + } + + clear_rvec(fr->vir_diag_posres); + } + if (inputrec->ePull == epullCONSTRAINT) + { + clear_pull_forces(inputrec->pull); + } + + /* update QMMMrec, if necessary */ + if (fr->bQMMM) + { + update_QMMMrec(cr, fr, x, mdatoms, box, top); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) + { + posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x, + enerd, lambda, fr); + } + + if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0) + { + fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr); + } + + /* Compute the bonded and non-bonded energies and optionally forces */ + do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef), + cr, nrnb, wcycle, mdatoms, + x, hist, f, bSepLRF ? 
fr->f_twin : f, enerd, fcd, top, fr->born, + &(top->atomtypes), bBornRadii, box, + inputrec->fepvals, lambda, + graph, &(top->excls), fr->mu_tot, + flags, + &cycles_pme); + + if (bSepLRF) + { + if (do_per_step(step, inputrec->nstcalclr)) + { + /* Add the long range forces to the short range forces */ + for (i = 0; i < fr->natoms_force_constr; i++) + { + rvec_add(fr->f_twin[i], f[i], f[i]); + } + } + } + + cycles_force = wallcycle_stop(wcycle, ewcFORCE); + + if (ed) + { + do_flood(cr, inputrec, x, f, ed, box, step, bNS); + } + + if (DOMAINDECOMP(cr)) + { + dd_force_flop_stop(cr->dd, nrnb); + if (wcycle) + { + dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF); + } + } + + if (bDoForces) + { + if (IR_ELEC_FIELD(*inputrec)) + { + /* Compute forces due to electric field */ + calc_f_el(MASTER(cr) ? field : NULL, + start, homenr, mdatoms->chargeA, fr->f_novirsum, + inputrec->ex, inputrec->et, t); + } + + if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce) + { + /* Compute thermodynamic force in hybrid AdResS region */ + adress_thermo_force(start, homenr, &(top->cgs), x, fr->f_novirsum, fr, mdatoms, + inputrec->ePBC == epbcNONE ? NULL : &pbc); + } + + /* Communicate the forces */ + if (DOMAINDECOMP(cr)) + { + wallcycle_start(wcycle, ewcMOVEF); + dd_move_f(cr->dd, f, fr->fshift); + /* Do we need to communicate the separate force array + * for terms that do not contribute to the single sum virial? + * Position restraints and electric fields do not introduce + * inter-cg forces, only full electrostatics methods do. + * When we do not calculate the virial, fr->f_novirsum = f, + * so we have already communicated these forces. + */ + if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl && + (flags & GMX_FORCE_VIRIAL)) + { + dd_move_f(cr->dd, fr->f_novirsum, NULL); + } + if (bSepLRF) + { + /* We should not update the shift forces here, + * since f_twin is already included in f. + */ + dd_move_f(cr->dd, fr->f_twin, NULL); + } + wallcycle_stop(wcycle, ewcMOVEF); + } + + /* If we have NoVirSum forces, but we do not calculate the virial, + * we sum fr->f_novirum=f later. + */ + if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL))) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + + if (bSepLRF) + { + wallcycle_start(wcycle, ewcVSITESPREAD); + spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL, + nrnb, + &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr); + wallcycle_stop(wcycle, ewcVSITESPREAD); + } + } + + if (flags & GMX_FORCE_VIRIAL) + { + /* Calculation of the virial must be done after vsites! */ + calc_virial(0, mdatoms->homenr, x, f, + vir_force, graph, box, nrnb, fr, inputrec->ePBC); + } + } + + if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) + { + pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, + f, vir_force, mdatoms, enerd, lambda, t); + } + + /* Add the forces from enforced rotation potentials (if any) */ + if (inputrec->bRot) + { + wallcycle_start(wcycle, ewcROTadd); + enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t); + wallcycle_stop(wcycle, ewcROTadd); + } + + /* Add forces from interactive molecular dynamics (IMD), if bIMD == TRUE. 
*/ + IMD_apply_forces(inputrec->bIMD, inputrec->imd, cr, f, wcycle); + + if (PAR(cr) && !(cr->duty & DUTY_PME)) + { + /* In case of node-splitting, the PP nodes receive the long-range + * forces, virial and energy from the PME nodes here. + */ + pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr); + } + + if (bDoForces) + { + post_process_forces(cr, step, nrnb, wcycle, + top, box, x, f, vir_force, mdatoms, graph, fr, vsite, + flags); + } + + /* Sum the potential energy terms from group contributions */ + sum_epot(&(enerd->grpp), enerd->term); +} + +void do_force(FILE *fplog, t_commrec *cr, + t_inputrec *inputrec, + gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_localtop_t *top, + gmx_groups_t *groups, + matrix box, rvec x[], history_t *hist, + rvec f[], + tensor vir_force, + t_mdatoms *mdatoms, + gmx_enerdata_t *enerd, t_fcdata *fcd, + real *lambda, t_graph *graph, + t_forcerec *fr, + gmx_vsite_t *vsite, rvec mu_tot, + double t, FILE *field, gmx_edsam_t ed, + gmx_bool bBornRadii, + int flags) +{ + /* modify force flag if not doing nonbonded */ + if (!fr->bNonbonded) + { + flags &= ~GMX_FORCE_NONBONDED; + } + + switch (inputrec->cutoff_scheme) + { + case ecutsVERLET: + do_force_cutsVERLET(fplog, cr, inputrec, + step, nrnb, wcycle, + top, + groups, + box, x, hist, + f, vir_force, + mdatoms, + enerd, fcd, + lambda, graph, + fr, fr->ic, + vsite, mu_tot, + t, field, ed, + bBornRadii, + flags); + break; + case ecutsGROUP: + do_force_cutsGROUP(fplog, cr, inputrec, + step, nrnb, wcycle, + top, + groups, + box, x, hist, + f, vir_force, + mdatoms, + enerd, fcd, + lambda, graph, + fr, vsite, mu_tot, + t, field, ed, + bBornRadii, + flags); + break; + default: + gmx_incons("Invalid cut-off scheme passed!"); + } +} + + +void do_constrain_first(FILE *fplog, gmx_constr_t constr, + t_inputrec *ir, t_mdatoms *md, + t_state *state, t_commrec *cr, t_nrnb *nrnb, + t_forcerec *fr, gmx_localtop_t *top) +{ + int i, m, start, end; + gmx_int64_t step; + real dt = ir->delta_t; + real dvdl_dum; + rvec *savex; + + snew(savex, state->natoms); + + start = 0; + end = md->homenr; + + if (debug) + { + fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n", + start, md->homenr, end); + } + /* Do a first constrain to reset particles... 
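+     *
+     * In outline (a sketch of the control flow below, not new logic):
+     * 1) constrain() with econqCoord projects x(0) onto the constraint
+     *    surface;
+     * 2) for velocity-Verlet integrators a second call with econqVeloc
+     *    projects v(0) onto the constraint manifold;
+     * 3) for leap-frog-style integrators the velocities are negated,
+     *    savex[i] = x[i] + dt*(-v[i]) reconstructs the positions at
+     *    t = -dt, and constraining savex with x(0) as reference yields
+     *    half-step velocities at t = -dt/2 that satisfy the constraints,
+     *    after which the velocities are negated back.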
*/
+    step = ir->init_step;
+    if (fplog)
+    {
+        char buf[STEPSTRSIZE];
+        fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
+                gmx_step_str(step, buf));
+    }
+    dvdl_dum = 0;
+
+    /* constrain the current position */
+    constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+              ir, NULL, cr, step, 0, md,
+              state->x, state->x, NULL,
+              fr->bMolPBC, state->box,
+              state->lambda[efptBONDED], &dvdl_dum,
+              NULL, NULL, nrnb, econqCoord,
+              ir->epc == epcMTTK, state->veta, state->veta);
+    if (EI_VV(ir->eI))
+    {
+        /* constrain the initial velocity, and save it */
+        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
+        /* might not yet treat veta correctly */
+        constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+                  ir, NULL, cr, step, 0, md,
+                  state->x, state->v, state->v,
+                  fr->bMolPBC, state->box,
+                  state->lambda[efptBONDED], &dvdl_dum,
+                  NULL, NULL, nrnb, econqVeloc,
+                  ir->epc == epcMTTK, state->veta, state->veta);
+    }
+    /* constrain the initial velocities at t-dt/2 */
+    if (EI_STATE_VELOCITY(ir->eI) && ir->eI != eiVV)
+    {
+        for (i = start; (i < end); i++)
+        {
+            for (m = 0; (m < DIM); m++)
+            {
+                /* Reverse the velocity */
+                state->v[i][m] = -state->v[i][m];
+                /* Store the position at t-dt in buf */
+                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
+            }
+        }
+        /* Shake the positions at t=-dt with the positions at t=0
+         * as reference coordinates.
+         */
+        if (fplog)
+        {
+            char buf[STEPSTRSIZE];
+            fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
+                    gmx_step_str(step, buf));
+        }
+        dvdl_dum = 0;
+        constrain(NULL, TRUE, FALSE, constr, &(top->idef),
+                  ir, NULL, cr, step, -1, md,
+                  state->x, savex, NULL,
+                  fr->bMolPBC, state->box,
+                  state->lambda[efptBONDED], &dvdl_dum,
+                  state->v, NULL, nrnb, econqCoord,
+                  ir->epc == epcMTTK, state->veta, state->veta);
+
+        for (i = start; i < end; i++)
+        {
+            for (m = 0; m < DIM; m++)
+            {
+                /* Re-reverse the velocities */
+                state->v[i][m] = -state->v[i][m];
+            }
+        }
+    }
+    sfree(savex);
+}
+
+
+static void
+integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
+                double *enerout, double *virout)
+{
+    double enersum, virsum;
+    double invscale, invscale2, invscale3;
+    double r, ea, eb, ec, pa, pb, pc, pd;
+    double y0, f, g, h;
+    int    ri, offset, tabfactor;
+
+    invscale  = 1.0/scale;
+    invscale2 = invscale*invscale;
+    invscale3 = invscale*invscale2;
+
+    /* Following summation derived from cubic spline definition,
+     * Numerical Recipes in C, second edition, p. 113-116. Exact for
+     * the cubic spline. We first calculate the negative of the
+     * energy from rvdw to rvdw_switch, assuming that g(r)=1, and then
+     * add the more standard, abrupt cutoff correction to that result,
+     * yielding the long-range correction for a switched function. We
+     * perform both the pressure and energy loops at the same time for
+     * simplicity, as the computational cost is low. */
+
+    if (offstart == 0)
+    {
+        /* Since the dispersion table has been scaled down a factor
+         * 6.0 and the repulsion a factor 12.0 to compensate for the
+         * c6/c12 parameters inside nbfp[] being scaled up (to save
+         * flops in kernels), we need to correct for this.
+         */
+        tabfactor = 6.0;
+    }
+    else
+    {
+        tabfactor = 12.0;
+    }
+
+    enersum = 0.0;
+    virsum  = 0.0;
+    for (ri = rstart; ri < rend; ++ri)
+    {
+        r  = ri*invscale;
+        ea = invscale3;
+        eb = 2.0*invscale2*r;
+        ec = invscale*r*r;
+
+        pa = invscale3;
+        pb = 3.0*invscale2*r;
+        pc = 3.0*invscale*r*r;
+        pd = r*r*r;
+
+        /* this "8" is from the packing in the vdwtab array - perhaps
+           should be defined?
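+           The layout assumed in this sketch is the cubic-spline Y,F,G,H
+           format with both VdW interactions packed per table point:
+             vdwtab[8*ri + 0..3]  Y,F,G,H for dispersion (offstart == 0)
+             vdwtab[8*ri + 4..7]  Y,F,G,H for repulsion  (offstart == 4)
+           i.e. stride = formatsize (4) times ninteractions (2) for the
+           VdW-only table, which is what makes offset = 8*ri + offstart
+           correct below.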
*/ + + offset = 8*ri + offstart; + y0 = vdwtab[offset]; + f = vdwtab[offset+1]; + g = vdwtab[offset+2]; + h = vdwtab[offset+3]; + + enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4); + virsum += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3); + } + *enerout = 4.0*M_PI*enersum*tabfactor; + *virout = 4.0*M_PI*virsum*tabfactor; +} + +void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr) +{ - double eners[2], virs[2], enersum, virsum, y0, f, g, h; - double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd; - double invscale, invscale2, invscale3; - int ri0, ri1, ri, i, offstart, offset; - real scale, *vdwtab, tabfactor, tmp; ++ double eners[2], virs[2], enersum, virsum, y0, f, g, h; ++ double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd; ++ double invscale, invscale2, invscale3; ++ int ri0, ri1, ri, i, offstart, offset; ++ real scale, *vdwtab, tabfactor, tmp; + + fr->enershiftsix = 0; + fr->enershifttwelve = 0; + fr->enerdiffsix = 0; + fr->enerdifftwelve = 0; + fr->virdiffsix = 0; + fr->virdifftwelve = 0; + + if (eDispCorr != edispcNO) + { + for (i = 0; i < 2; i++) + { + eners[i] = 0; + virs[i] = 0; + } - if (fr->vdwtype == evdwSWITCH || fr->vdwtype == evdwSHIFT || - fr->vdw_modifier == eintmodPOTSWITCH || - fr->vdw_modifier == eintmodFORCESWITCH) ++ if ((fr->vdw_modifier == eintmodPOTSHIFT) || ++ (fr->vdw_modifier == eintmodPOTSWITCH) || ++ (fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdwtype == evdwSHIFT) || ++ (fr->vdwtype == evdwSWITCH)) + { - if (fr->rvdw_switch == 0) ++ if (((fr->vdw_modifier == eintmodPOTSWITCH) || ++ (fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdwtype == evdwSWITCH)) && fr->rvdw_switch == 0) + { + gmx_fatal(FARGS, + "With dispersion correction rvdw-switch can not be zero " + "for vdw-type = %s", evdw_names[fr->vdwtype]); + } + - scale = fr->nblists[0].table_elec_vdw.scale; ++ scale = fr->nblists[0].table_vdw.scale; + vdwtab = fr->nblists[0].table_vdw.data; + + /* Round the cut-offs to exact table values for precision */ + ri0 = floor(fr->rvdw_switch*scale); + ri1 = ceil(fr->rvdw*scale); ++ ++ /* The code below has some support for handling force-switching, i.e. ++ * when the force (instead of potential) is switched over a limited ++ * region. This leads to a constant shift in the potential inside the ++ * switching region, which we can handle by adding a constant energy ++ * term in the force-switch case just like when we do potential-shift. ++ * ++ * For now this is not enabled, but to keep the functionality in the ++ * code we check separately for switch and shift. When we do force-switch ++ * the shifting point is rvdw_switch, while it is the cutoff when we ++ * have a classical potential-shift. ++ * ++ * For a pure potential-shift the potential has a constant shift ++ * all the way out to the cutoff, and that is it. For other forms ++ * we need to calculate the constant shift up to the point where we ++ * start modifying the potential. ++ */ ++ ri0 = (fr->vdw_modifier == eintmodPOTSHIFT) ? ri1 : ri0; ++ + r0 = ri0/scale; + r1 = ri1/scale; + rc3 = r0*r0*r0; + rc9 = rc3*rc3*rc3; + - if (fr->vdwtype == evdwSHIFT || - fr->vdw_modifier == eintmodFORCESWITCH) ++ if ((fr->vdw_modifier == eintmodFORCESWITCH) || ++ (fr->vdwtype == evdwSHIFT)) + { + /* Determine the constant energy shift below rvdw_switch. 
+             * Table has a scale factor since we have scaled it down to compensate
+             * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
+             */
+            fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
+            fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
+        }
++        else if (fr->vdw_modifier == eintmodPOTSHIFT)
++        {
++            fr->enershiftsix    = (real)(-1.0/(rc3*rc3));
++            fr->enershifttwelve = (real)( 1.0/(rc9*rc3));
++        }
++
+        /* Add the constant part from 0 to rvdw_switch.
+         * This integration from 0 to rvdw_switch overcounts the number
+         * of interactions by 1, as it also counts the self interaction.
+         * We will correct for this later.
+         */
+        eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
+        eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
++
++        /* Calculate the contribution in the range [r0,r1] where we
++         * modify the potential. For a pure potential-shift modifier we will
++         * have ri0==ri1, and there will not be any contribution here.
++         */
+        for (i = 0; i < 2; i++)
+        {
+            enersum = 0;
+            virsum  = 0;
+            integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
+            eners[i] -= enersum;
+            virs[i]  -= virsum;
+        }
+
-        /* now add the correction for rvdw_switch to infinity */
++        /* Alright: Above we compensated by REMOVING the parts outside r0
++         * corresponding to the ideal VdW 1/r6 and 1/r12 potentials.
++         *
++         * Regardless of whether r0 is the point where we start switching,
++         * or the cutoff where we calculated the constant shift, we include
++         * all the parts we are missing out to infinity from r0 by
++         * calculating the analytical dispersion correction.
++         */
+        eners[0] += -4.0*M_PI/(3.0*rc3);
+        eners[1] +=  4.0*M_PI/(9.0*rc9);
+        virs[0]  +=  8.0*M_PI/rc3;
+        virs[1]  += -16.0*M_PI/(3.0*rc9);
+    }
+    else if (fr->vdwtype == evdwCUT ||
+             EVDW_PME(fr->vdwtype) ||
+             fr->vdwtype == evdwUSER)
+    {
+        if (fr->vdwtype == evdwUSER && fplog)
+        {
+            fprintf(fplog,
+                    "WARNING: using dispersion correction with user tables\n");
+        }
+
+        /* Note that with LJ-PME, the dispersion correction is multiplied
+         * by the difference between the actual C6 and the value of C6
+         * that would produce the combination rule.
+         * This means the normal energy and virial difference formulas
+         * can be used here.
+         */
+
+        rc3 = fr->rvdw*fr->rvdw*fr->rvdw;
+        rc9 = rc3*rc3*rc3;
+        /* Contribution beyond the cut-off */
+        eners[0] += -4.0*M_PI/(3.0*rc3);
+        eners[1] +=  4.0*M_PI/(9.0*rc9);
+        if (fr->vdw_modifier == eintmodPOTSHIFT)
+        {
+            /* Contribution within the cut-off */
+            eners[0] += -4.0*M_PI/(3.0*rc3);
+            eners[1] +=  4.0*M_PI/(3.0*rc9);
+        }
+        /* Contribution beyond the cut-off */
+        virs[0]  +=  8.0*M_PI/rc3;
+        virs[1]  += -16.0*M_PI/(3.0*rc9);
+    }
+    else
+    {
+        gmx_fatal(FARGS,
+                  "Dispersion correction is not implemented for vdw-type = %s",
+                  evdw_names[fr->vdwtype]);
+    }
+
-    /* TODO: remove this code once we have group LJ-PME kernels
-     * that calculate the exact, full LJ param C6/r^6 within the cut-off,
-     * as the current nbnxn kernels do.
-     */
++    /* When we deprecate the group kernels the code below can go too */
+    if (fr->vdwtype == evdwPME && fr->cutoff_scheme == ecutsGROUP)
+    {
+        /* Calculate self-interaction coefficient (assuming that
+         * the reciprocal-space contribution is constant in the
+         * region that contributes to the self-interaction).
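+         *
+         * As a rough sketch of where the constants below come from (with
+         * beta = ewaldcoeff_lj): the LJ-PME long-range kernel tends to
+         * beta^6/6 for r -> 0 (see v_lj_ewald_lr() in tables.c), which is
+         * the self-interaction shift stored in enershiftsix, while
+         * integrating the grid contribution over all space yields the
+         * (sqrt(pi)*beta)^3/3 energy and (sqrt(pi)*beta)^3 virial terms.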
+         */
+        fr->enershiftsix = pow(fr->ewaldcoeff_lj, 6) / 6.0;
+
+        eners[0] += -pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3)/3.0;
+        virs[0]  +=  pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3);
+    }
+
+        fr->enerdiffsix    = eners[0];
+        fr->enerdifftwelve = eners[1];
+        /* The 0.5 is due to the Gromacs definition of the virial */
+        fr->virdiffsix     = 0.5*virs[0];
+        fr->virdifftwelve  = 0.5*virs[1];
+    }
+}
+
+void calc_dispcorr(FILE *fplog, t_inputrec *ir, t_forcerec *fr,
+                   gmx_int64_t step, int natoms,
+                   matrix box, real lambda, tensor pres, tensor virial,
+                   real *prescorr, real *enercorr, real *dvdlcorr)
+{
+    gmx_bool bCorrAll, bCorrPres;
+    real     dvdlambda, invvol, dens, ninter, avcsix, avctwelve, enerdiff, svir = 0, spres = 0;
+    int      m;
+
+    *prescorr = 0;
+    *enercorr = 0;
+    *dvdlcorr = 0;
+
+    clear_mat(virial);
+    clear_mat(pres);
+
+    if (ir->eDispCorr != edispcNO)
+    {
+        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
+                     ir->eDispCorr == edispcAllEnerPres);
+        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
+                     ir->eDispCorr == edispcAllEnerPres);
+
+        invvol = 1/det(box);
+        if (fr->n_tpi)
+        {
+            /* Only correct for the interactions with the inserted molecule */
+            dens   = (natoms - fr->n_tpi)*invvol;
+            ninter = fr->n_tpi;
+        }
+        else
+        {
+            dens   = natoms*invvol;
+            ninter = 0.5*natoms;
+        }
+
+        if (ir->efep == efepNO)
+        {
+            avcsix    = fr->avcsix[0];
+            avctwelve = fr->avctwelve[0];
+        }
+        else
+        {
+            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
+            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
+        }
+
+        enerdiff   = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
+        *enercorr += avcsix*enerdiff;
+        dvdlambda  = 0.0;
+        if (ir->efep != efepNO)
+        {
+            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
+        }
+        if (bCorrAll)
+        {
+            enerdiff   = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
+            *enercorr += avctwelve*enerdiff;
+            if (fr->efep != efepNO)
+            {
+                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
+            }
+        }
+
+        if (bCorrPres)
+        {
+            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
+            if (ir->eDispCorr == edispcAllEnerPres)
+            {
+                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
+            }
+            /* The factor 2 is because of the Gromacs virial definition */
+            spres = -2.0*invvol*svir*PRESFAC;
+
+            for (m = 0; m < DIM; m++)
+            {
+                virial[m][m] += svir;
+                pres[m][m]   += spres;
+            }
+            *prescorr += spres;
+        }
+
+        /* Can't currently control when it prints, for now, just print when debugging */
+        if (debug)
+        {
+            if (bCorrAll)
+            {
+                fprintf(debug, "Long Range LJ corr.: %10.4e, %10.4e\n",
+                        avcsix, avctwelve);
+            }
+            if (bCorrPres)
+            {
+                fprintf(debug,
+                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
+                        *enercorr, spres, svir);
+            }
+            else
+            {
+                fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
+            }
+        }
+
+        if (fr->bSepDVDL && do_per_step(step, ir->nstlog))
+        {
+            gmx_print_sepdvdl(fplog, "Dispersion correction", *enercorr, dvdlambda);
+        }
+        if (fr->efep != efepNO)
+        {
+            *dvdlcorr += dvdlambda;
+        }
+    }
+}
+
+void do_pbc_first(FILE *fplog, matrix box, t_forcerec *fr,
+                  t_graph *graph, rvec x[])
+{
+    if (fplog)
+    {
+        fprintf(fplog, "Removing pbc first time\n");
+    }
+    calc_shifts(box, fr->shift_vec);
+    if (graph)
+    {
+        mk_mshift(fplog, graph, fr->ePBC, box, x);
+        if (gmx_debug_at)
+        {
+            p_graph(debug, "do_pbc_first 1", graph);
+        }
+        shift_self(graph, box, x);
+        /* By doing an extra mk_mshift the molecules that are broken
+         * because they were e.g. imported from another software
+         * will be made whole again. Such are the healing powers
+         * of GROMACS.
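+         *
+         * A minimal example: with a 10 nm box and a bond between atoms
+         * at x = 0.1 and x = 9.9, the first mk_mshift() assigns the
+         * second atom a shift of -1 box vector, shift_self() places it
+         * at x = -0.1 so the molecule is whole, and the second
+         * mk_mshift() then recomputes consistent shifts from the whole
+         * configuration.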
+         */
+        mk_mshift(fplog, graph, fr->ePBC, box, x);
+        if (gmx_debug_at)
+        {
+            p_graph(debug, "do_pbc_first 2", graph);
+        }
+    }
+    if (fplog)
+    {
+        fprintf(fplog, "Done rmpbc\n");
+    }
+}
+
+static void low_do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
+                            gmx_mtop_t *mtop, rvec x[],
+                            gmx_bool bFirst)
+{
+    t_graph        *graph;
+    int             mb, as, mol;
+    gmx_molblock_t *molb;
+
+    if (bFirst && fplog)
+    {
+        fprintf(fplog, "Removing pbc first time\n");
+    }
+
+    snew(graph, 1);
+    as = 0;
+    for (mb = 0; mb < mtop->nmolblock; mb++)
+    {
+        molb = &mtop->molblock[mb];
+        if (molb->natoms_mol == 1 ||
+            (!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
+        {
+            /* Just one atom or charge group in the molecule, no PBC required */
+            as += molb->nmol*molb->natoms_mol;
+        }
+        else
+        {
+            /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
+            mk_graph_ilist(NULL, mtop->moltype[molb->type].ilist,
+                           0, molb->natoms_mol, FALSE, FALSE, graph);
+
+            for (mol = 0; mol < molb->nmol; mol++)
+            {
+                mk_mshift(fplog, graph, ePBC, box, x+as);
+
+                shift_self(graph, box, x+as);
+                /* The molecule is whole now.
+                 * We don't need the second mk_mshift call as in do_pbc_first,
+                 * since we no longer need this graph.
+                 */
+
+                as += molb->natoms_mol;
+            }
+            done_graph(graph);
+        }
+    }
+    sfree(graph);
+}
+
+void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
+                       gmx_mtop_t *mtop, rvec x[])
+{
+    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, TRUE);
+}
+
+void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
+                 gmx_mtop_t *mtop, rvec x[])
+{
+    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, FALSE);
+}
+
+void finish_run(FILE *fplog, t_commrec *cr,
+                t_inputrec *inputrec,
+                t_nrnb nrnb[], gmx_wallcycle_t wcycle,
+                gmx_walltime_accounting_t walltime_accounting,
+                wallclock_gpu_t *gputimes,
+                gmx_bool bWriteStat)
+{
+    int     i, j;
+    t_nrnb *nrnb_tot = NULL;
+    real    delta_t;
+    double  nbfs, mflop;
+    double  elapsed_time,
+            elapsed_time_over_all_ranks,
+            elapsed_time_over_all_threads,
+            elapsed_time_over_all_threads_over_all_ranks;
+    wallcycle_sum(cr, wcycle);
+
+    if (cr->nnodes > 1)
+    {
+        snew(nrnb_tot, 1);
+#ifdef GMX_MPI
+        MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+#endif
+    }
+    else
+    {
+        nrnb_tot = nrnb;
+    }
+
+    elapsed_time                                 = walltime_accounting_get_elapsed_time(walltime_accounting);
+    elapsed_time_over_all_ranks                  = elapsed_time;
+    elapsed_time_over_all_threads                = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
+    elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
+#ifdef GMX_MPI
+    if (cr->nnodes > 1)
+    {
+        /* reduce elapsed_time over all MPI ranks in the current simulation */
+        MPI_Allreduce(&elapsed_time,
+                      &elapsed_time_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+        elapsed_time_over_all_ranks /= cr->nnodes;
+        /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
+         * current simulation.
+         */
+        MPI_Allreduce(&elapsed_time_over_all_threads,
+                      &elapsed_time_over_all_threads_over_all_ranks,
+                      1, MPI_DOUBLE, MPI_SUM,
+                      cr->mpi_comm_mysim);
+    }
+#endif
+
+    if (SIMMASTER(cr))
+    {
+        print_flop(fplog, nrnb_tot, &nbfs, &mflop);
+    }
+    if (cr->nnodes > 1)
+    {
+        sfree(nrnb_tot);
+    }
+
+    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
+    {
+        print_dd_statistics(cr, inputrec, fplog);
+    }
+
+    if (SIMMASTER(cr))
+    {
+        wallcycle_print(fplog, cr->nnodes, cr->npmenodes,
+                        elapsed_time_over_all_ranks,
+                        wcycle, gputimes);
+
+        if (EI_DYNAMICS(inputrec->eI))
+        {
+            delta_t = inputrec->delta_t;
+        }
+        else
+        {
+            delta_t = 0;
+        }
+
+        if (fplog)
+        {
+            print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+        if (bWriteStat)
+        {
+            print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
+                       elapsed_time_over_all_ranks,
+                       walltime_accounting_get_nsteps_done(walltime_accounting),
+                       delta_t, nbfs, mflop);
+        }
+    }
+}
+
+extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, real *lambda, double *lam0)
+{
+    /* this function works, but could probably use a logic rewrite to keep all the different
+       types of efep straight. */
+
+    int       i;
+    t_lambda *fep = ir->fepvals;
+
+    if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
+    {
+        for (i = 0; i < efptNR; i++)
+        {
+            lambda[i] = 0.0;
+            if (lam0)
+            {
+                lam0[i] = 0.0;
+            }
+        }
+        return;
+    }
+    else
+    {
+        *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
+                                             if checkpoint is set -- a kludge is in for now
+                                             to prevent this.*/
+        for (i = 0; i < efptNR; i++)
+        {
+            /* overwrite lambda state with init_lambda for now for backwards compatibility */
+            if (fep->init_lambda >= 0) /* if it's -1, it was never initialized */
+            {
+                lambda[i] = fep->init_lambda;
+                if (lam0)
+                {
+                    lam0[i] = lambda[i];
+                }
+            }
+            else
+            {
+                lambda[i] = fep->all_lambda[i][*fep_state];
+                if (lam0)
+                {
+                    lam0[i] = lambda[i];
+                }
+            }
+        }
+        if (ir->bSimTemp)
+        {
+            /* need to rescale control temperatures to match current state */
+            for (i = 0; i < ir->opts.ngtc; i++)
+            {
+                if (ir->opts.ref_t[i] > 0)
+                {
+                    ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
+                }
+            }
+        }
+    }
+
+    /* Send to the log the information on the current lambdas */
+    if (fplog != NULL)
+    {
+        fprintf(fplog, "Initial vector of lambda components:[ ");
+        for (i = 0; i < efptNR; i++)
+        {
+            fprintf(fplog, "%10.4f ", lambda[i]);
+        }
+        fprintf(fplog, "]\n");
+    }
+    return;
+}
+
+
+void init_md(FILE *fplog,
+             t_commrec *cr, t_inputrec *ir, const output_env_t oenv,
+             double *t, double *t0,
+             real *lambda, int *fep_state, double *lam0,
+             t_nrnb *nrnb, gmx_mtop_t *mtop,
+             gmx_update_t *upd,
+             int nfile, const t_filenm fnm[],
+             gmx_mdoutf_t *outf, t_mdebin **mdebin,
+             tensor force_vir, tensor shake_vir, rvec mu_tot,
+             gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags)
+{
+    int  i, j, n;
+    real tmpt, mod;
+
+    /* Initial values */
+    *t = *t0 = ir->init_t;
+
+    *bSimAnn = FALSE;
+    for (i = 0; i < ir->opts.ngtc; i++)
+    {
+        /* set bSimAnn if any group is being annealed */
+        if (ir->opts.annealing[i] != eannNO)
+        {
+            *bSimAnn = TRUE;
+        }
+    }
+    if (*bSimAnn)
+    {
+        update_annealing_target_temp(&(ir->opts), ir->init_t);
+    }
+
+    /* Initialize lambda variables */
+    initialize_lambdas(fplog, ir, fep_state, lambda, lam0);
+
+    if (upd)
+    {
+        *upd = init_update(ir);
+    }
+
+
+    if (vcm != NULL)
+    {
+        *vcm = init_vcm(fplog, &mtop->groups, ir);
+    }
+
+    if (EI_DYNAMICS(ir->eI) && !(Flags &
MD_APPENDFILES)) + { + if (ir->etc == etcBERENDSEN) + { + please_cite(fplog, "Berendsen84a"); + } + if (ir->etc == etcVRESCALE) + { + please_cite(fplog, "Bussi2007a"); + } + } + + init_nrnb(nrnb); + + if (nfile != -1) + { + *outf = init_mdoutf(fplog, nfile, fnm, Flags, cr, ir, mtop, oenv); + + *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : mdoutf_get_fp_ene(*outf), + mtop, ir, mdoutf_get_fp_dhdl(*outf)); + } + + if (ir->bAdress) + { + please_cite(fplog, "Fritsch12"); + please_cite(fplog, "Junghans10"); + } + /* Initiate variables */ + clear_mat(force_vir); + clear_mat(shake_vir); + clear_rvec(mu_tot); + + debug_gmx(); +} diff --cc src/gromacs/mdlib/tables.c index ff73a90e89,0000000000..404516c467 mode 100644,000000..100644 --- a/src/gromacs/mdlib/tables.c +++ b/src/gromacs/mdlib/tables.c @@@ -1,1677 -1,0 +1,1777 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "gromacs/math/utilities.h"
+#include "typedefs.h"
+#include "names.h"
+#include "gromacs/utility/smalloc.h"
+#include "gmx_fatal.h"
+#include "gromacs/fileio/futil.h"
+#include "xvgr.h"
+#include "vec.h"
+#include "main.h"
+#include "network.h"
+#include "physics.h"
+#include "force.h"
+#include "gromacs/fileio/gmxfio.h"
+#include "macros.h"
+#include "tables.h"
+
+/* All the possible (implemented) table functions */
+enum {
+    etabLJ6,
+    etabLJ12,
+    etabLJ6Shift,
+    etabLJ12Shift,
+    etabShift,
+    etabRF,
+    etabRF_ZERO,
+    etabCOUL,
+    etabEwald,
+    etabEwaldSwitch,
+    etabEwaldUser,
+    etabEwaldUserSwitch,
+    etabLJ6Ewald,
+    etabLJ6Switch,
+    etabLJ12Switch,
+    etabCOULSwitch,
+    etabLJ6Encad,
+    etabLJ12Encad,
+    etabCOULEncad,
+    etabEXPMIN,
+    etabUSER,
+    etabNR
+};
+
+/** Evaluates to true if the table type contains user data. */
+#define ETAB_USER(e) ((e) == etabUSER || \
+                      (e) == etabEwaldUser || (e) == etabEwaldUserSwitch)
+
+typedef struct {
+    const char *name;
+    gmx_bool    bCoulomb;
+} t_tab_props;
+
+/* This structure holds name and a flag that tells whether
+   this is a Coulomb type function */
+static const t_tab_props tprops[etabNR] = {
+    { "LJ6",  FALSE },
+    { "LJ12", FALSE },
+    { "LJ6Shift", FALSE },
+    { "LJ12Shift", FALSE },
+    { "Shift", TRUE },
+    { "RF", TRUE },
+    { "RF-zero", TRUE },
+    { "COUL", TRUE },
+    { "Ewald", TRUE },
+    { "Ewald-Switch", TRUE },
+    { "Ewald-User", TRUE },
+    { "Ewald-User-Switch", TRUE },
+    { "LJ6Ewald", FALSE },
+    { "LJ6Switch", FALSE },
+    { "LJ12Switch", FALSE },
+    { "COULSwitch", TRUE },
+    { "LJ6-Encad shift", FALSE },
+    { "LJ12-Encad shift", FALSE },
+    { "COUL-Encad shift", TRUE },
+    { "EXPMIN", FALSE },
+    { "USER", FALSE },
+};
+
+/* Index in the table that says which function to use */
+enum {
+    etiCOUL, etiLJ6, etiLJ12, etiNR
+};
+
+typedef struct {
+    int     nx, nx0;
+    double  tabscale;
+    double *x, *v, *f;
+} t_tabledata;
+
+#define pow2(x) ((x)*(x))
+#define pow3(x) ((x)*(x)*(x))
+#define pow4(x) ((x)*(x)*(x)*(x))
+#define pow5(x) ((x)*(x)*(x)*(x)*(x))
+
+double v_q_ewald_lr(double beta, double r)
+{
+    if (r == 0)
+    {
+        return beta*2/sqrt(M_PI);
+    }
+    else
+    {
+        return gmx_erfd(beta*r)/r;
+    }
+}
+
+double v_lj_ewald_lr(double beta, double r)
+{
+    double br, br2, br4, r6, factor;
+    if (r == 0)
+    {
+        return pow(beta, 6)/6;
+    }
+    else
+    {
+        br     = beta*r;
+        br2    = br*br;
+        br4    = br2*br2;
+        r6     = pow(r, 6.0);
+        factor = (1.0 - exp(-br2)*(1 + br2 + 0.5*br4))/r6;
+        return factor;
+    }
+}
+
+void table_spline3_fill_ewald_lr(real                                 *table_f,
+                                 real                                 *table_v,
+                                 real                                 *table_fdv0,
+                                 int                                   ntab,
+                                 real                                  dx,
+                                 real                                  beta,
+                                 real_space_grid_contribution_computer v_lr)
+{
+    real     tab_max;
+    int      i, i_inrange;
+    double   dc, dc_new;
+    gmx_bool bOutOfRange;
+    double   v_r0, v_r1, v_inrange, vi, a0, a1, a2dx;
+    double   x_r0;
+
+    /* This function is called using either v_q_ewald_lr or v_lj_ewald_lr as a function argument
+     * depending on whether we should create electrostatic or Lennard-Jones Ewald tables.
+     */
+
+    if (ntab < 2)
+    {
+        gmx_fatal(FARGS, "Can not make a spline table with less than 2 points");
+    }
+
+    /* We need some margin to be able to divide table values by r
+     * in the kernel and also to do the integration arithmetics
+     * without going out of range. Furthermore, we divide by dx below.
+     */
+    tab_max = GMX_REAL_MAX*0.0001;
+
+    /* This function produces a table with:
+     * maximum energy error: V'''/(6*12*sqrt(3))*dx^3
+     * maximum force error:  V'''/(6*4)*dx^2
+     * The rms force error is the max error times 1/sqrt(5)=0.45.
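+     *
+     * ewald_spline3_table_scale() further down inverts these bounds to
+     * choose the spacing: requiring a force error ftol with |V'''|
+     * bounded by K gives dx <= sqrt(6*4*ftol/K), i.e. a table scale
+     * (1/dx) of sqrt(K/(6*4*ftol)), and the energy bound correspondingly
+     * gives a cube-root expression; see that function for the constants
+     * actually used.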
+ */ + + bOutOfRange = FALSE; + i_inrange = ntab; + v_inrange = 0; + dc = 0; + for (i = ntab-1; i >= 0; i--) + { + x_r0 = i*dx; + + v_r0 = (*v_lr)(beta, x_r0); + + if (!bOutOfRange) + { + i_inrange = i; + v_inrange = v_r0; + + vi = v_r0; + } + else + { + /* Linear continuation for the last point in range */ + vi = v_inrange - dc*(i - i_inrange)*dx; + } + + if (table_v != NULL) + { + table_v[i] = vi; + } + + if (i == 0) + { + continue; + } + + /* Get the potential at table point i-1 */ + v_r1 = (*v_lr)(beta, (i-1)*dx); + + if (v_r1 != v_r1 || v_r1 < -tab_max || v_r1 > tab_max) + { + bOutOfRange = TRUE; + } + + if (!bOutOfRange) + { + /* Calculate the average second derivative times dx over interval i-1 to i. + * Using the function values at the end points and in the middle. + */ + a2dx = (v_r0 + v_r1 - 2*(*v_lr)(beta, x_r0-0.5*dx))/(0.25*dx); + /* Set the derivative of the spline to match the difference in potential + * over the interval plus the average effect of the quadratic term. + * This is the essential step for minimizing the error in the force. + */ + dc = (v_r0 - v_r1)/dx + 0.5*a2dx; + } + + if (i == ntab - 1) + { + /* Fill the table with the force, minus the derivative of the spline */ + table_f[i] = -dc; + } + else + { + /* tab[i] will contain the average of the splines over the two intervals */ + table_f[i] += -0.5*dc; + } + + if (!bOutOfRange) + { + /* Make spline s(x) = a0 + a1*(x - xr) + 0.5*a2*(x - xr)^2 + * matching the potential at the two end points + * and the derivative dc at the end point xr. + */ + a0 = v_r0; + a1 = dc; + a2dx = (a1*dx + v_r1 - a0)*2/dx; + + /* Set dc to the derivative at the next point */ + dc_new = a1 - a2dx; + + if (dc_new != dc_new || dc_new < -tab_max || dc_new > tab_max) + { + bOutOfRange = TRUE; + } + else + { + dc = dc_new; + } + } + + table_f[(i-1)] = -0.5*dc; + } + /* Currently the last value only contains half the force: double it */ + table_f[0] *= 2; + + if (table_v != NULL && table_fdv0 != NULL) + { + /* Copy to FDV0 table too. Allocation occurs in forcerec.c, + * init_ewald_f_table(). + */ + for (i = 0; i < ntab-1; i++) + { + table_fdv0[4*i] = table_f[i]; + table_fdv0[4*i+1] = table_f[i+1]-table_f[i]; + table_fdv0[4*i+2] = table_v[i]; + table_fdv0[4*i+3] = 0.0; + } + table_fdv0[4*(ntab-1)] = table_f[(ntab-1)]; + table_fdv0[4*(ntab-1)+1] = -table_f[(ntab-1)]; + table_fdv0[4*(ntab-1)+2] = table_v[(ntab-1)]; + table_fdv0[4*(ntab-1)+3] = 0.0; + } +} + +/* The scale (1/spacing) for third order spline interpolation + * of the Ewald mesh contribution which needs to be subtracted + * from the non-bonded interactions. + */ +real ewald_spline3_table_scale(real ewaldcoeff, real rc) +{ + double erf_x_d3 = 1.0522; /* max of (erf(x)/x)''' */ + double ftol, etol; + double sc_f, sc_e; + + /* Force tolerance: single precision accuracy */ + ftol = GMX_FLOAT_EPS; + sc_f = sqrt(erf_x_d3/(6*4*ftol*ewaldcoeff))*ewaldcoeff; + + /* Energy tolerance: 10x more accurate than the cut-off jump */ + etol = 0.1*gmx_erfc(ewaldcoeff*rc); + etol = max(etol, GMX_REAL_EPS); + sc_e = pow(erf_x_d3/(6*12*sqrt(3)*etol), 1.0/3.0)*ewaldcoeff; + + return max(sc_f, sc_e); +} + +/* Calculate the potential and force for an r value + * in exactly the same way it is done in the inner loop. + * VFtab is a pointer to the table data, offset is + * the point where we should begin and stride is + * 4 if we have a buckingham table, 3 otherwise. + * If you want to evaluate table no N, set offset to 4*N. 
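+ *
+ * (Note: for the tables built in this file, stride is
+ * formatsize*ninteractions, i.e. 12 for the combined
+ * elec+vdwrep+vdwdisp tables and 4 for single-interaction tables.)
+ * With eps the fractional position inside a table interval, the
+ * Y,F,G,H data expand to a cubic in eps:
+ *
+ *   V(eps)  = Y + eps*F + eps^2*G + eps^3*H
+ *   V'(eps) = (F + 2*eps*G + 3*eps^2*H)*tabscale
+ *
+ * which is exactly what the Fp/Geps/Heps2 arithmetic below computes.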
+ * + * We use normal precision here, since that is what we + * will use in the inner loops. + */ +static void evaluate_table(real VFtab[], int offset, int stride, + real tabscale, real r, real *y, real *yp) +{ + int n; + real rt, eps, eps2; + real Y, F, Geps, Heps2, Fp; + + rt = r*tabscale; + n = (int)rt; + eps = rt - n; + eps2 = eps*eps; + n = offset+stride*n; + Y = VFtab[n]; + F = VFtab[n+1]; + Geps = eps*VFtab[n+2]; + Heps2 = eps2*VFtab[n+3]; + Fp = F+Geps+Heps2; + *y = Y+eps*Fp; + *yp = (Fp+Geps+2.0*Heps2)*tabscale; +} + +static void copy2table(int n, int offset, int stride, + double x[], double Vtab[], double Ftab[], real scalefactor, + real dest[]) +{ +/* Use double prec. for the intermediary variables + * and temporary x/vtab/vtab2 data to avoid unnecessary + * loss of precision. + */ + int i, nn0; + double F, G, H, h; + + h = 0; + for (i = 0; (i < n); i++) + { + if (i < n-1) + { + h = x[i+1] - x[i]; + F = -Ftab[i]*h; + G = 3*(Vtab[i+1] - Vtab[i]) + (Ftab[i+1] + 2*Ftab[i])*h; + H = -2*(Vtab[i+1] - Vtab[i]) - (Ftab[i+1] + Ftab[i])*h; + } + else + { + /* Fill the last entry with a linear potential, + * this is mainly for rounding issues with angle and dihedral potentials. + */ + F = -Ftab[i]*h; + G = 0; + H = 0; + } + nn0 = offset + i*stride; + dest[nn0] = scalefactor*Vtab[i]; + dest[nn0+1] = scalefactor*F; + dest[nn0+2] = scalefactor*G; + dest[nn0+3] = scalefactor*H; + } +} + +static void init_table(int n, int nx0, + double tabscale, t_tabledata *td, gmx_bool bAlloc) +{ + int i; + + td->nx = n; + td->nx0 = nx0; + td->tabscale = tabscale; + if (bAlloc) + { + snew(td->x, td->nx); + snew(td->v, td->nx); + snew(td->f, td->nx); + } + for (i = 0; (i < td->nx); i++) + { + td->x[i] = i/tabscale; + } +} + +static void spline_forces(int nx, double h, double v[], gmx_bool bS3, gmx_bool bE3, + double f[]) +{ + int start, end, i; + double v3, b_s, b_e, b; + double beta, *gamma; + + /* Formulas can be found in: + * H.J.C. Berendsen, Simulating the Physical World, Cambridge 2007 + */ + + if (nx < 4 && (bS3 || bE3)) + { + gmx_fatal(FARGS, "Can not generate splines with third derivative boundary conditions with less than 4 (%d) points", nx); + } + + /* To make life easy we initially set the spacing to 1 + * and correct for this at the end. + */ + beta = 2; + if (bS3) + { + /* Fit V''' at the start */ + v3 = v[3] - 3*v[2] + 3*v[1] - v[0]; + if (debug) + { + fprintf(debug, "The left third derivative is %g\n", v3/(h*h*h)); + } + b_s = 2*(v[1] - v[0]) + v3/6; + start = 0; + + if (FALSE) + { + /* Fit V'' at the start */ + real v2; + + v2 = -v[3] + 4*v[2] - 5*v[1] + 2*v[0]; + /* v2 = v[2] - 2*v[1] + v[0]; */ + if (debug) + { + fprintf(debug, "The left second derivative is %g\n", v2/(h*h)); + } + b_s = 3*(v[1] - v[0]) - v2/2; + start = 0; + } + } + else + { + b_s = 3*(v[2] - v[0]) + f[0]*h; + start = 1; + } + if (bE3) + { + /* Fit V''' at the end */ + v3 = v[nx-1] - 3*v[nx-2] + 3*v[nx-3] - v[nx-4]; + if (debug) + { + fprintf(debug, "The right third derivative is %g\n", v3/(h*h*h)); + } + b_e = 2*(v[nx-1] - v[nx-2]) + v3/6; + end = nx; + } + else + { + /* V'=0 at the end */ + b_e = 3*(v[nx-1] - v[nx-3]) + f[nx-1]*h; + end = nx - 1; + } + + snew(gamma, nx); + beta = (bS3 ? 1 : 4); + + /* For V'' fitting */ + /* beta = (bS3 ? 2 : 4); */ + + f[start] = b_s/beta; + for (i = start+1; i < end; i++) + { + gamma[i] = 1/beta; + beta = 4 - gamma[i]; + b = 3*(v[i+1] - v[i-1]); + f[i] = (b - f[i-1])/beta; + } + gamma[end-1] = 1/beta; + beta = (bE3 ? 
1 : 4) - gamma[end-1]; + f[end-1] = (b_e - f[end-2])/beta; + + for (i = end-2; i >= start; i--) + { + f[i] -= gamma[i+1]*f[i+1]; + } + sfree(gamma); + + /* Correct for the minus sign and the spacing */ + for (i = start; i < end; i++) + { + f[i] = -f[i]/h; + } +} + +static void set_forces(FILE *fp, int angle, + int nx, double h, double v[], double f[], + int table) +{ + int start, end; + + if (angle == 2) + { + gmx_fatal(FARGS, + "Force generation for dihedral tables is not (yet) implemented"); + } + + start = 0; + while (v[start] == 0) + { + start++; + } + + end = nx; + while (v[end-1] == 0) + { + end--; + } + if (end > nx - 2) + { + end = nx; + } + else + { + end++; + } + + if (fp) + { + fprintf(fp, "Generating forces for table %d, boundary conditions: V''' at %g, %s at %g\n", + table+1, start*h, end == nx ? "V'''" : "V'=0", (end-1)*h); + } + spline_forces(end-start, h, v+start, TRUE, end == nx, f+start); +} + +static void read_tables(FILE *fp, const char *fn, + int ntab, int angle, t_tabledata td[]) +{ + char *libfn; + char buf[STRLEN]; + double **yy = NULL, start, end, dx0, dx1, ssd, vm, vp, f, numf; + int k, i, nx, nx0 = 0, ny, nny, ns; + gmx_bool bAllZero, bZeroV, bZeroF; + double tabscale; + + nny = 2*ntab+1; + libfn = gmxlibfn(fn); + nx = read_xvg(libfn, &yy, &ny); + if (ny != nny) + { + gmx_fatal(FARGS, "Trying to read file %s, but nr columns = %d, should be %d", + libfn, ny, nny); + } + if (angle == 0) + { + if (yy[0][0] != 0.0) + { + gmx_fatal(FARGS, + "The first distance in file %s is %f nm instead of %f nm", + libfn, yy[0][0], 0.0); + } + } + else + { + if (angle == 1) + { + start = 0.0; + } + else + { + start = -180.0; + } + end = 180.0; + if (yy[0][0] != start || yy[0][nx-1] != end) + { + gmx_fatal(FARGS, "The angles in file %s should go from %f to %f instead of %f to %f\n", + libfn, start, end, yy[0][0], yy[0][nx-1]); + } + } + + tabscale = (nx-1)/(yy[0][nx-1] - yy[0][0]); + + if (fp) + { + fprintf(fp, "Read user tables from %s with %d data points.\n", libfn, nx); + if (angle == 0) + { + fprintf(fp, "Tabscale = %g points/nm\n", tabscale); + } + } + + bAllZero = TRUE; + for (k = 0; k < ntab; k++) + { + bZeroV = TRUE; + bZeroF = TRUE; + for (i = 0; (i < nx); i++) + { + if (i >= 2) + { + dx0 = yy[0][i-1] - yy[0][i-2]; + dx1 = yy[0][i] - yy[0][i-1]; + /* Check for 1% deviation in spacing */ + if (fabs(dx1 - dx0) >= 0.005*(fabs(dx0) + fabs(dx1))) + { + gmx_fatal(FARGS, "In table file '%s' the x values are not equally spaced: %f %f %f", fn, yy[0][i-2], yy[0][i-1], yy[0][i]); + } + } + if (yy[1+k*2][i] != 0) + { + bZeroV = FALSE; + if (bAllZero) + { + bAllZero = FALSE; + nx0 = i; + } + if (yy[1+k*2][i] > 0.01*GMX_REAL_MAX || + yy[1+k*2][i] < -0.01*GMX_REAL_MAX) + { + gmx_fatal(FARGS, "Out of range potential value %g in file '%s'", + yy[1+k*2][i], fn); + } + } + if (yy[1+k*2+1][i] != 0) + { + bZeroF = FALSE; + if (bAllZero) + { + bAllZero = FALSE; + nx0 = i; + } + if (yy[1+k*2+1][i] > 0.01*GMX_REAL_MAX || + yy[1+k*2+1][i] < -0.01*GMX_REAL_MAX) + { + gmx_fatal(FARGS, "Out of range force value %g in file '%s'", + yy[1+k*2+1][i], fn); + } + } + } + + if (!bZeroV && bZeroF) + { + set_forces(fp, angle, nx, 1/tabscale, yy[1+k*2], yy[1+k*2+1], k); + } + else + { + /* Check if the second column is close to minus the numerical + * derivative of the first column. 
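+             *
+             * Concretely, with h = 1/tabscale the loop below forms the
+             * centered difference numf = -(v[i+1] - v[i-1])/(2*h) at each
+             * interior point and averages the relative deviation
+             * 2*|f - numf|/|f + numf|; an average above 20% triggers the
+             * warning printed further down.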
+ */ + ssd = 0; + ns = 0; + for (i = 1; (i < nx-1); i++) + { + vm = yy[1+2*k][i-1]; + vp = yy[1+2*k][i+1]; + f = yy[1+2*k+1][i]; + if (vm != 0 && vp != 0 && f != 0) + { + /* Take the centered difference */ + numf = -(vp - vm)*0.5*tabscale; + ssd += fabs(2*(f - numf)/(f + numf)); + ns++; + } + } + if (ns > 0) + { + ssd /= ns; + sprintf(buf, "For the %d non-zero entries for table %d in %s the forces deviate on average %d%% from minus the numerical derivative of the potential\n", ns, k, libfn, (int)(100*ssd+0.5)); + if (debug) + { + fprintf(debug, "%s", buf); + } + if (ssd > 0.2) + { + if (fp) + { + fprintf(fp, "\nWARNING: %s\n", buf); + } + fprintf(stderr, "\nWARNING: %s\n", buf); + } + } + } + } + if (bAllZero && fp) + { + fprintf(fp, "\nNOTE: All elements in table %s are zero\n\n", libfn); + } + + for (k = 0; (k < ntab); k++) + { + init_table(nx, nx0, tabscale, &(td[k]), TRUE); + for (i = 0; (i < nx); i++) + { + td[k].x[i] = yy[0][i]; + td[k].v[i] = yy[2*k+1][i]; + td[k].f[i] = yy[2*k+2][i]; + } + } + for (i = 0; (i < ny); i++) + { + sfree(yy[i]); + } + sfree(yy); + sfree(libfn); +} + +static void done_tabledata(t_tabledata *td) +{ + int i; + + if (!td) + { + return; + } + + sfree(td->x); + sfree(td->v); + sfree(td->f); +} + - static void fill_table(t_tabledata *td, int tp, const t_forcerec *fr) ++static void fill_table(t_tabledata *td, int tp, const t_forcerec *fr, ++ gmx_bool b14only) +{ + /* Fill the table according to the formulas in the manual. + * In principle, we only need the potential and the second + * derivative, but then we would have to do lots of calculations + * in the inner loop. By precalculating some terms (see manual) + * we get better eventual performance, despite a larger table. + * + * Since some of these higher-order terms are very small, + * we always use double precision to calculate them here, in order + * to avoid unnecessary loss of precision. 
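+     *
+     * The "precalculated terms" are the spline coefficients stored by
+     * copy2table() (with h the point spacing, dV = V[i+1] - V[i], and
+     * Ftab the input force, i.e. -dV/dr):
+     *
+     *   Y = V[i],  F = -Ftab[i]*h,
+     *   G =  3*dV + (Ftab[i+1] + 2*Ftab[i])*h,
+     *   H = -2*dV - (Ftab[i+1] +   Ftab[i])*h,
+     *
+     * so the kernels only need the short cubic-in-eps evaluation shown
+     * in evaluate_table() above.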
+ */
+#ifdef DEBUG_SWITCH
+    FILE    *fp;
+#endif
+    int      i;
+    double   reppow, p;
+    double   r1, rc, r12, r13;
-    double   r, r2, r6, rc6;
++    double   r, r2, r6, rc2, rc6, rc12;
+    double   expr, Vtab, Ftab;
+    /* Parameters for David's function */
+    double   A = 0, B = 0, C = 0, A_3 = 0, B_4 = 0;
+    /* Parameters for the switching function */
+    double   ksw, swi, swi1;
+    /* Temporary parameters */
-    gmx_bool bSwitch, bShift;
++    gmx_bool bPotentialSwitch, bForceSwitch, bPotentialShift;
+    double   ewc   = fr->ewaldcoeff_q;
+    double   ewclj = fr->ewaldcoeff_lj;
++    double   Vcut  = 0;
+
-    bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
-               (tp == etabCOULSwitch) ||
-               (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
-
-    bShift  = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) ||
-               (tp == etabShift));
++    if (b14only)
++    {
++        bPotentialSwitch = FALSE;
++        bForceSwitch     = FALSE;
++        bPotentialShift  = FALSE;
++    }
++    else
++    {
++        bPotentialSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
++                            (tp == etabCOULSwitch) ||
++                            (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch) ||
++                            (tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodPOTSWITCH)) ||
++                            (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodPOTSWITCH)));
++        bForceSwitch     = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) ||
++                            (tp == etabShift) ||
++                            (tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodFORCESWITCH)) ||
++                            (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodFORCESWITCH)));
++        bPotentialShift  = ((tprops[tp].bCoulomb && (fr->coulomb_modifier == eintmodPOTSHIFT)) ||
++                            (!tprops[tp].bCoulomb && (fr->vdw_modifier == eintmodPOTSHIFT)));
++    }
+
+    reppow = fr->reppow;
+
+    if (tprops[tp].bCoulomb)
+    {
+        r1 = fr->rcoulomb_switch;
+        rc = fr->rcoulomb;
+    }
+    else
+    {
+        r1 = fr->rvdw_switch;
+        rc = fr->rvdw;
+    }
-    if (bSwitch)
++    if (bPotentialSwitch)
+    {
+        ksw = 1.0/(pow5(rc-r1));
+    }
+    else
+    {
+        ksw = 0.0;
+    }
-    if (bShift)
++    if (bForceSwitch)
+    {
+        if (tp == etabShift)
+        {
+            p = 1;
+        }
+        else if (tp == etabLJ6Shift)
+        {
+            p = 6;
+        }
+        else
+        {
+            p = reppow;
+        }
+
+        A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc, p+2)*pow2(rc-r1));
+        B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc, p+2)*pow3(rc-r1));
+        C = 1.0/pow(rc, p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
+        if (tp == etabLJ6Shift)
+        {
+            A = -A;
+            B = -B;
+            C = -C;
+        }
+        A_3 = A/3.0;
+        B_4 = B/4.0;
+    }
+    if (debug)
+    {
+        fprintf(debug, "Setting up tables\n"); fflush(debug);
+    }
+
+#ifdef DEBUG_SWITCH
+    fp = xvgropen("switch.xvg", "switch", "r", "s");
+#endif
+
++    if (bPotentialShift)
++    {
++        rc2 = rc*rc;
++        rc6 = 1.0/(rc2*rc2*rc2);
++        if (gmx_within_tol(reppow, 12.0, 10*GMX_DOUBLE_EPS))
++        {
++            rc12 = rc6*rc6;
++        }
++        else
++        {
++            rc12 = pow(rc, -reppow);
++        }
++
++        switch (tp)
++        {
++            case etabLJ6:
++                /* Dispersion */
++                Vcut = -rc6;
++                break;
++            case etabLJ6Ewald:
++                Vcut = -rc6*exp(-ewclj*ewclj*rc2)*(1 + ewclj*ewclj*rc2 + pow4(ewclj)*rc2*rc2/2);
++                break;
++            case etabLJ12:
++                /* Repulsion */
++                Vcut = rc12;
++                break;
++            case etabCOUL:
++                Vcut = 1.0/rc;
++                break;
++            case etabEwald:
++            case etabEwaldSwitch:
++                Vcut = gmx_erfc(ewc*rc)/rc;
++                break;
++            case etabEwaldUser:
++                /* Only calculate minus the reciprocal space contribution */
++                Vcut = -gmx_erf(ewc*rc)/rc;
++                break;
++            case etabRF:
++            case etabRF_ZERO:
++                /* No need for preventing the usage of modifiers with RF */
++                Vcut = 0.0;
++                break;
++            case etabEXPMIN:
++                Vcut = exp(-rc);
++                break;
++            default:
++                gmx_fatal(FARGS, "Cannot apply new potential-shift modifier to interaction type '%s' yet.
(%s,%d)", ++ tprops[tp].name, __FILE__, __LINE__); ++ } ++ } ++ + for (i = td->nx0; (i < td->nx); i++) + { + r = td->x[i]; + r2 = r*r; + r6 = 1.0/(r2*r2*r2); + if (gmx_within_tol(reppow, 12.0, 10*GMX_DOUBLE_EPS)) + { + r12 = r6*r6; + } + else + { + r12 = pow(r, -reppow); + } + Vtab = 0.0; + Ftab = 0.0; - if (bSwitch) ++ if (bPotentialSwitch) + { + /* swi is function, swi1 1st derivative and swi2 2nd derivative */ + /* The switch function is 1 for rrc, and smooth for + * r1<=r<=rc. The 1st and 2nd derivatives are both zero at + * r1 and rc. + * ksw is just the constant 1/(rc-r1)^5, to save some calculations... + */ + if (r <= r1) + { + swi = 1.0; + swi1 = 0.0; + } + else if (r >= rc) + { + swi = 0.0; + swi1 = 0.0; + } + else + { + swi = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1) + + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw; + swi1 = -30*pow2(r-r1)*ksw*pow2(rc-r1) + + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw; + } + } + else /* not really needed, but avoids compiler warnings... */ + { + swi = 1.0; + swi1 = 0.0; + } +#ifdef DEBUG_SWITCH + fprintf(fp, "%10g %10g %10g %10g\n", r, swi, swi1, swi2); +#endif + + rc6 = rc*rc*rc; + rc6 = 1.0/(rc6*rc6); + + switch (tp) + { + case etabLJ6: + /* Dispersion */ + Vtab = -r6; + Ftab = 6.0*Vtab/r; + break; + case etabLJ6Switch: + case etabLJ6Shift: + /* Dispersion */ + if (r < rc) + { + Vtab = -r6; + Ftab = 6.0*Vtab/r; + break; + } + break; + case etabLJ12: + /* Repulsion */ + Vtab = r12; + Ftab = reppow*Vtab/r; + break; + case etabLJ12Switch: + case etabLJ12Shift: + /* Repulsion */ + if (r < rc) + { + Vtab = r12; + Ftab = reppow*Vtab/r; + } + break; + case etabLJ6Encad: + if (r < rc) + { + Vtab = -(r6-6.0*(rc-r)*rc6/rc-rc6); + Ftab = -(6.0*r6/r-6.0*rc6/rc); + } + else /* r>rc */ + { + Vtab = 0; + Ftab = 0; + } + break; + case etabLJ12Encad: + if (r < rc) + { + Vtab = -(r6-6.0*(rc-r)*rc6/rc-rc6); + Ftab = -(6.0*r6/r-6.0*rc6/rc); + } + else /* r>rc */ + { + Vtab = 0; + Ftab = 0; + } + break; + case etabCOUL: + Vtab = 1.0/r; + Ftab = 1.0/r2; + break; + case etabCOULSwitch: + case etabShift: + if (r < rc) + { + Vtab = 1.0/r; + Ftab = 1.0/r2; + } + break; + case etabEwald: + case etabEwaldSwitch: + Vtab = gmx_erfc(ewc*r)/r; + Ftab = gmx_erfc(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r; + break; + case etabEwaldUser: + case etabEwaldUserSwitch: + /* Only calculate the negative of the reciprocal space contribution */ + Vtab = -gmx_erf(ewc*r)/r; + Ftab = -gmx_erf(ewc*r)/r2+exp(-(ewc*ewc*r2))*ewc*M_2_SQRTPI/r; + break; + case etabLJ6Ewald: + Vtab = -r6*exp(-ewclj*ewclj*r2)*(1 + ewclj*ewclj*r2 + pow4(ewclj)*r2*r2/2); + Ftab = 6.0*Vtab/r - r6*exp(-ewclj*ewclj*r2)*pow5(ewclj)*ewclj*r2*r2*r; + break; + case etabRF: + case etabRF_ZERO: + Vtab = 1.0/r + fr->k_rf*r2 - fr->c_rf; + Ftab = 1.0/r2 - 2*fr->k_rf*r; + if (tp == etabRF_ZERO && r >= rc) + { + Vtab = 0; + Ftab = 0; + } + break; + case etabEXPMIN: + expr = exp(-r); + Vtab = expr; + Ftab = expr; + break; + case etabCOULEncad: + if (r < rc) + { + Vtab = 1.0/r-(rc-r)/(rc*rc)-1.0/rc; + Ftab = 1.0/r2-1.0/(rc*rc); + } + else /* r>rc */ + { + Vtab = 0; + Ftab = 0; + } + break; + default: + gmx_fatal(FARGS, "Table type %d not implemented yet. 
(%s,%d)", + tp, __FILE__, __LINE__); + } - if (bShift) ++ if (bForceSwitch) + { + /* Normal coulomb with cut-off correction for potential */ + if (r < rc) + { + Vtab -= C; + /* If in Shifting range add something to it */ + if (r > r1) + { + r12 = (r-r1)*(r-r1); + r13 = (r-r1)*r12; + Vtab += -A_3*r13 - B_4*r12*r12; + Ftab += A*r12 + B*r13; + } + } ++ else ++ { ++ /* Make sure interactions are zero outside cutoff with modifiers */ ++ Vtab = 0; ++ Ftab = 0; ++ } ++ } ++ if (bPotentialShift) ++ { ++ if (r < rc) ++ { ++ Vtab -= Vcut; ++ } ++ else ++ { ++ /* Make sure interactions are zero outside cutoff with modifiers */ ++ Vtab = 0; ++ Ftab = 0; ++ } + } + + if (ETAB_USER(tp)) + { + Vtab += td->v[i]; + Ftab += td->f[i]; + } + - if ((r > r1) && bSwitch) ++ if (bPotentialSwitch) + { - Ftab = Ftab*swi - Vtab*swi1; - Vtab = Vtab*swi; ++ if (r >= rc) ++ { ++ /* Make sure interactions are zero outside cutoff with modifiers */ ++ Vtab = 0; ++ Ftab = 0; ++ } ++ else if (r > r1) ++ { ++ Ftab = Ftab*swi - Vtab*swi1; ++ Vtab = Vtab*swi; ++ } + } - + /* Convert to single precision when we store to mem */ + td->v[i] = Vtab; + td->f[i] = Ftab; + } + + /* Continue the table linearly from nx0 to 0. + * These values are only required for energy minimization with overlap or TPI. + */ + for (i = td->nx0-1; i >= 0; i--) + { + td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]); + td->f[i] = td->f[i+1]; + } + +#ifdef DEBUG_SWITCH + gmx_fio_fclose(fp); +#endif +} + +static void set_table_type(int tabsel[], const t_forcerec *fr, gmx_bool b14only) +{ + int eltype, vdwtype; + + /* Set the different table indices. + * Coulomb first. + */ + + + if (b14only) + { + switch (fr->eeltype) + { + case eelRF_NEC: + eltype = eelRF; + break; + case eelUSER: + case eelPMEUSER: + case eelPMEUSERSWITCH: + eltype = eelUSER; + break; + default: + eltype = eelCUT; + } + } + else + { + eltype = fr->eeltype; + } + + switch (eltype) + { + case eelCUT: + tabsel[etiCOUL] = etabCOUL; + break; + case eelPOISSON: + tabsel[etiCOUL] = etabShift; + break; + case eelSHIFT: + if (fr->rcoulomb > fr->rcoulomb_switch) + { + tabsel[etiCOUL] = etabShift; + } + else + { + tabsel[etiCOUL] = etabCOUL; + } + break; + case eelEWALD: + case eelPME: + case eelP3M_AD: + tabsel[etiCOUL] = etabEwald; + break; + case eelPMESWITCH: + tabsel[etiCOUL] = etabEwaldSwitch; + break; + case eelPMEUSER: + tabsel[etiCOUL] = etabEwaldUser; + break; + case eelPMEUSERSWITCH: + tabsel[etiCOUL] = etabEwaldUserSwitch; + break; + case eelRF: + case eelGRF: + case eelRF_NEC: + tabsel[etiCOUL] = etabRF; + break; + case eelRF_ZERO: + tabsel[etiCOUL] = etabRF_ZERO; + break; + case eelSWITCH: + tabsel[etiCOUL] = etabCOULSwitch; + break; + case eelUSER: + tabsel[etiCOUL] = etabUSER; + break; + case eelENCADSHIFT: + tabsel[etiCOUL] = etabCOULEncad; + break; + default: + gmx_fatal(FARGS, "Invalid eeltype %d", eltype); + } + + /* Van der Waals time */ + if (fr->bBHAM && !b14only) + { + tabsel[etiLJ6] = etabLJ6; + tabsel[etiLJ12] = etabEXPMIN; + } + else + { + if (b14only && fr->vdwtype != evdwUSER) + { + vdwtype = evdwCUT; + } + else + { + vdwtype = fr->vdwtype; + } + + switch (vdwtype) + { + case evdwSWITCH: + tabsel[etiLJ6] = etabLJ6Switch; + tabsel[etiLJ12] = etabLJ12Switch; + break; + case evdwSHIFT: + tabsel[etiLJ6] = etabLJ6Shift; + tabsel[etiLJ12] = etabLJ12Shift; + break; + case evdwUSER: + tabsel[etiLJ6] = etabUSER; + tabsel[etiLJ12] = etabUSER; + break; + case evdwCUT: + tabsel[etiLJ6] = etabLJ6; + tabsel[etiLJ12] = etabLJ12; + break; + case evdwENCADSHIFT: + 
tabsel[etiLJ6] = etabLJ6Encad; + tabsel[etiLJ12] = etabLJ12Encad; + break; + case evdwPME: + tabsel[etiLJ6] = etabLJ6Ewald; + tabsel[etiLJ12] = etabLJ12; + break; + default: + gmx_fatal(FARGS, "Invalid vdwtype %d in %s line %d", vdwtype, + __FILE__, __LINE__); + } + + if (!b14only && fr->vdw_modifier != eintmodNONE) + { + if (fr->vdw_modifier != eintmodPOTSHIFT && + fr->vdwtype != evdwCUT) + { + gmx_incons("Potential modifiers other than potential-shift are only implemented for LJ cut-off"); + } + - switch (fr->vdw_modifier) ++ /* LJ-PME and other (shift-only) modifiers are handled by applying the modifiers ++ * to the original interaction forms when we fill the table, so we only check cutoffs here. ++ */ ++ if (fr->vdwtype == evdwCUT) + { - case eintmodNONE: - case eintmodPOTSHIFT: - case eintmodEXACTCUTOFF: - /* No modification */ - break; - case eintmodPOTSWITCH: - tabsel[etiLJ6] = etabLJ6Switch; - tabsel[etiLJ12] = etabLJ12Switch; - break; - case eintmodFORCESWITCH: - tabsel[etiLJ6] = etabLJ6Shift; - tabsel[etiLJ12] = etabLJ12Shift; - break; - default: - gmx_incons("Unsupported vdw_modifier"); ++ switch (fr->vdw_modifier) ++ { ++ case eintmodNONE: ++ case eintmodPOTSHIFT: ++ case eintmodEXACTCUTOFF: ++ /* No modification */ ++ break; ++ case eintmodPOTSWITCH: ++ tabsel[etiLJ6] = etabLJ6Switch; ++ tabsel[etiLJ12] = etabLJ12Switch; ++ break; ++ case eintmodFORCESWITCH: ++ tabsel[etiLJ6] = etabLJ6Shift; ++ tabsel[etiLJ12] = etabLJ12Shift; ++ break; ++ default: ++ gmx_incons("Unsupported vdw_modifier"); ++ } + } + } + } +} + +t_forcetable make_tables(FILE *out, const output_env_t oenv, + const t_forcerec *fr, + gmx_bool bVerbose, const char *fn, + real rtab, int flags) +{ + const char *fns[3] = { "ctab.xvg", "dtab.xvg", "rtab.xvg" }; + const char *fns14[3] = { "ctab14.xvg", "dtab14.xvg", "rtab14.xvg" }; + FILE *fp; + t_tabledata *td; + gmx_bool b14only, bReadTab, bGenTab; + real x0, y0, yp; + int i, j, k, nx, nx0, tabsel[etiNR]; + real scalefactor; + + t_forcetable table; + + b14only = (flags & GMX_MAKETABLES_14ONLY); + + if (flags & GMX_MAKETABLES_FORCEUSER) + { + tabsel[etiCOUL] = etabUSER; + tabsel[etiLJ6] = etabUSER; + tabsel[etiLJ12] = etabUSER; + } + else + { + set_table_type(tabsel, fr, b14only); + } + snew(td, etiNR); + table.r = rtab; + table.scale = 0; + table.n = 0; + table.scale_exp = 0; + nx0 = 10; + nx = 0; + + table.interaction = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP; + table.format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH; + table.formatsize = 4; + table.ninteractions = 3; + table.stride = table.formatsize*table.ninteractions; + + /* Check whether we have to read or generate */ + bReadTab = FALSE; + bGenTab = FALSE; + for (i = 0; (i < etiNR); i++) + { + if (ETAB_USER(tabsel[i])) + { + bReadTab = TRUE; + } + if (tabsel[i] != etabUSER) + { + bGenTab = TRUE; + } + } + if (bReadTab) + { + read_tables(out, fn, etiNR, 0, td); + if (rtab == 0 || (flags & GMX_MAKETABLES_14ONLY)) + { + rtab = td[0].x[td[0].nx-1]; + table.n = td[0].nx; + nx = table.n; + } + else + { + if (td[0].x[td[0].nx-1] < rtab) + { + gmx_fatal(FARGS, "Tables in file %s not long enough for cut-off:\n" + "\tshould be at least %f nm\n", fn, rtab); + } + nx = table.n = (int)(rtab*td[0].tabscale + 0.5); + } + table.scale = td[0].tabscale; + nx0 = td[0].nx0; + } + if (bGenTab) + { + if (!bReadTab) + { +#ifdef GMX_DOUBLE + table.scale = 2000.0; +#else + table.scale = 500.0; +#endif + nx = table.n = rtab*table.scale; + } + } + if (fr->bBHAM) + { + if (fr->bham_b_max != 0) + { + table.scale_exp = 
table.scale/fr->bham_b_max; + } + else + { + table.scale_exp = table.scale; + } + } + + /* Each table type (e.g. coul,lj6,lj12) requires four + * numbers per nx+1 data points. For performance reasons we want + * the table data to be aligned to 16-byte. + */ + snew_aligned(table.data, 12*(nx+1)*sizeof(real), 32); + + for (k = 0; (k < etiNR); k++) + { + if (tabsel[k] != etabUSER) + { + init_table(nx, nx0, + (tabsel[k] == etabEXPMIN) ? table.scale_exp : table.scale, + &(td[k]), !bReadTab); - fill_table(&(td[k]), tabsel[k], fr); ++ fill_table(&(td[k]), tabsel[k], fr, b14only); + if (out) + { + fprintf(out, "%s table with %d data points for %s%s.\n" + "Tabscale = %g points/nm\n", + ETAB_USER(tabsel[k]) ? "Modified" : "Generated", + td[k].nx, b14only ? "1-4 " : "", tprops[tabsel[k]].name, + td[k].tabscale); + } + } + + /* Set scalefactor for c6/c12 tables. This is because we save flops in the non-table kernels + * by including the derivative constants (6.0 or 12.0) in the parameters, since + * we no longer calculate force in most steps. This means the c6/c12 parameters + * have been scaled up, so we need to scale down the table interactions too. + * It comes here since we need to scale user tables too. + */ + if (k == etiLJ6) + { + scalefactor = 1.0/6.0; + } + else if (k == etiLJ12 && tabsel[k] != etabEXPMIN) + { + scalefactor = 1.0/12.0; + } + else + { + scalefactor = 1.0; + } + + copy2table(table.n, k*4, 12, td[k].x, td[k].v, td[k].f, scalefactor, table.data); + + if (bDebugMode() && bVerbose) + { + if (b14only) + { + fp = xvgropen(fns14[k], fns14[k], "r", "V", oenv); + } + else + { + fp = xvgropen(fns[k], fns[k], "r", "V", oenv); + } + /* plot the output 5 times denser than the table data */ + for (i = 5*((nx0+1)/2); i < 5*table.n; i++) + { + x0 = i*table.r/(5*(table.n-1)); + evaluate_table(table.data, 4*k, 12, table.scale, x0, &y0, &yp); + fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp); + } + gmx_fio_fclose(fp); + } + done_tabledata(&(td[k])); + } + sfree(td); + + return table; +} + +t_forcetable make_gb_table(const output_env_t oenv, + const t_forcerec *fr) +{ + const char *fns[3] = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" }; + const char *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" }; + FILE *fp; + t_tabledata *td; + gmx_bool bReadTab, bGenTab; + real x0, y0, yp; + int i, j, k, nx, nx0, tabsel[etiNR]; + double r, r2, Vtab, Ftab, expterm; + + t_forcetable table; + + double abs_error_r, abs_error_r2; + double rel_error_r, rel_error_r2; + double rel_error_r_old = 0, rel_error_r2_old = 0; + double x0_r_error, x0_r2_error; + + + /* Only set a Coulomb table for GB */ + /* + tabsel[0]=etabGB; + tabsel[1]=-1; + tabsel[2]=-1; + */ + + /* Set the table dimensions for GB, not really necessary to + * use etiNR (since we only have one table, but ...) + */ + snew(td, 1); + table.interaction = GMX_TABLE_INTERACTION_ELEC; + table.format = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH; + table.r = fr->gbtabr; + table.scale = fr->gbtabscale; + table.scale_exp = 0; + table.n = table.scale*table.r; + table.formatsize = 4; + table.ninteractions = 1; + table.stride = table.formatsize*table.ninteractions; + nx0 = 0; + nx = table.scale*table.r; + + /* Check whether we have to read or generate + * We will always generate a table, so remove the read code + * (Compare with original make_table function + */ + bReadTab = FALSE; + bGenTab = TRUE; + + /* Each table type (e.g. coul,lj6,lj12) requires four + * numbers per datapoint. For performance reasons we want + * the table data to be aligned to 16-byte. 
+
+t_forcetable make_gb_table(const output_env_t oenv,
+                           const t_forcerec *fr)
+{
+    const char  *fns[3]   = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" };
+    const char  *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" };
+    FILE        *fp;
+    t_tabledata *td;
+    gmx_bool     bReadTab, bGenTab;
+    real         x0, y0, yp;
+    int          i, j, k, nx, nx0, tabsel[etiNR];
+    double       r, r2, Vtab, Ftab, expterm;
+
+    t_forcetable table;
+
+    double       abs_error_r, abs_error_r2;
+    double       rel_error_r, rel_error_r2;
+    double       rel_error_r_old = 0, rel_error_r2_old = 0;
+    double       x0_r_error, x0_r2_error;
+
+
+    /* Only a Coulomb table is set up for GB */
+    /*
+       tabsel[0]=etabGB;
+       tabsel[1]=-1;
+       tabsel[2]=-1;
+     */
+
+    /* Set the table dimensions for GB; it is not really necessary to
+     * use etiNR here, since we only have one table, but we keep the
+     * same conventions as make_tables.
+     */
+    snew(td, 1);
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+    table.r             = fr->gbtabr;
+    table.scale         = fr->gbtabscale;
+    table.scale_exp     = 0;
+    table.n             = table.scale*table.r;
+    table.formatsize    = 4;
+    table.ninteractions = 1;
+    table.stride        = table.formatsize*table.ninteractions;
+    nx0                 = 0;
+    nx                  = table.scale*table.r;
+
+    /* Check whether we have to read or generate:
+     * we will always generate a table, so the read code has been removed
+     * (compare with the original make_tables function).
+     */
+    bReadTab = FALSE;
+    bGenTab  = TRUE;
+
+    /* Each table type (e.g. coul, lj6, lj12) requires four numbers per
+     * data point. For performance reasons we want the table data to be
+     * aligned on a 32-byte boundary, which snew_aligned guarantees; such
+     * a pointer must never be passed to a plain free(), but thankfully
+     * we never free it anyway :-)
+     */
+
+    snew_aligned(table.data, 4*nx, 32);
+
+    init_table(nx, nx0, table.scale, &(td[0]), !bReadTab);
+
+    /* This is a local implementation, so that we do not have to use the
+     * etabGB enum above; that would cause problems later when the other
+     * tables are made (right now, even though we are using GB, the normal
+     * Coulomb tables will be created, and fr->eeltype==etabGB is not
+     * handled in fill_table and set_table_type).
+     */
+
+    for (i = nx0; i < nx; i++)
+    {
+        r       = td->x[i];
+        r2      = r*r;
+        expterm = exp(-0.25*r2);
+
+        Vtab = 1/sqrt(r2+expterm);
+        Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
+
+        /* Convert to single precision when we store to mem */
+        td->v[i] = Vtab;
+        td->f[i] = Ftab;
+    }
+
+    copy2table(table.n, 0, 4, td[0].x, td[0].v, td[0].f, 1.0, table.data);
+
+    if (bDebugMode())
+    {
+        fp = xvgropen(fns[0], fns[0], "r", "V", oenv);
+        /* plot the output at the table point density */
+        for (i = nx0; i < table.n; i++)
+        {
+            x0 = i*table.r/table.n;
+            evaluate_table(table.data, 0, 4, table.scale, x0, &y0, &yp);
+            fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp);
+        }
+        gmx_fio_fclose(fp);
+    }
+
+    /*
+       for(i=100*nx0;i<99.81*table.n;i++)
+       {
+         r       = i*table.r/(100*table.n);
+         r2      = r*r;
+         expterm = exp(-0.25*r2);
+
+         Vtab = 1/sqrt(r2+expterm);
+         Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
+
+         evaluate_table(table.data,0,4,table.scale,r,&y0,&yp);
+         printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
+
+         abs_error_r=fabs(y0-Vtab);
+         abs_error_r2=fabs(yp-(-1)*Ftab);
+
+         rel_error_r=abs_error_r/y0;
+         rel_error_r2=fabs(abs_error_r2/yp);
+
+         if(rel_error_r>rel_error_r_old)
+         {
+           rel_error_r_old=rel_error_r;
+           x0_r_error=x0;
+         }
+
+         if(rel_error_r2>rel_error_r2_old)
+         {
+           rel_error_r2_old=rel_error_r2;
+           x0_r2_error=x0;
+         }
+       }
+
+       printf("gb: MAX REL ERROR IN R=%15.15f, MAX REL ERROR IN R2=%15.15f\n",rel_error_r_old, rel_error_r2_old);
+       printf("gb: XO_R=%g, X0_R2=%g\n",x0_r_error, x0_r2_error);
+
+       exit(1); */
+
+    done_tabledata(&(td[0]));
+    sfree(td);
+
+    return table;
+}
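
The loop above tabulates the Still-style Generalized Born pair interaction
V(r) = 1/sqrt(r^2 + exp(-r^2/4)) together with its analytic force. A quick
standalone check (illustrative only, not part of this patch) that the Ftab
expression really equals -dV/dr:

    #include <math.h>
    #include <stdio.h>

    static double gb_v(double r)
    {
        double e = exp(-0.25*r*r);

        return 1.0/sqrt(r*r + e);
    }

    int main(void)
    {
        double r, h = 1e-6;

        for (r = 0.5; r <= 2.0; r += 0.5)
        {
            double e    = exp(-0.25*r*r);
            /* same expression as Ftab in make_gb_table */
            double ftab = (r - 0.25*r*e)/((r*r + e)*sqrt(r*r + e));
            /* central finite difference of -dV/dr */
            double fnum = -(gb_v(r + h) - gb_v(r - h))/(2.0*h);

            printf("r=%4.2f analytic=%.8f numeric=%.8f\n", r, ftab, fnum);
        }
        return 0;
    }

The two columns should agree to the accuracy of the finite difference,
confirming the sign and the 0.25 factor in the force expression.
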
+
+t_forcetable make_atf_table(FILE *out, const output_env_t oenv,
+                            const t_forcerec *fr,
+                            const char *fn,
+                            matrix box)
+{
+    const char  *fns[3] = { "tf_tab.xvg", "atfdtab.xvg", "atfrtab.xvg" };
+    FILE        *fp;
+    t_tabledata *td;
+    real         x0, y0, yp, rtab;
+    int          i, nx, nx0;
+    real         rx, ry, rz, box_r;
+
+    t_forcetable table;
+
+
+    /* Set the table dimensions for ATF; it is not really necessary to
+     * use etiNR here, since we only have one table, but we keep the
+     * same conventions as make_tables.
+     */
+    snew(td, 1);
+
+    if (fr->adress_type == eAdressSphere)
+    {
+        /* take half the box diagonal as the table range */
+        rx    = 0.5*box[0][0]+0.5*box[1][0]+0.5*box[2][0];
+        ry    = 0.5*box[0][1]+0.5*box[1][1]+0.5*box[2][1];
+        rz    = 0.5*box[0][2]+0.5*box[1][2]+0.5*box[2][2];
+        box_r = sqrt(rx*rx+ry*ry+rz*rz);
+
+    }
+    else
+    {
+        /* xsplit: take half the box x-direction as the table range */
+        box_r = box[0][0]/2;
+    }
+    table.r         = box_r;
+    table.scale     = 0;
+    table.n         = 0;
+    table.scale_exp = 0;
+    nx0             = 10;
+    nx              = 0;
+
+    read_tables(out, fn, 1, 0, td);
+    rtab = td[0].x[td[0].nx-1];
+
+    if (fr->adress_type == eAdressXSplit && (rtab < box[0][0]/2))
+    {
+        gmx_fatal(FARGS, "AdResS full box therm force table in file %s extends to %f:\n"
+                  "\tit should extend to at least half the box length in the x-direction, %f\n",
+                  fn, rtab, box[0][0]/2);
+    }
+    if (rtab < box_r)
+    {
+        gmx_fatal(FARGS, "AdResS full box therm force table in file %s extends to %f:\n"
+                  "\tfor spherical AdResS it should extend to at least %f\n"
+                  "\t(the distance from the center to the furthest point of the box)\n",
+                  fn, rtab, box_r);
+    }
+
+
+    table.n     = td[0].nx;
+    nx          = table.n;
+    table.scale = td[0].tabscale;
+    nx0         = td[0].nx0;
+
+    /* Each table type (e.g. coul, lj6, lj12) requires four numbers per
+     * data point. For performance reasons we want the table data to be
+     * aligned on a 32-byte boundary, which snew_aligned guarantees; such
+     * a pointer must never be passed to a plain free(), but thankfully
+     * we never free it anyway :-)
+     */
+
+    snew_aligned(table.data, 4*nx, 32);
+
+    copy2table(table.n, 0, 4, td[0].x, td[0].v, td[0].f, 1.0, table.data);
+
+    if (bDebugMode())
+    {
+        fp = xvgropen(fns[0], fns[0], "r", "V", oenv);
+        /* plot the output 5 times denser than the table data */
+        for (i = 5*((nx0+1)/2); i < 5*table.n; i++)
+        {
+            x0 = i*table.r/(5*(table.n-1));
+            evaluate_table(table.data, 0, 4, table.scale, x0, &y0, &yp);
+            fprintf(fp, "%15.10e %15.10e %15.10e\n", x0, y0, yp);
+        }
+        gmx_ffclose(fp);
+    }
+
+    done_tabledata(&(td[0]));
+    sfree(td);
+
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+    table.formatsize    = 4;
+    table.ninteractions = 3;
+    table.stride        = table.formatsize*table.ninteractions;
+
+
+    return table;
+}
+
+bondedtable_t make_bonded_table(FILE *fplog, char *fn, int angle)
+{
+    t_tabledata   td;
+    double        start;
+    int           i;
+    bondedtable_t tab;
+
+    if (angle < 2)
+    {
+        start = 0;
+    }
+    else
+    {
+        start = -180.0;
+    }
+    read_tables(fplog, fn, 1, angle, &td);
+    if (angle > 0)
+    {
+        /* Convert the table from degrees to radians */
+        for (i = 0; i < td.nx; i++)
+        {
+            td.x[i]     *= DEG2RAD;
+            td.f[i]     *= RAD2DEG;
+        }
+        td.tabscale *= RAD2DEG;
+    }
+    tab.n     = td.nx;
+    tab.scale = td.tabscale;
+    snew(tab.data, tab.n*4);
+    copy2table(tab.n, 0, 4, td.x, td.v, td.f, 1.0, tab.data);
+    done_tabledata(&td);
+
+    return tab;
+}
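
The degree-to-radian conversion in make_bonded_table has to rescale three
quantities consistently: the abscissa shrinks by DEG2RAD, so both the force
(a derivative with respect to the abscissa) and tabscale (points per unit of
abscissa) grow by the inverse factor RAD2DEG. A toy illustration with
made-up numbers (a standalone sketch, not a GROMACS API):

    #include <stdio.h>

    #define DEG2RAD (3.14159265358979323846/180.0)
    #define RAD2DEG (180.0/3.14159265358979323846)

    int main(void)
    {
        /* hypothetical sample point from an angle table, in degree units */
        double x_deg = 120.0, f_per_deg = 0.5, tabscale_deg = 10.0;

        double x_rad        = x_deg*DEG2RAD;        /* abscissa shrinks  */
        double f_per_rad    = f_per_deg*RAD2DEG;    /* dV/dx grows       */
        double tabscale_rad = tabscale_deg*RAD2DEG; /* points per radian */

        printf("x=%g rad, f=%g per rad, tabscale=%g points/rad\n",
               x_rad, f_per_rad, tabscale_rad);
        return 0;
    }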