From: Roland Schulz <roland@utk.edu>
Date: Thu, 27 Jun 2013 03:34:35 +0000 (-0400)
Subject: Merge branch 'release-4-6' into master
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=2398ab23cc07aad54eb1dc601c93d5ef5f221ad1;p=alexxy%2Fgromacs.git

Merge branch 'release-4-6' into master

Mostly easy. selhelp.cpp got the fix from selhelp.c

Omitted the content of 34a402e7 (clang-AMD-FMA work-around) in favour
of reworking it for C++ in a child patch

    Conflicts:
        CMakeLists.txt - kept master version number!
        src/gmxlib/selection/selhelp.c - deleted
        src/gromacs/gmxana/pp2shift.h - deleted
        src/gromacs/legacyheaders/pull_rotation.h

Change-Id: Ibf0c9af136e39dfcbef0a85eb7d314740706cb60
---

2398ab23cc07aad54eb1dc601c93d5ef5f221ad1
diff --cc src/gromacs/gmxana/dlist.c
index ef3c45f515,0000000000..d2e7373be5
mode 100644,000000..100644
--- a/src/gromacs/gmxana/dlist.c
+++ b/src/gromacs/gmxana/dlist.c
@@@ -1,439 -1,0 +1,441 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Green Red Orange Magenta Azure Cyan Skyblue
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdlib.h>
 +
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "gstat.h"
 +#include "gmx_fatal.h"
 +#include "index.h"
 +
 +t_dlist *mk_dlist(FILE *log, /* Build the per-residue dihedral atom list: scan all atoms, record the atom indices each dihedral needs, write a summary to log, store the number of entries in *nlist and return the list */
 +                  t_atoms *atoms, int *nlist,
 +                  gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bHChi, /* bPhi/bPsi/bChi only affect the summary written to log; bHChi additionally lets HG/HG1/NH1 be picked up as chi atoms */
 +                  int maxchi, int r0, gmx_residuetype_t rt) /* maxchi limits the chi columns of the summary; r0 is added to the residue index in names; rt is used to look up the residue type index */
 +{
 +    int       ires, i, j, k, ii;
 +    t_dihatms atm, prev;
 +    int       nl = 0, nc[edMax]; /* nl: entries stored so far; nc[]: per-dihedral counters filled below */
 +    char     *thisres;
 +    t_dlist  *dl;
 +
 +    snew(dl, atoms->nres+1);
-     prev.C = prev.O = -1;
++    prev.C = prev.Cn[1] = -1; /* Keep the compiler quiet */
 +    for (i = 0; (i < edMax); i++)
 +    {
 +        nc[i] = 0;
 +    }
 +    ires = -1;
 +    i    =  0;
 +    while (i < atoms->nr) /* one iteration per residue, i.e. per run of atoms sharing the same resind */
 +    {
 +        ires = atoms->atom[i].resind;
 +
 +        /* Initialize all atom indices to -1, which marks "not found" */
-         atm.minC = atm.H = atm.N = atm.C = atm.O = atm.minO = -1;
++        atm.minC = atm.H = atm.N = atm.C = atm.O = atm.minCalpha = -1;
 +        for (j = 0; (j < MAXCHI+3); j++)
 +        {
 +            atm.Cn[j] = -1;
 +        }
 +
 +        /* Look for atoms in this residue, matching on the atom name */
 +        /* maybe should allow for chis to hydrogens? */
 +        while ((i < atoms->nr) && (atoms->atom[i].resind == ires))
 +        {
 +            if ((strcmp(*(atoms->atomname[i]), "H") == 0) ||
-                 (strcmp(*(atoms->atomname[i]), "H1") == 0) )
++                (strcmp(*(atoms->atomname[i]), "H1") == 0) ||
++                (strcmp(*(atoms->atomname[i]), "HN") == 0) )
 +            {
 +                atm.H = i;
 +            }
 +            else if (strcmp(*(atoms->atomname[i]), "N") == 0)
 +            {
 +                atm.N = i;
 +            }
 +            else if (strcmp(*(atoms->atomname[i]), "C") == 0)
 +            {
 +                atm.C = i;
 +            }
 +            else if ((strcmp(*(atoms->atomname[i]), "O") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "O1") == 0))
 +            {
 +                atm.O = i;
 +            }
 +            else if (strcmp(*(atoms->atomname[i]), "CA") == 0)
 +            {
 +                atm.Cn[1] = i;
 +            }
 +            else if (strcmp(*(atoms->atomname[i]), "CB") == 0)
 +            {
 +                atm.Cn[2] = i;
 +            }
 +            else if ((strcmp(*(atoms->atomname[i]), "CG") == 0)  ||
 +                     (strcmp(*(atoms->atomname[i]), "CG1") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "OG") == 0)  ||
 +                     (strcmp(*(atoms->atomname[i]), "OG1") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "SG") == 0))
 +            {
 +                atm.Cn[3] = i;
 +            }
 +            else if ((strcmp(*(atoms->atomname[i]), "CD") == 0)  ||
 +                     (strcmp(*(atoms->atomname[i]), "CD1") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "SD") == 0)  ||
 +                     (strcmp(*(atoms->atomname[i]), "OD1") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "ND1") == 0))
 +            {
 +                atm.Cn[4] = i;
 +            }
 +            /* by grs - split the Cn[4] into 2 bits to check allowing dih to H */
 +            else if (bHChi && ((strcmp(*(atoms->atomname[i]), "HG")  == 0) ||
 +                               (strcmp(*(atoms->atomname[i]), "HG1")  == 0)) )
 +            {
 +                atm.Cn[4] = i;
 +            }
 +            else if ((strcmp(*(atoms->atomname[i]), "CE") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "CE1") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "OE1") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "NE") == 0))
 +            {
 +                atm.Cn[5] = i;
 +            }
 +            else if ((strcmp(*(atoms->atomname[i]), "CZ") == 0) ||
 +                     (strcmp(*(atoms->atomname[i]), "NZ") == 0))
 +            {
 +                atm.Cn[6] = i;
 +            }
 +            /* HChi flag here too */
 +            else if (bHChi && (strcmp(*(atoms->atomname[i]), "NH1") == 0))
 +            {
 +                atm.Cn[7] = i;
 +            }
 +            i++;
 +        }
 +
 +        thisres = *(atoms->resinfo[ires].name);
 +
 +        /* added by grs - special case for aromatics, whose chis above 2 are
 +           not real and produce rubbish output - so set back to -1 */
 +        if (strcmp(thisres, "PHE") == 0 ||
 +            strcmp(thisres, "TYR") == 0 ||
 +            strcmp(thisres, "PTR") == 0 ||
 +            strcmp(thisres, "TRP") == 0 ||
 +            strcmp(thisres, "HIS") == 0 ||
 +            strcmp(thisres, "HISA") == 0 ||
 +            strcmp(thisres, "HISB") == 0)
 +        {
 +            for (ii = 5; ii <= 7; ii++)
 +            {
 +                atm.Cn[ii] = -1;
 +            }
 +        }
 +        /* end fixing aromatics */
 +
 +        /* Special case for Pro, has no H: the atom stored in Cn[4] stands in for it */
 +        if (strcmp(thisres, "PRO") == 0)
 +        {
 +            atm.H = atm.Cn[4];
 +        }
 +        /* Carbon from previous residue */
 +        if (prev.C != -1)
 +        {
 +            atm.minC = prev.C;
 +        }
-         if (prev.O != -1)
++        /* Alpha-carbon from previous residue */
++        if (prev.Cn[1] != -1)
 +        {
-             atm.minO = prev.O;
++            atm.minCalpha = prev.Cn[1];
 +        }
 +        prev = atm; /* remembered even if this residue is not added to the list below */
 +
 +        /* Keep the residue only when N, CA, C and O were found plus either H or the previous C; then count its dihedrals */
 +        if ((atm.N != -1) && (atm.Cn[1] != -1) && (atm.C != -1) &&
 +            (atm.O != -1) && ((atm.H != -1) || (atm.minC != -1)))
 +        {
 +            dl[nl].resnr     = ires+1;
 +            dl[nl].atm       = atm;
 +            dl[nl].atm.Cn[0] = atm.N; /* N is stored as Cn[0] in the list entry, so Cn[0..3] holds N, CA, CB and the gamma atom */
 +            if ((atm.Cn[3] != -1) && (atm.Cn[2] != -1) && (atm.Cn[1] != -1))
 +            {
 +                nc[0]++; /* nc[0..5] are reported as the Chi1..Chi6 counts in the summary below */
 +                if (atm.Cn[4] != -1)
 +                {
 +                    nc[1]++;
 +                    if (atm.Cn[5] != -1)
 +                    {
 +                        nc[2]++;
 +                        if (atm.Cn[6] != -1)
 +                        {
 +                            nc[3]++;
 +                            if (atm.Cn[7] != -1)
 +                            {
 +                                nc[4]++;
 +                                if (atm.Cn[8] != -1)
 +                                {
 +                                    nc[5]++;
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
-             if ((atm.minC != -1) && (atm.minO != -1))
++            if ((atm.minC != -1) && (atm.minCalpha != -1))
 +            {
 +                nc[6]++; /* counts residues for which both the previous C and the previous CA are known */
 +            }
 +            dl[nl].index = gmx_residuetype_get_index(rt, thisres);
 +
 +            sprintf(dl[nl].name, "%s%d", thisres, ires+r0);
 +            nl++;
 +        }
 +        else if (debug) /* reached whenever the check above fails, not only when N is missing */
 +        {
 +            fprintf(debug, "Could not find N atom but could find other atoms"
 +                    " in residue %s%d\n", thisres, ires+r0);
 +        }
 +    }
 +    fprintf(stderr, "\n");
 +    fprintf(log, "\n");
 +    fprintf(log, "There are %d residues with dihedrals\n", nl);
 +    j = 0; /* j is reused here to total the dihedrals selected by the flags */
 +    if (bPhi)
 +    {
 +        j += nl;
 +    }
 +    if (bPsi)
 +    {
 +        j += nl;
 +    }
 +    if (bChi)
 +    {
 +        for (i = 0; (i < maxchi); i++)
 +        {
 +            j += nc[i];
 +        }
 +    }
 +    fprintf(log, "There are %d dihedrals\n", j);
 +    fprintf(log, "Dihedral: ");
 +    if (bPhi)
 +    {
 +        fprintf(log, " Phi  ");
 +    }
 +    if (bPsi)
 +    {
 +        fprintf(log, " Psi  ");
 +    }
 +    if (bChi)
 +    {
 +        for (i = 0; (i < maxchi); i++)
 +        {
 +            fprintf(log, "Chi%d  ", i+1);
 +        }
 +    }
 +    fprintf(log, "\nNumber:   ");
 +    if (bPhi)
 +    {
 +        fprintf(log, "%4d  ", nl);
 +    }
 +    if (bPsi)
 +    {
 +        fprintf(log, "%4d  ", nl);
 +    }
 +    if (bChi)
 +    {
 +        for (i = 0; (i < maxchi); i++)
 +        {
 +            fprintf(log, "%4d  ", nc[i]);
 +        }
 +    }
 +    fprintf(log, "\n");
 +
 +    *nlist = nl;
 +
 +    return dl;
 +}
 +
 +gmx_bool has_dihedral(int Dih, t_dlist *dl) /* Return TRUE when every atom index needed for dihedral type Dih is set (!= -1) in dl->atm */
 +{
 +    gmx_bool b = FALSE;
 +    int      ddd;
 +
 +    switch (Dih)
 +    {
 +        case edPhi: /* needs H itself; a set minC does not count here */
 +            b = ((dl->atm.H != -1) && (dl->atm.N != -1) && (dl->atm.Cn[1] != -1) && (dl->atm.C != -1));
 +            break;
 +        case edPsi:
 +            b = ((dl->atm.N != -1) && (dl->atm.Cn[1] != -1) && (dl->atm.C != -1) && (dl->atm.O != -1));
 +            break;
 +        case edOmega:
-             b = ((dl->atm.minO != -1) && (dl->atm.minC != -1) && (dl->atm.N != -1) && (dl->atm.Cn[1] != -1));
++            b = ((dl->atm.minCalpha != -1) && (dl->atm.minC != -1) && (dl->atm.N != -1) && (dl->atm.Cn[1] != -1));
 +            break;
 +        case edChi1:
 +        case edChi2:
 +        case edChi3:
 +        case edChi4:
 +        case edChi5:
 +        case edChi6:
 +            ddd = Dih - edChi1; /* 0-based chi number; the chi uses the four consecutive entries Cn[ddd]..Cn[ddd+3] */
 +            b   = ((dl->atm.Cn[ddd] != -1) &&  (dl->atm.Cn[ddd+1] != -1) &&
 +                   (dl->atm.Cn[ddd+2] != -1) && (dl->atm.Cn[ddd+3] != -1));
 +            break;
 +        default: /* unknown dihedral type: print the entry, then raise a fatal error */
 +            pr_dlist(stdout, 1, dl, 1, 0, TRUE, TRUE, TRUE, TRUE, MAXCHI);
 +            gmx_fatal(FARGS, "Non existant dihedral %d in file %s, line %d",
 +                      Dih, __FILE__, __LINE__);
 +    }
 +    return b;
 +}
 +
 +static void pr_one_ro(FILE *fp, t_dlist *dl, int nDih, real dt) /* Print the NROT values dl->rot_occ[nDih][0..NROT-1] on one line; dt is not used */
 +{
 +    int k;
 +    for (k = 0; k < NROT; k++)
 +    {
 +        fprintf(fp, "  %6.2f", dl->rot_occ[nDih][k]);
 +    }
 +    fprintf(fp, "\n");
 +}
 +
 +static void pr_ntr_s2(FILE *fp, t_dlist *dl, int nDih, real dt) /* Print dl->ntr[nDih]/dt (0 when dt == 0) followed by dl->S2[nDih] on one line */
 +{
 +    fprintf(fp, "  %6.2f  %6.2f\n", (dt == 0) ? 0 : dl->ntr[nDih]/dt, dl->S2[nDih]);
 +}
 +
 +void pr_dlist(FILE *fp, int nl, t_dlist dl[], real dt, int printtype, /* For each of the nl entries, print the atoms of every selected dihedral followed by either transitions/S2 or rotamer occupancies */
 +              gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega, int maxchi)
 +{
 +    int   i, Xi;
 +
 +    void  (*pr_props)(FILE *, t_dlist *, int, real); /* per-dihedral printer, chosen from printtype below */
 +
 +    /* Analysis of dihedral transitions etc */
 +
 +    if (printtype == edPrintST)
 +    {
 +        pr_props = pr_ntr_s2;
 +        fprintf(stderr, "Now printing out transitions and OPs...\n");
 +    }
 +    else
 +    {
 +        pr_props = pr_one_ro;
 +        fprintf(stderr, "Now printing out rotamer occupancies...\n");
 +        fprintf(fp, "\nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n\n");
 +    }
 +
 +    /* atom numbers are printed 1-based (stored indices are 0-based, hence the 1+ below) */
 +    for (i = 0; (i < nl); i++)
 +    {
 +        fprintf(fp, "Residue %s\n", dl[i].name);
 +        if (printtype == edPrintST)
 +        {
 +            fprintf(fp, " Angle [   AI,   AJ,   AK,   AL]  #tr/ns  S^2D  \n"
 +                    "--------------------------------------------\n");
 +        }
 +        else
 +        {
 +            fprintf(fp, " Angle [   AI,   AJ,   AK,   AL]  rotamers  0  g(-)  t  g(+)\n"
 +                    "--------------------------------------------\n");
 +        }
 +        if (bPhi)
 +        {
 +            fprintf(fp, "   Phi [%5d,%5d,%5d,%5d]",
 +                    (dl[i].atm.H == -1) ? 1+dl[i].atm.minC : 1+dl[i].atm.H, /* falls back to the previous C when H is missing */
 +                    1+dl[i].atm.N, 1+dl[i].atm.Cn[1], 1+dl[i].atm.C);
 +            pr_props(fp, &dl[i], edPhi, dt);
 +        }
 +        if (bPsi)
 +        {
 +            fprintf(fp, "   Psi [%5d,%5d,%5d,%5d]", 1+dl[i].atm.N, 1+dl[i].atm.Cn[1],
 +                    1+dl[i].atm.C, 1+dl[i].atm.O);
 +            pr_props(fp, &dl[i], edPsi, dt);
 +        }
 +        if (bOmega && has_dihedral(edOmega, &(dl[i])))
 +        {
-             fprintf(fp, " Omega [%5d,%5d,%5d,%5d]", 1+dl[i].atm.minO, 1+dl[i].atm.minC,
++            fprintf(fp, " Omega [%5d,%5d,%5d,%5d]", 1+dl[i].atm.minCalpha, 1+dl[i].atm.minC,
 +                    1+dl[i].atm.N, 1+dl[i].atm.Cn[1]);
 +            pr_props(fp, &dl[i], edOmega, dt);
 +        }
 +        for (Xi = 0; Xi < MAXCHI; Xi++)
 +        {
 +            if (bChi && (Xi < maxchi) && (dl[i].atm.Cn[Xi+3] != -1) ) /* a chi is printed only when its fourth atom was found */
 +            {
 +                fprintf(fp, "   Chi%d[%5d,%5d,%5d,%5d]", Xi+1, 1+dl[i].atm.Cn[Xi],
 +                        1+dl[i].atm.Cn[Xi+1], 1+dl[i].atm.Cn[Xi+2],
 +                        1+dl[i].atm.Cn[Xi+3]);
 +                pr_props(fp, &dl[i], Xi+edChi1, dt); /* Xi+2 was wrong here */
 +            }
 +        }
 +        fprintf(fp, "\n");
 +    }
 +}
 +
 +
 +
 +int pr_trans(FILE *fp, int nl, t_dlist dl[], real dt, int Xi) /* Write a LaTeX table with ntr[Xi]/dt for each of the nl entries; returns how many entries had a (truncated) value of 0 */
 +{
 +    /* never called at the moment */
 +
 +    int  i, nn, nz;
 +
 +    nz = 0;
 +    fprintf(fp, "\\begin{table}[h]\n");
 +    fprintf(fp, "\\caption{Number of dihedral transitions per nanosecond}\n");
 +    fprintf(fp, "\\begin{tabular}{|l|l|}\n");
 +    fprintf(fp, "\\hline\n");
 +    fprintf(fp, "Residue\t&$\\chi_%d$\t\\\\\n", Xi+1);
 +    for (i = 0; (i < nl); i++)
 +    {
 +        nn = dl[i].ntr[Xi]/dt; /* truncated to int; NOTE(review): dt == 0 is not guarded here */
 +
 +        if (nn == 0)
 +        {
 +            fprintf(fp, "%s\t&\\HL{%d}\t\\\\\n", dl[i].name, nn);
 +            nz++;
 +        }
 +        else if (nn > 0) /* negative values produce no table row */
 +        {
 +            fprintf(fp, "%s\t&\\%d\t\\\\\n", dl[i].name, nn); /* NOTE(review): this format emits a backslash directly before the number - confirm that is intended */
 +        }
 +    }
 +    fprintf(fp, "\\hline\n");
 +    fprintf(fp, "\\end{tabular}\n");
 +    fprintf(fp, "\\end{table}\n\n");
 +
 +    return nz;
 +}
diff --cc src/gromacs/gmxana/gmx_chi.c
index d3fcb8b6d8,0000000000..0c37cd8e5b
mode 100644,000000..100644
--- a/src/gromacs/gmxana/gmx_chi.c
+++ b/src/gromacs/gmxana/gmx_chi.c
@@@ -1,1582 -1,0 +1,1582 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Green Red Orange Magenta Azure Cyan Skyblue
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include <stdio.h>
 +#include <math.h>
 +
 +#include "confio.h"
 +#include "pdbio.h"
 +#include "copyrite.h"
 +#include "gmx_fatal.h"
 +#include "futil.h"
 +#include "gstat.h"
 +#include "macros.h"
 +#include "maths.h"
 +#include "physics.h"
 +#include "index.h"
 +#include "smalloc.h"
 +#include "statutil.h"
 +#include "tpxio.h"
 +#include <string.h>
 +#include "sysstuff.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "vec.h"
 +#include "strdb.h"
 +#include "xvgr.h"
 +#include "matio.h"
 +#include "gmx_ana.h"
 +
 +static gmx_bool bAllowed(real phi, real psi) /* Return TRUE when the (phi, psi) pair falls in a table cell marked '1'; the angles are scaled by RAD2DEG, so presumably given in radians */
 +{
 +    static const char *map[] = { /* first index is derived from phi, second from psi; every cell is the character '0' or '1' */
 +        "1100000000000000001111111000000000001111111111111111111111111",
 +        "1100000000000000001111110000000000011111111111111111111111111",
 +        "1100000000000000001111110000000000011111111111111111111111111",
 +        "1100000000000000001111100000000000111111111111111111111111111",
 +        "1100000000000000001111100000000000111111111111111111111111111",
 +        "1100000000000000001111100000000001111111111111111111111111111",
 +        "1100000000000000001111100000000001111111111111111111111111111",
 +        "1100000000000000001111100000000011111111111111111111111111111",
 +        "1110000000000000001111110000000111111111111111111111111111111",
 +        "1110000000000000001111110000001111111111111111111111111111111",
 +        "1110000000000000001111111000011111111111111111111111111111111",
 +        "1110000000000000001111111100111111111111111111111111111111111",
 +        "1110000000000000001111111111111111111111111111111111111111111",
 +        "1110000000000000001111111111111111111111111111111111111111111",
 +        "1110000000000000001111111111111111111111111111111111111111111",
 +        "1110000000000000001111111111111111111111111111111111111111111",
 +        "1110000000000000001111111111111110011111111111111111111111111",
 +        "1110000000000000001111111111111100000111111111111111111111111",
 +        "1110000000000000001111111111111000000000001111111111111111111",
 +        "1100000000000000001111111111110000000000000011111111111111111",
 +        "1100000000000000001111111111100000000000000011111111111111111",
 +        "1000000000000000001111111111000000000000000001111111111111110",
 +        "0000000000000000001111111110000000000000000000111111111111100",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000111111111111000000000000000",
 +        "1100000000000000000000000000000001111111111111100000000000111",
 +        "1100000000000000000000000000000001111111111111110000000000111",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000",
 +        "0000000000000000000000000000000000000000000000000000000000000"
 +    };
 +#define NPP asize(map)
 +    int                x, y;
 +
 +#define INDEX(ppp) ((((int) (360+ppp*RAD2DEG)) % 360)/6) /* scale by RAD2DEG, shift by 360, take modulo 360, then divide by 6 to get the cell index */
 +    x = INDEX(phi);
 +    y = INDEX(psi);
 +#undef INDEX
 +    return (gmx_bool) (map[x][y] == '1'); /* compare with '1': the cells are characters and both '0' and '1' are non-zero, so casting the raw char was always TRUE */
 +}
 +
 +atom_id *make_chi_ind(int nl, t_dlist dl[], int *ndih) /* Build a flat array of atom indices, four per dihedral, ordered phi, psi, omega, chi1..; record each entry's dihedral number in dl[i].j0[] and the total in *ndih */
 +{
 +    atom_id *id;
 +    int      i, Xi, n;
 +
 +    /* There are nl residues with max edMax dihedrals with 4 atoms each */
 +    snew(id, nl*edMax*4);
 +
 +    n = 0;
 +    for (i = 0; (i < nl); i++)
 +    {
 +        /* Phi: the first atom is the previous C when known, otherwise H (the "fake" phi) */
 +        dl[i].j0[edPhi] = n/4; /* n counts atoms, so n/4 is the dihedral number */
 +        if (dl[i].atm.minC >= 0)
 +        {
 +            id[n++] = dl[i].atm.minC;
 +        }
 +        else
 +        {
 +            id[n++] = dl[i].atm.H;
 +        }
 +        id[n++] = dl[i].atm.N;
 +        id[n++] = dl[i].atm.Cn[1];
 +        id[n++] = dl[i].atm.C;
 +    }
 +    for (i = 0; (i < nl); i++)
 +    {
 +        /* Psi: the last atom is the N of the next list entry; the final entry uses its own O instead */
 +        dl[i].j0[edPsi] = n/4;
 +        id[n++]         = dl[i].atm.N;
 +        id[n++]         = dl[i].atm.Cn[1];
 +        id[n++]         = dl[i].atm.C;
 +        if (i < (nl-1) )
 +        {
 +            id[n++] = dl[i+1].atm.N;
 +        }
 +        else
 +        {
 +            id[n++] = dl[i].atm.O;
 +        }
 +    }
-     for (i = 0; (i < nl); i++)
++    for (i = 1; (i < nl); i++) /* NOTE(review): starts at 1, whereas the omega loops in do_dihcorr, dump_em_all and reset_em_all start at 0 - the orderings only agree if dl[0] never has an omega; confirm */
 +    {
 +        /* Omega */
 +        if (has_dihedral(edOmega, &(dl[i])))
 +        {
 +            dl[i].j0[edOmega] = n/4;
-             id[n++]           = dl[i].atm.minO;
++            id[n++]           = dl[i].atm.minCalpha;
 +            id[n++]           = dl[i].atm.minC;
 +            id[n++]           = dl[i].atm.N;
-             id[n++]           = dl[i].atm.H;
++            id[n++]           = dl[i].atm.Cn[1];
 +        }
 +    }
 +    for (Xi = 0; (Xi < MAXCHI); Xi++)
 +    {
 +        /* Chi(Xi+1): stored only when its fourth atom Cn[Xi+3] was found */
 +        for (i = 0; (i < nl); i++)
 +        {
 +            if (dl[i].atm.Cn[Xi+3] != -1)
 +            {
 +                dl[i].j0[edChi1+Xi] = n/4;
 +                id[n++]             = dl[i].atm.Cn[Xi];
 +                id[n++]             = dl[i].atm.Cn[Xi+1];
 +                id[n++]             = dl[i].atm.Cn[Xi+2];
 +                id[n++]             = dl[i].atm.Cn[Xi+3];
 +            }
 +        }
 +    }
 +    *ndih = n/4;
 +
 +    return id;
 +}
 +
 +int bin(real chi, int mult) /* Return (int)(chi*3/360.0), i.e. which third of a 360-unit range chi falls in (conversion truncates toward zero) */
 +{
 +    mult = 3; /* the caller-supplied mult is overwritten, so the result always uses 3 */
 +
 +    return (int) (chi*mult/360.0);
 +}
 +
 +
 +static void do_dihcorr(const char *fn, int nf, int ndih, real **dih, real dt, /* Call do_autocorr on all ndih dihedral series, then hand each selected series dih[j] to print_one */
 +                       int nlist, t_dlist dlist[], real time[], int maxchi,
 +                       gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega,
 +                       const output_env_t oenv)
 +{
 +    char name1[256], name2[256];
 +    int  i, j, Xi;
 +
 +    do_autocorr(fn, oenv, "Dihedral Autocorrelation Function",
 +                nf, ndih, dih, dt, eacCos, FALSE);
 +    /* Print them all; j walks dih[] in the order phi, psi, omega, chi and advances even for dihedrals that are not printed */
 +    j = 0;
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if (bPhi)
 +        {
 +            print_one(oenv, "corrphi", dlist[i].name, "Phi ACF for", "C(t)", nf/2, time,
 +                      dih[j]);
 +        }
 +        j++;
 +    }
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if (bPsi)
 +        {
 +            print_one(oenv, "corrpsi", dlist[i].name, "Psi ACF for", "C(t)", nf/2, time,
 +                      dih[j]);
 +        }
 +        j++;
 +    }
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if (has_dihedral(edOmega, &dlist[i])) /* omega exists only for some entries, so j advances only for those */
 +        {
 +            if (bOmega)
 +            {
 +                print_one(oenv, "corromega", dlist[i].name, "Omega ACF for", "C(t)",
 +                          nf/2, time, dih[j]);
 +            }
 +            j++;
 +        }
 +    }
 +    for (Xi = 0; (Xi < maxchi); Xi++)
 +    {
 +        sprintf(name1, "corrchi%d", Xi+1);
 +        sprintf(name2, "Chi%d ACF for", Xi+1);
 +        for (i = 0; (i < nlist); i++)
 +        {
 +            if (dlist[i].atm.Cn[Xi+3] != -1)
 +            {
 +                if (bChi)
 +                {
 +                    print_one(oenv, name1, dlist[i].name, name2, "C(t)", nf/2, time, dih[j]);
 +                }
 +                j++;
 +            }
 +        }
 +    }
 +    fprintf(stderr, "\n");
 +}
 +
 +static void copy_dih_data(real in[], real out[], int nf, gmx_bool bLEAVE) /* Copy nf values from in[] to out[], scaling by 180/pi unless bLEAVE is set */
 +{
 +    /* if bLEAVE, do nothing to data in copying to out
 +     * otherwise multiply by 180/pi to convert rad to deg */
 +    int  i;
 +    real mult;
 +    if (bLEAVE)
 +    {
 +        mult = 1;
 +    }
 +    else
 +    {
 +        mult = (180.0/M_PI);
 +    }
 +    for (i = 0; (i < nf); i++)
 +    {
 +        out[i] = in[i]*mult;
 +    }
 +}
 +
 +static void dump_em_all(int nlist, t_dlist dlist[], int nf, real time[], /* Pass the nf-point series of every selected dihedral to print_one, copying it through a scratch buffer first */
 +                        real **dih, int maxchi,
 +                        gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega, gmx_bool bRAD,
 +                        const output_env_t oenv)
 +{
 +    char  name[256], titlestr[256], ystr[256];
 +    real *data;
 +    int   i, j, Xi;
 +
 +    snew(data, nf); /* scratch buffer for one series; NOTE(review): no matching sfree is visible in this function */
 +    if (bRAD)
 +    {
 +        strcpy(ystr, "Angle (rad)");
 +    }
 +    else
 +    {
 +        strcpy(ystr, "Angle (degrees)");
 +    }
 +
 +    /* Print them all; j walks dih[] in the order phi, psi, omega, chi and advances even for dihedrals that are not printed */
 +    j = 0;
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        /* grs debug  printf("OK i %d j %d\n", i, j) ; */
 +        if (bPhi)
 +        {
 +            copy_dih_data(dih[j], data, nf, bRAD); /* bRAD set: copied unchanged; otherwise scaled by 180/pi */
 +            print_one(oenv, "phi", dlist[i].name, "\\xf\\f{}", ystr, nf, time, data);
 +        }
 +        j++;
 +    }
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if (bPsi)
 +        {
 +            copy_dih_data(dih[j], data, nf, bRAD);
 +            print_one(oenv, "psi", dlist[i].name, "\\xy\\f{}", ystr, nf, time, data);
 +        }
 +        j++;
 +    }
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if (has_dihedral(edOmega, &(dlist[i]))) /* omega exists only for some entries, so j advances only for those */
 +        {
 +            if (bOmega)
 +            {
 +                copy_dih_data(dih[j], data, nf, bRAD);
 +                print_one(oenv, "omega", dlist[i].name, "\\xw\\f{}", ystr, nf, time, data);
 +            }
 +            j++;
 +        }
 +    }
 +
 +    for (Xi = 0; (Xi < maxchi); Xi++)
 +    {
 +        for (i = 0; (i < nlist); i++)
 +        {
 +            if (dlist[i].atm.Cn[Xi+3] != -1)
 +            {
 +                if (bChi)
 +                {
 +                    sprintf(name, "chi%d", Xi+1);
 +                    sprintf(titlestr, "\\xc\\f{}\\s%d\\N", Xi+1);
 +                    copy_dih_data(dih[j], data, nf, bRAD);
 +                    print_one(oenv, name, dlist[i].name, titlestr, ystr, nf, time, data);
 +                }
 +                j++;
 +            }
 +        }
 +    }
 +    fprintf(stderr, "\n");
 +}
 +
 +static void reset_one(real dih[], int nf, real phase) /* Add phase to each of the nf angles and wrap the result into [-pi, pi) */
 +{
 +    int j;
 +
 +    for (j = 0; (j < nf); j++)
 +    {
 +        dih[j] += phase;
 +        while (dih[j] < -M_PI) /* bring values below -pi up by whole turns */
 +        {
 +            dih[j] += 2*M_PI;
 +        }
 +        while (dih[j] >= M_PI) /* bring values at or above pi down by whole turns */
 +        {
 +            dih[j] -= 2*M_PI;
 +        }
 +    }
 +}
 +
 +static int reset_em_all(int nlist, t_dlist dlist[], int nf, /* Run reset_one over every dihedral series in the order phi, psi, omega, chi; returns the number of series processed */
 +                        real **dih, int maxchi)
 +{
 +    int  i, j, Xi;
 +
 +    /* Reset them all; j indexes dih[] */
 +    j = 0;
 +    /* Phi */
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if (dlist[i].atm.minC == -1) /* no previous C known (make_chi_ind uses H as first atom then): shift by pi */
 +        {
 +            reset_one(dih[j++], nf, M_PI);
 +        }
 +        else
 +        {
 +            reset_one(dih[j++], nf, 0);
 +        }
 +    }
 +    /* Psi */
 +    for (i = 0; (i < nlist-1); i++)
 +    {
 +        reset_one(dih[j++], nf, 0);
 +    }
 +    /* last Psi is faked from O */
 +    reset_one(dih[j++], nf, M_PI); /* NOTE(review): executed unconditionally, so nlist == 0 would still touch dih[0] */
 +
 +    /* Omega */
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if (has_dihedral(edOmega, &dlist[i]))
 +        {
 +            reset_one(dih[j++], nf, 0);
 +        }
 +    }
 +    /* Chi 1 thru maxchi */
 +    for (Xi = 0; (Xi < maxchi); Xi++)
 +    {
 +        for (i = 0; (i < nlist); i++)
 +        {
 +            if (dlist[i].atm.Cn[Xi+3] != -1)
 +            {
 +                reset_one(dih[j], nf, 0);
 +                j++;
 +            }
 +        }
 +    }
 +    fprintf(stderr, "j after resetting (nr. active dihedrals) = %d\n", j);
 +    return j;
 +}
 +
 +/* Build histograms of all dihedral angles and derive per-residue data.
 + *
 + * For every defined dihedral (Phi, Psi, Omega, Chi1..Chi<maxchi>) of every
 + * entry in dlist this routine
 + *  - histograms the nf values in dih[] with make_histo(),
 + *  - calls calc_distribution_props() to obtain S2 (stored in dlist[i].S2)
 + *    and, for Phi, Psi and Chi1, the J-couplings of the Karplus tables below,
 + *  - accumulates the histogram per residue type (dlist[i].index),
 + *  - optionally (bSSHisto) bins the first frame per secondary structure
 + *    class read from ssdump ('E' -> sheet, 'H' -> helix, anything else
 + *    -> coil), skipping dihedrals whose atoms fail the occupancy /
 + *    B-factor test.
 + *
 + * Output: a J-coupling table on log, optionally the same data in the xvg
 + * file fn (bDo_jc), and one "histo-<dihedral><RESIDUE>.xvg" file per
 + * residue type and selected dihedral (bPhi/bPsi/bOmega/bChi), plus
 + * "-sheet/-helix/-coil" companions when bSSHisto is set.
 + *
 + * index[] holds four atom indices per dihedral, consumed in the same order
 + * as dih[]. ssdump is expected to contain an integer count followed by a
 + * whitespace-free string with one character per residue.
 + *
 + * All memory allocated here is released before returning.
 + */
 +static void histogramming(FILE *log, int nbin, gmx_residuetype_t rt,
 +                          int nf, int maxchi, real **dih,
 +                          int nlist, t_dlist dlist[],
 +                          atom_id index[],
 +                          gmx_bool bPhi, gmx_bool bPsi, gmx_bool bOmega, gmx_bool bChi,
 +                          gmx_bool bNormalize, gmx_bool bSSHisto, const char *ssdump,
 +                          real bfac_max, t_atoms *atoms,
 +                          gmx_bool bDo_jc, const char *fn,
 +                          const output_env_t oenv)
 +{
 +    /* also gets 3J couplings and order parameters S2 */
 +    t_karplus kkkphi[] = {
 +        { "J_NHa1",    6.51, -1.76,  1.6, -M_PI/3,   0.0,  0.0 },
 +        { "J_NHa2",    6.51, -1.76,  1.6,  M_PI/3,   0.0,  0.0 },
 +        { "J_HaC'",    4.0,   1.1,   0.1,  0.0,      0.0,  0.0 },
 +        { "J_NHCb",    4.7,  -1.5,  -0.2,  M_PI/3,   0.0,  0.0 },
 +        { "J_Ci-1Hai", 4.5,  -1.3,  -1.2,  2*M_PI/3, 0.0,  0.0 }
 +    };
 +    t_karplus kkkpsi[] = {
 +        { "J_HaN",   -0.88, -0.61, -0.27, M_PI/3,  0.0,  0.0 }
 +    };
 +    t_karplus kkkchi1[] = {
 +        { "JHaHb2",       9.5, -1.6, 1.8, -M_PI/3, 0,  0.0 },
 +        { "JHaHb3",       9.5, -1.6, 1.8, 0, 0,  0.0 }
 +    };
 +#define NKKKPHI asize(kkkphi)
 +#define NKKKPSI asize(kkkpsi)
 +#define NKKKCHI asize(kkkchi1)
 +#define NJC (NKKKPHI+NKKKPSI+NKKKCHI)
 +
 +    FILE       *fp, *ssfp[3] = {NULL, NULL, NULL};
 +    const char *sss[3] = { "sheet", "helix", "coil" };
 +    real        S2;
 +    real       *normhisto;
 +    real      **Jc, **Jcsig;
 +    int     ****his_aa_ss = NULL;
 +    int      ***his_aa, *histmp;
 +    int         i, j, k, m, n, nn, Dih, nres, hindex, angle;
 +    gmx_bool    bBfac, bOccup;
 +    char        hisfile[256], hhisfile[256], sshisfile[256], title[256], *ss_str = NULL;
 +    char        ssfmt[32];
 +    char      **leg;
 +    const char *residue_name;
 +    int         rt_size;
 +
 +    rt_size = gmx_residuetype_get_size(rt);
 +    if (bSSHisto)
 +    {
 +        fp = ffopen(ssdump, "r");
 +        /* A non-positive count cannot describe a usable structure string
 +         * and would make the bounded read below ill-formed.
 +         */
 +        if ((1 != fscanf(fp, "%d", &nres)) || (nres < 1))
 +        {
 +            gmx_fatal(FARGS, "Error reading from file %s", ssdump);
 +        }
 +
 +        snew(ss_str, nres+1);
 +        /* Limit the read to nres characters so that a file whose string is
 +         * longer than its announced count cannot overflow ss_str.
 +         */
 +        sprintf(ssfmt, "%%%ds", nres);
 +        if (1 != fscanf(fp, ssfmt, ss_str))
 +        {
 +            gmx_fatal(FARGS, "Error reading from file %s", ssdump);
 +        }
 +
 +        ffclose(fp);
 +        /* Four dimensional array... Very cool */
 +        snew(his_aa_ss, 3);
 +        for (i = 0; (i < 3); i++)
 +        {
 +            snew(his_aa_ss[i], rt_size+1);
 +            for (j = 0; (j <= rt_size); j++)
 +            {
 +                snew(his_aa_ss[i][j], edMax);
 +                for (Dih = 0; (Dih < edMax); Dih++)
 +                {
 +                    snew(his_aa_ss[i][j][Dih], nbin+1);
 +                }
 +            }
 +        }
 +    }
 +    snew(his_aa, edMax);
 +    for (Dih = 0; (Dih < edMax); Dih++)
 +    {
 +        snew(his_aa[Dih], rt_size+1);
 +        for (i = 0; (i <= rt_size); i++)
 +        {
 +            snew(his_aa[Dih][i], nbin+1);
 +        }
 +    }
 +    snew(histmp, nbin);
 +
 +    snew(Jc, nlist);
 +    snew(Jcsig, nlist);
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        snew(Jc[i], NJC);
 +        snew(Jcsig[i], NJC);
 +    }
 +
 +    /* j counts the dihedral arrays in dih[], n the atoms in index[] */
 +    j = 0;
 +    n = 0;
 +    for (Dih = 0; (Dih < NONCHI+maxchi); Dih++)
 +    {
 +        for (i = 0; (i < nlist); i++)
 +        {
 +            if (((Dih  < edOmega) ) ||
 +                ((Dih == edOmega) && (has_dihedral(edOmega, &(dlist[i])))) ||
 +                ((Dih  > edOmega) && (dlist[i].atm.Cn[Dih-NONCHI+3] != -1)))
 +            {
 +                make_histo(log, nf, dih[j], nbin, histmp, -M_PI, M_PI);
 +
 +                if (bSSHisto)
 +                {
 +                    /* Assume there is only one structure, the first.
 +                     * Compute index in histogram.
 +                     */
 +                    /* Check the atoms to see whether their B-factors are low enough
 +                     * Check atoms to see their occupancy is 1.
 +                     */
 +                    bBfac = bOccup = TRUE;
 +                    for (nn = 0; (nn < 4); nn++, n++)
 +                    {
 +                        bBfac  = bBfac  && (atoms->pdbinfo[index[n]].bfac <= bfac_max);
 +                        bOccup = bOccup && (atoms->pdbinfo[index[n]].occup == 1);
 +                    }
 +                    if (bOccup && ((bfac_max <= 0) || ((bfac_max > 0) && bBfac)))
 +                    {
 +                        hindex = ((dih[j][0]+M_PI)*nbin)/(2*M_PI);
 +                        range_check(hindex, 0, nbin);
 +
 +                        /* Assign dihedral to either of the structure determined
 +                         * histograms
 +                         */
 +                        switch (ss_str[dlist[i].resnr])
 +                        {
 +                            case 'E':
 +                                his_aa_ss[0][dlist[i].index][Dih][hindex]++;
 +                                break;
 +                            case 'H':
 +                                his_aa_ss[1][dlist[i].index][Dih][hindex]++;
 +                                break;
 +                            default:
 +                                his_aa_ss[2][dlist[i].index][Dih][hindex]++;
 +                                break;
 +                        }
 +                    }
 +                    else if (debug)
 +                    {
 +                        fprintf(debug, "Res. %d has incomplete occupancy or bfacs > %g\n",
 +                                dlist[i].resnr, bfac_max);
 +                    }
 +                }
 +                else
 +                {
 +                    /* keep the atom counter in step with the dihedral counter */
 +                    n += 4;
 +                }
 +
 +                switch (Dih)
 +                {
 +                    case edPhi:
 +                        calc_distribution_props(nbin, histmp, -M_PI, NKKKPHI, kkkphi, &S2);
 +
 +                        for (m = 0; (m < NKKKPHI); m++)
 +                        {
 +                            Jc[i][m]    = kkkphi[m].Jc;
 +                            Jcsig[i][m] = kkkphi[m].Jcsig;
 +                        }
 +                        break;
 +                    case edPsi:
 +                        calc_distribution_props(nbin, histmp, -M_PI, NKKKPSI, kkkpsi, &S2);
 +
 +                        for (m = 0; (m < NKKKPSI); m++)
 +                        {
 +                            Jc[i][NKKKPHI+m]    = kkkpsi[m].Jc;
 +                            Jcsig[i][NKKKPHI+m] = kkkpsi[m].Jcsig;
 +                        }
 +                        break;
 +                    case edChi1:
 +                        calc_distribution_props(nbin, histmp, -M_PI, NKKKCHI, kkkchi1, &S2);
 +                        for (m = 0; (m < NKKKCHI); m++)
 +                        {
 +                            Jc[i][NKKKPHI+NKKKPSI+m]    = kkkchi1[m].Jc;
 +                            Jcsig[i][NKKKPHI+NKKKPSI+m] = kkkchi1[m].Jcsig;
 +                        }
 +                        break;
 +                    default: /* covers edOmega and higher Chis than Chi1 */
 +                        calc_distribution_props(nbin, histmp, -M_PI, 0, NULL, &S2);
 +                        break;
 +                }
 +                dlist[i].S2[Dih]        = S2;
 +
 +                /* Sum distribution per amino acid type as well, and clear
 +                 * the scratch histogram for the next dihedral.
 +                 */
 +                for (k = 0; (k < nbin); k++)
 +                {
 +                    his_aa[Dih][dlist[i].index][k] += histmp[k];
 +                    histmp[k] = 0;
 +                }
 +                j++;
 +            }
 +            else /* dihed not defined */
 +            {
 +                dlist[i].S2[Dih] = 0.0;
 +            }
 +        }
 +    }
 +    sfree(histmp);
 +
 +    /* Print out Jcouplings */
 +    fprintf(log, "\n *** J-Couplings from simulation (plus std. dev.) ***\n\n");
 +    fprintf(log, "Residue   ");
 +    for (i = 0; (i < NKKKPHI); i++)
 +    {
 +        fprintf(log, "%7s   SD", kkkphi[i].name);
 +    }
 +    for (i = 0; (i < NKKKPSI); i++)
 +    {
 +        fprintf(log, "%7s   SD", kkkpsi[i].name);
 +    }
 +    for (i = 0; (i < NKKKCHI); i++)
 +    {
 +        fprintf(log, "%7s   SD", kkkchi1[i].name);
 +    }
 +    fprintf(log, "\n");
 +    for (i = 0; (i < NJC+1); i++)
 +    {
 +        fprintf(log, "------------");
 +    }
 +    fprintf(log, "\n");
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        fprintf(log, "%-10s", dlist[i].name);
 +        for (j = 0; (j < NJC); j++)
 +        {
 +            fprintf(log, "  %5.2f %4.2f", Jc[i][j], Jcsig[i][j]);
 +        }
 +        fprintf(log, "\n");
 +    }
 +    fprintf(log, "\n");
 +
 +    /* and to -jc file... */
 +    if (bDo_jc)
 +    {
 +        fp = xvgropen(fn, "\\S3\\NJ-Couplings from Karplus Equation", "Residue",
 +                      "Coupling", oenv);
 +        snew(leg, NJC);
 +        for (i = 0; (i < NKKKPHI); i++)
 +        {
 +            leg[i] = strdup(kkkphi[i].name);
 +        }
 +        for (i = 0; (i < NKKKPSI); i++)
 +        {
 +            leg[i+NKKKPHI] = strdup(kkkpsi[i].name);
 +        }
 +        for (i = 0; (i < NKKKCHI); i++)
 +        {
 +            leg[i+NKKKPHI+NKKKPSI] = strdup(kkkchi1[i].name);
 +        }
 +        xvgr_legend(fp, NJC, (const char**)leg, oenv);
 +        fprintf(fp, "%5s ", "#Res.");
 +        for (i = 0; (i < NJC); i++)
 +        {
 +            fprintf(fp, "%10s ", leg[i]);
 +        }
 +        fprintf(fp, "\n");
 +        for (i = 0; (i < nlist); i++)
 +        {
 +            fprintf(fp, "%5d ", dlist[i].resnr);
 +            for (j = 0; (j < NJC); j++)
 +            {
 +                fprintf(fp, "  %8.3f", Jc[i][j]);
 +            }
 +            fprintf(fp, "\n");
 +        }
 +        ffclose(fp);
 +        for (i = 0; (i < NJC); i++)
 +        {
 +            sfree(leg[i]);
 +        }
 +        /* the pointer array itself was allocated with snew() as well */
 +        sfree(leg);
 +    }
 +    /* finished -jc stuff */
 +
 +    /* The J-coupling tables are not needed beyond this point */
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        sfree(Jc[i]);
 +        sfree(Jcsig[i]);
 +    }
 +    sfree(Jc);
 +    sfree(Jcsig);
 +
 +    snew(normhisto, nbin);
 +    for (i = 0; (i < rt_size); i++)
 +    {
 +        for (Dih = 0; (Dih < edMax); Dih++)
 +        {
 +            /* First check whether something is in there */
 +            for (j = 0; (j < nbin); j++)
 +            {
 +                if (his_aa[Dih][i][j] != 0)
 +                {
 +                    break;
 +                }
 +            }
 +            if ((j < nbin) &&
 +                ((bPhi && (Dih == edPhi)) ||
 +                 (bPsi && (Dih == edPsi)) ||
 +                 (bOmega && (Dih == edOmega)) ||
 +                 (bChi && (Dih >= edChi1))))
 +            {
 +                if (bNormalize)
 +                {
 +                    normalize_histo(nbin, his_aa[Dih][i], (360.0/nbin), normhisto);
 +                }
 +
 +                residue_name = gmx_residuetype_get_name(rt, i);
 +                switch (Dih)
 +                {
 +                    case edPhi:
 +                        sprintf(hisfile, "histo-phi%s", residue_name);
 +                        sprintf(title, "\\xf\\f{} Distribution for %s", residue_name);
 +                        break;
 +                    case edPsi:
 +                        sprintf(hisfile, "histo-psi%s", residue_name);
 +                        sprintf(title, "\\xy\\f{} Distribution for %s", residue_name);
 +                        break;
 +                    case edOmega:
 +                        sprintf(hisfile, "histo-omega%s", residue_name);
 +                        sprintf(title, "\\xw\\f{} Distribution for %s", residue_name);
 +                        break;
 +                    default:
 +                        sprintf(hisfile, "histo-chi%d%s", Dih-NONCHI+1, residue_name);
 +                        sprintf(title, "\\xc\\f{}\\s%d\\N Distribution for %s",
 +                                Dih-NONCHI+1, residue_name);
 +                }
 +                strcpy(hhisfile, hisfile);
 +                strcat(hhisfile, ".xvg");
 +                fp = xvgropen(hhisfile, title, "Degrees", "", oenv);
 +                fprintf(fp, "@ with g0\n");
 +                xvgr_world(fp, -180, 0, 180, 0.1, oenv);
 +                fprintf(fp, "# this effort to set graph size fails unless you run with -autoscale none or -autoscale y flags\n");
 +                fprintf(fp, "@ xaxis tick on\n");
 +                fprintf(fp, "@ xaxis tick major 90\n");
 +                fprintf(fp, "@ xaxis tick minor 30\n");
 +                fprintf(fp, "@ xaxis ticklabel prec 0\n");
 +                fprintf(fp, "@ yaxis tick off\n");
 +                fprintf(fp, "@ yaxis ticklabel off\n");
 +                fprintf(fp, "@ type xy\n");
 +                if (bSSHisto)
 +                {
 +                    for (k = 0; (k < 3); k++)
 +                    {
 +                        sprintf(sshisfile, "%s-%s.xvg", hisfile, sss[k]);
 +                        ssfp[k] = ffopen(sshisfile, "w");
 +                    }
 +                }
 +                for (j = 0; (j < nbin); j++)
 +                {
 +                    angle = -180 + (360/nbin)*j;
 +                    if (bNormalize)
 +                    {
 +                        fprintf(fp, "%5d  %10g\n", angle, normhisto[j]);
 +                    }
 +                    else
 +                    {
 +                        fprintf(fp, "%5d  %10d\n", angle, his_aa[Dih][i][j]);
 +                    }
 +                    if (bSSHisto)
 +                    {
 +                        for (k = 0; (k < 3); k++)
 +                        {
 +                            fprintf(ssfp[k], "%5d  %10d\n", angle,
 +                                    his_aa_ss[k][i][Dih][j]);
 +                        }
 +                    }
 +                }
 +                fprintf(fp, "&\n");
 +                ffclose(fp);
 +                if (bSSHisto)
 +                {
 +                    for (k = 0; (k < 3); k++)
 +                    {
 +                        fprintf(ssfp[k], "&\n");
 +                        ffclose(ssfp[k]);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    sfree(normhisto);
 +
 +    /* Release the per-residue-type histograms */
 +    for (Dih = 0; (Dih < edMax); Dih++)
 +    {
 +        for (i = 0; (i <= rt_size); i++)
 +        {
 +            sfree(his_aa[Dih][i]);
 +        }
 +        sfree(his_aa[Dih]);
 +    }
 +    sfree(his_aa);
 +
 +    if (bSSHisto)
 +    {
 +        /* Four dimensional array... Very cool */
 +        for (i = 0; (i < 3); i++)
 +        {
 +            for (j = 0; (j <= rt_size); j++)
 +            {
 +                for (Dih = 0; (Dih < edMax); Dih++)
 +                {
 +                    sfree(his_aa_ss[i][j][Dih]);
 +                }
 +                sfree(his_aa_ss[i][j]);
 +            }
 +            sfree(his_aa_ss[i]);
 +        }
 +        sfree(his_aa_ss);
 +        sfree(ss_str);
 +    }
 +}
 +
 +/* Open an xvg file through xvgropen() and write the fixed set of xmgrace
 + * directives used for the Ramachandran-style scatter plots: graph g0, a
 + * world of -180..180 on both axes, major ticks every 90 and minor ticks
 + * every 30, and the symbol/line settings for set s0.
 + *
 + * Returns the open file; the caller writes the data points and closes it.
 + */
 +static FILE *rama_file(const char *fn, const char *title, const char *xaxis,
 +                       const char *yaxis, const output_env_t oenv)
 +{
 +    /* Directives written verbatim, in this order, after the world is set */
 +    static const char *const directives[] = {
 +        "@ xaxis tick on\n",
 +        "@ xaxis tick major 90\n",
 +        "@ xaxis tick minor 30\n",
 +        "@ xaxis ticklabel prec 0\n",
 +        "@ yaxis tick on\n",
 +        "@ yaxis tick major 90\n",
 +        "@ yaxis tick minor 30\n",
 +        "@ yaxis ticklabel prec 0\n",
 +        "@    s0 type xy\n",
 +        "@    s0 symbol 2\n",
 +        "@    s0 symbol size 0.410000\n",
 +        "@    s0 symbol fill 1\n",
 +        "@    s0 symbol color 1\n",
 +        "@    s0 symbol linewidth 1\n",
 +        "@    s0 symbol linestyle 1\n",
 +        "@    s0 symbol center false\n",
 +        "@    s0 symbol char 0\n",
 +        "@    s0 skip 0\n",
 +        "@    s0 linestyle 0\n",
 +        "@    s0 linewidth 1\n",
 +        "@ type xy\n"
 +    };
 +    const int ndirectives = (int)(sizeof(directives)/sizeof(directives[0]));
 +    FILE     *out;
 +    int       d;
 +
 +    out = xvgropen(fn, title, xaxis, yaxis, oenv);
 +    fputs("@ with g0\n", out);
 +    xvgr_world(out, -180, -180, 180, 180, oenv);
 +    for (d = 0; d < ndirectives; d++)
 +    {
 +        fputs(directives[d], out);
 +    }
 +
 +    return out;
 +}
 +
 +/* Write Ramachandran-type scatter plots for every residue in dlist.
 + *
 + * For residues that have both Phi and Psi:
 + *  - "ramaPhiPsi<name>.xvg" with one (phi, psi) point in degrees per frame,
 + *  - if bViol, "violPhiPsi<name>.xvg" with the negated result of bAllowed()
 + *    per frame,
 + *  - if bRamOmega and the residue has Omega, "ramomega<name>.xpm" with the
 + *    omega values accumulated on an NMAT x NMAT phi/psi grid, divided by nf
 + *    and shifted by 180.
 + * For residues that have both Chi1 and Chi2, "ramaX1X2<name>.xvg" is written;
 + * otherwise a note is printed on stderr.
 + *
 + * dih[] holds the angles in radians (they are multiplied by RAD2DEG before
 + * printing); dlist[i].j0[] gives the row of dih[] for each dihedral type.
 + */
 +static void do_rama(int nf, int nlist, t_dlist dlist[], real **dih,
 +                    gmx_bool bViol, gmx_bool bRamOmega, const output_env_t oenv)
 +{
 +    FILE    *fp, *gp = NULL;
 +    gmx_bool bOm;
 +    char     fn[256];
 +    int      i, j, k, Xi1, Xi2, Phi, Psi, Om = 0, nlevels;
 +    int      iphi, ipsi;
 +#define NMAT 120
 +    real   **mat  = NULL, phi, psi, omega, axis[NMAT], lo, hi;
 +    t_rgb    rlo  = { 1.0, 0.0, 0.0 };
 +    t_rgb    rmid = { 1.0, 1.0, 1.0 };
 +    t_rgb    rhi  = { 0.0, 0.0, 1.0 };
 +
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        if ((has_dihedral(edPhi, &(dlist[i]))) &&
 +            (has_dihedral(edPsi, &(dlist[i]))))
 +        {
 +            sprintf(fn, "ramaPhiPsi%s.xvg", dlist[i].name);
 +            fp = rama_file(fn, "Ramachandran Plot",
 +                           "\\8f\\4 (deg)", "\\8y\\4 (deg)", oenv);
 +            bOm = bRamOmega && has_dihedral(edOmega, &(dlist[i]));
 +            if (bOm)
 +            {
 +                Om = dlist[i].j0[edOmega];
 +                snew(mat, NMAT);
 +                for (j = 0; (j < NMAT); j++)
 +                {
 +                    snew(mat[j], NMAT);
 +                    axis[j] = -180+(360*j)/NMAT;
 +                }
 +            }
 +            if (bViol)
 +            {
 +                sprintf(fn, "violPhiPsi%s.xvg", dlist[i].name);
 +                gp = ffopen(fn, "w");
 +            }
 +            Phi = dlist[i].j0[edPhi];
 +            Psi = dlist[i].j0[edPsi];
 +            for (j = 0; (j < nf); j++)
 +            {
 +                phi = RAD2DEG*dih[Phi][j];
 +                psi = RAD2DEG*dih[Psi][j];
 +                fprintf(fp, "%10g  %10g\n", phi, psi);
 +                if (bViol)
 +                {
 +                    /* Pass both angles in the same unit (radians, as stored
 +                     * in dih[]). The previous code passed phi in radians but
 +                     * psi in degrees.
 +                     * NOTE(review): bAllowed() is defined elsewhere and is
 +                     * understood to convert radians itself - confirm.
 +                     */
 +                    fprintf(gp, "%d\n", !bAllowed(dih[Phi][j], dih[Psi][j]));
 +                }
 +                if (bOm)
 +                {
 +                    omega = RAD2DEG*dih[Om][j];
 +                    /* An angle of exactly +180 degrees maps to bin NMAT, and
 +                     * angles outside [-180,180] map further out; clamp so the
 +                     * write always stays inside the NMAT x NMAT matrix.
 +                     */
 +                    iphi = (int)((phi*NMAT)/360)+NMAT/2;
 +                    ipsi = (int)((psi*NMAT)/360)+NMAT/2;
 +                    iphi = max(0, min(iphi, NMAT-1));
 +                    ipsi = max(0, min(ipsi, NMAT-1));
 +                    mat[iphi][ipsi] += omega;
 +                }
 +            }
 +            if (bViol)
 +            {
 +                ffclose(gp);
 +            }
 +            ffclose(fp);
 +            if (bOm)
 +            {
 +                sprintf(fn, "ramomega%s.xpm", dlist[i].name);
 +                fp = ffopen(fn, "w");
 +                lo = hi = 0;
 +                for (j = 0; (j < NMAT); j++)
 +                {
 +                    for (k = 0; (k < NMAT); k++)
 +                    {
 +                        mat[j][k] /= nf;
 +                        lo         = min(mat[j][k], lo);
 +                        hi         = max(mat[j][k], hi);
 +                    }
 +                }
 +                /* Symmetrise */
 +                if (fabs(lo) > fabs(hi))
 +                {
 +                    hi = -lo;
 +                }
 +                else
 +                {
 +                    lo = -hi;
 +                }
 +                /* Add 180 */
 +                for (j = 0; (j < NMAT); j++)
 +                {
 +                    for (k = 0; (k < NMAT); k++)
 +                    {
 +                        mat[j][k] += 180;
 +                    }
 +                }
 +                lo     += 180;
 +                hi     += 180;
 +                nlevels = 20;
 +                write_xpm3(fp, 0, "Omega/Ramachandran Plot", "Deg", "Phi", "Psi",
 +                           NMAT, NMAT, axis, axis, mat, lo, 180.0, hi, rlo, rmid, rhi, &nlevels);
 +                ffclose(fp);
 +                for (j = 0; (j < NMAT); j++)
 +                {
 +                    sfree(mat[j]);
 +                }
 +                sfree(mat);
 +            }
 +        }
 +        if ((has_dihedral(edChi1, &(dlist[i]))) &&
 +            (has_dihedral(edChi2, &(dlist[i]))))
 +        {
 +            sprintf(fn, "ramaX1X2%s.xvg", dlist[i].name);
 +            fp = rama_file(fn, "\\8c\\4\\s1\\N-\\8c\\4\\s2\\N Ramachandran Plot",
 +                           "\\8c\\4\\s1\\N (deg)", "\\8c\\4\\s2\\N (deg)", oenv);
 +            Xi1 = dlist[i].j0[edChi1];
 +            Xi2 = dlist[i].j0[edChi2];
 +            for (j = 0; (j < nf); j++)
 +            {
 +                fprintf(fp, "%10g  %10g\n", RAD2DEG*dih[Xi1][j], RAD2DEG*dih[Xi2][j]);
 +            }
 +            ffclose(fp);
 +        }
 +        else
 +        {
 +            fprintf(stderr, "No chi1 & chi2 angle for %s\n", dlist[i].name);
 +        }
 +    }
 +}
 +
 +
 +/* Write the per-residue rotamer transition rates to the xvg file fn.
 + *
 + * One row is written per entry of dlist: the residue number followed by
 + * dlist[i].ntr[Dih]/dt for Phi, Psi, Omega and the first maxchi Chi
 + * dihedrals. The legend and the commented column header use the names in
 + * leg[], which must stay in the same order as the dihedral enum.
 + *
 + * atoms, x, box, bPhi, bPsi and bChi are not used by this routine; they are
 + * kept so that existing callers remain valid.
 + */
 +static void print_transitions(const char *fn, int maxchi, int nlist,
 +                              t_dlist dlist[], t_atoms *atoms, rvec x[],
 +                              matrix box, gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, real dt,
 +                              const output_env_t oenv)
 +{
 +    /* based on order_params below */
 +    FILE *fp;
 +    int   i, Dih, Xi;
 +
 +    /*  must correspond with enum in pp2shift.h:38 */
 +    /* The names are only read, so string literals are used directly rather
 +     * than heap copies that would need NULL checks and freeing.
 +     */
 +    const char *leg[edMax] = {
 +        "Phi", "Psi", "Omega",
 +        "Chi1", "Chi2", "Chi3", "Chi4", "Chi5", "Chi6"
 +    };
 +#define NLEG asize(leg)
 +
 +    /* Print order parameters */
 +    fp = xvgropen(fn, "Dihedral Rotamer Transitions", "Residue", "Transitions/ns",
 +                  oenv);
 +    xvgr_legend(fp, NONCHI+maxchi, leg, oenv);
 +
 +    fprintf(fp, "%5s ", "#Res.");
 +    fprintf(fp, "%10s %10s %10s ", leg[edPhi], leg[edPsi], leg[edOmega]);
 +    for (Xi = 0; Xi < maxchi; Xi++)
 +    {
 +        fprintf(fp, "%10s ", leg[NONCHI+Xi]);
 +    }
 +    fprintf(fp, "\n");
 +
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        fprintf(fp, "%5d ", dlist[i].resnr);
 +        for (Dih = 0; (Dih < NONCHI+maxchi); Dih++)
 +        {
 +            fprintf(fp, "%10.3f ", dlist[i].ntr[Dih]/dt);
 +        }
 +        /* fprintf(fp,"%12s\n",dlist[i].name);  this confuses xmgrace */
 +        fprintf(fp, "\n");
 +    }
 +    ffclose(fp);
 +}
 +
 +/* Report the dihedral order parameters S2 stored in dlist[].S2.
 + *
 + * - Writes one row per residue to the xvg file fn: residue number, the
 + *   smallest and largest non-zero S2 of that residue, then S2 for Phi, Psi,
 + *   Omega and the first maxchi Chi dihedrals.
 + * - If pdbfn is not NULL, writes a pdb file in which the B-factor field holds
 + *   the negative of S2 for the N/H (Phi), C/O (Psi) and Cn[Xi+1] (Chi) atoms
 + *   and bfac_init for every other atom, followed by ten extra "LEG" atoms
 + *   with B-factors 0, -0.1, ... -0.9 placed at the minimum corner of the
 + *   coordinates. atoms->pdbinfo is allocated here when it is NULL and is
 + *   overwritten in any case.
 + * - Writes to log, per dihedral type selected by bPhi/bPsi/bChi, the number
 + *   of residues with S2 > 0.8.
 + */
 +static void order_params(FILE *log,
 +                         const char *fn, int maxchi, int nlist, t_dlist dlist[],
 +                         const char *pdbfn, real bfac_init,
 +                         t_atoms *atoms, rvec x[], int ePBC, matrix box,
 +                         gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, const output_env_t oenv)
 +{
 +    FILE *fp;
 +    int   nh[edMax];
 +    char  buf[STRLEN];
 +    int   i, Dih, Xi;
 +    real  S2Max, S2Min;
 +
 +    /* except for S2Min/Max, must correspond with enum in pp2shift.h:38 */
 +    /* The names are only read, so the literals are used directly; heap
 +     * copies would need NULL checks before being printed with %s.
 +     */
 +    const char *leg[2+edMax] = {
 +        "S2Min", "S2Max", "Phi", "Psi", "Omega",
 +        "Chi1", "Chi2", "Chi3", "Chi4", "Chi5",
 +        "Chi6"
 +    };
 +#define NLEG asize(leg)
 +
 +    /* Print order parameters */
 +    fp = xvgropen(fn, "Dihedral Order Parameters", "Residue", "S2", oenv);
 +    xvgr_legend(fp, 2+NONCHI+maxchi, leg, oenv);
 +
 +    for (Dih = 0; (Dih < edMax); Dih++)
 +    {
 +        nh[Dih] = 0;
 +    }
 +
 +    fprintf(fp, "%5s ", "#Res.");
 +    fprintf(fp, "%10s %10s ", leg[0], leg[1]);
 +    fprintf(fp, "%10s %10s %10s ", leg[2+edPhi], leg[2+edPsi], leg[2+edOmega]);
 +    for (Xi = 0; Xi < maxchi; Xi++)
 +    {
 +        fprintf(fp, "%10s ", leg[2+NONCHI+Xi]);
 +    }
 +    fprintf(fp, "\n");
 +
 +    for (i = 0; (i < nlist); i++)
 +    {
 +        /* Sentinels; they are printed unchanged when every S2 is zero */
 +        S2Max = -10;
 +        S2Min = 10;
 +        for (Dih = 0; (Dih < NONCHI+maxchi); Dih++)
 +        {
 +            /* a value of exactly 0 is excluded from the min/max */
 +            if (dlist[i].S2[Dih] != 0)
 +            {
 +                if (dlist[i].S2[Dih] > S2Max)
 +                {
 +                    S2Max = dlist[i].S2[Dih];
 +                }
 +                if (dlist[i].S2[Dih] < S2Min)
 +                {
 +                    S2Min = dlist[i].S2[Dih];
 +                }
 +            }
 +            if (dlist[i].S2[Dih] > 0.8)
 +            {
 +                nh[Dih]++;
 +            }
 +        }
 +        fprintf(fp, "%5d ", dlist[i].resnr);
 +        fprintf(fp, "%10.3f %10.3f ", S2Min, S2Max);
 +        for (Dih = 0; (Dih < NONCHI+maxchi); Dih++)
 +        {
 +            fprintf(fp, "%10.3f ", dlist[i].S2[Dih]);
 +        }
 +        fprintf(fp, "\n");
 +        /* fprintf(fp,"%12s\n",dlist[i].name);  this confuses xmgrace */
 +    }
 +    ffclose(fp);
 +
 +    if (NULL != pdbfn)
 +    {
 +        real x0, y0, z0;
 +
 +        if (NULL == atoms->pdbinfo)
 +        {
 +            snew(atoms->pdbinfo, atoms->nr);
 +        }
 +        for (i = 0; (i < atoms->nr); i++)
 +        {
 +            atoms->pdbinfo[i].bfac = bfac_init;
 +        }
 +
 +        for (i = 0; (i < nlist); i++)
 +        {
 +            atoms->pdbinfo[dlist[i].atm.N].bfac = -dlist[i].S2[0]; /* Phi */
 +            atoms->pdbinfo[dlist[i].atm.H].bfac = -dlist[i].S2[0]; /* Phi */
 +            atoms->pdbinfo[dlist[i].atm.C].bfac = -dlist[i].S2[1]; /* Psi */
 +            atoms->pdbinfo[dlist[i].atm.O].bfac = -dlist[i].S2[1]; /* Psi */
 +            for (Xi = 0; (Xi < maxchi); Xi++)                      /* Chi's */
 +            {
 +                if (dlist[i].atm.Cn[Xi+3] != -1)
 +                {
 +                    atoms->pdbinfo[dlist[i].atm.Cn[Xi+1]].bfac = -dlist[i].S2[NONCHI+Xi];
 +                }
 +            }
 +        }
 +
 +        fp = ffopen(pdbfn, "w");
 +        fprintf(fp, "REMARK generated by g_chi\n");
 +        fprintf(fp, "REMARK "
 +                "B-factor field contains negative of dihedral order parameters\n");
 +        write_pdbfile(fp, NULL, atoms, x, ePBC, box, ' ', 0, NULL, TRUE);
 +        /* Find the minimum corner of the coordinates for the legend atoms */
 +        x0 = y0 = z0 = 1000.0;
 +        for (i = 0; (i < atoms->nr); i++)
 +        {
 +            x0 = min(x0, x[i][XX]);
 +            y0 = min(y0, x[i][YY]);
 +            z0 = min(z0, x[i][ZZ]);
 +        }
 +        x0 *= 10.0; /* nm -> angstrom */
 +        y0 *= 10.0; /* nm -> angstrom */
 +        z0 *= 10.0; /* nm -> angstrom */
 +        sprintf(buf, "%s%%6.f%%6.2f\n", get_pdbformat());
 +        for (i = 0; (i < 10); i++)
 +        {
 +            fprintf(fp, buf, "ATOM  ", atoms->nr+1+i, "CA", "LEG", ' ',
 +                    atoms->nres+1, ' ', x0, y0, z0+(1.2*i), 0.0, -0.1*i);
 +        }
 +        ffclose(fp);
 +    }
 +
 +    fprintf(log, "Dihedrals with S2 > 0.8\n");
 +    fprintf(log, "Dihedral: ");
 +    if (bPhi)
 +    {
 +        fprintf(log, " Phi  ");
 +    }
 +    if (bPsi)
 +    {
 +        fprintf(log, " Psi ");
 +    }
 +    if (bChi)
 +    {
 +        for (Xi = 0; (Xi < maxchi); Xi++)
 +        {
 +            fprintf(log, " %s ", leg[2+NONCHI+Xi]);
 +        }
 +    }
 +    fprintf(log, "\nNumber:   ");
 +    if (bPhi)
 +    {
 +        fprintf(log, "%4d  ", nh[0]);
 +    }
 +    if (bPsi)
 +    {
 +        fprintf(log, "%4d  ", nh[1]);
 +    }
 +    if (bChi)
 +    {
 +        for (Xi = 0; (Xi < maxchi); Xi++)
 +        {
 +            fprintf(log, "%4d  ", nh[NONCHI+Xi]);
 +        }
 +    }
 +    fprintf(log, "\n");
 +}
 +
 +int gmx_chi(int argc, char *argv[])
 +{
 +    const char *desc[] = {
 +        "[TT]g_chi[tt] computes [GRK]phi[grk], [GRK]psi[grk], [GRK]omega[grk], and [GRK]chi[grk] dihedrals for all your ",
 +        "amino acid backbone and sidechains.",
 +        "It can compute dihedral angle as a function of time, and as",
 +        "histogram distributions.",
 +        "The distributions [TT](histo-(dihedral)(RESIDUE).xvg[tt]) are cumulative over all residues of each type.[PAR]",
 +        "If option [TT]-corr[tt] is given, the program will",
 +        "calculate dihedral autocorrelation functions. The function used",
 +        "is C(t) = [CHEVRON][COS][GRK]chi[grk]([GRK]tau[grk])[cos] [COS][GRK]chi[grk]([GRK]tau[grk]+t)[cos][chevron]. The use of cosines",
 +        "rather than angles themselves, resolves the problem of periodicity.",
 +        "(Van der Spoel & Berendsen (1997), Biophys. J. 72, 2032-2041).",
 +        "Separate files for each dihedral of each residue",
 +        "[TT](corr(dihedral)(RESIDUE)(nresnr).xvg[tt]) are output, as well as a",
 +        "file containing the information for all residues (argument of [TT]-corr[tt]).[PAR]",
 +        "With option [TT]-all[tt], the angles themselves as a function of time for",
 +        "each residue are printed to separate files [TT](dihedral)(RESIDUE)(nresnr).xvg[tt].",
 +        "These can be in radians or degrees.[PAR]",
 +        "A log file (argument [TT]-g[tt]) is also written. This contains [BR]",
 +        "(a) information about the number of residues of each type.[BR]",
 +        "(b) The NMR ^3J coupling constants from the Karplus equation.[BR]",
 +        "(c) a table for each residue of the number of transitions between ",
 +        "rotamers per nanosecond,  and the order parameter S^2 of each dihedral.[BR]",
 +        "(d) a table for each residue of the rotamer occupancy.[PAR]",
 +        "All rotamers are taken as 3-fold, except for [GRK]omega[grk] and [GRK]chi[grk] dihedrals",
 +        "to planar groups (i.e. [GRK]chi[grk][SUB]2[sub] of aromatics, Asp and Asn; [GRK]chi[grk][SUB]3[sub] of Glu",
 +        "and Gln; and [GRK]chi[grk][SUB]4[sub] of Arg), which are 2-fold. \"rotamer 0\" means ",
 +        "that the dihedral was not in the core region of each rotamer. ",
 +        "The width of the core region can be set with [TT]-core_rotamer[tt][PAR]",
 +
 +        "The S^2 order parameters are also output to an [TT].xvg[tt] file",
 +        "(argument [TT]-o[tt] ) and optionally as a [TT].pdb[tt] file with",
 +        "the S^2 values as B-factor (argument [TT]-p[tt]). ",
 +        "The total number of rotamer transitions per timestep",
 +        "(argument [TT]-ot[tt]), the number of transitions per rotamer",
 +        "(argument [TT]-rt[tt]), and the ^3J couplings (argument [TT]-jc[tt]), ",
 +        "can also be written to [TT].xvg[tt] files. Note that the analysis",
 +        "of rotamer transitions assumes that the supplied trajectory frames",
 +        "are equally spaced in time.[PAR]",
 +
 +        "If [TT]-chi_prod[tt] is set (and [TT]-maxchi[tt] > 0), cumulative rotamers, e.g.",
 +        "1+9([GRK]chi[grk][SUB]1[sub]-1)+3([GRK]chi[grk][SUB]2[sub]-1)+([GRK]chi[grk][SUB]3[sub]-1) (if the residue has three 3-fold ",
 +        "dihedrals and [TT]-maxchi[tt] >= 3)",
 +        "are calculated. As before, if any dihedral is not in the core region,",
 +        "the rotamer is taken to be 0. The occupancies of these cumulative ",
 +        "rotamers (starting with rotamer 0) are written to the file",
 +        "that is the argument of [TT]-cp[tt], and if the [TT]-all[tt] flag",
 +        "is given, the rotamers as functions of time",
 +        "are written to [TT]chiproduct(RESIDUE)(nresnr).xvg[tt] ",
 +        "and their occupancies to [TT]histo-chiproduct(RESIDUE)(nresnr).xvg[tt].[PAR]",
 +
 +        "The option [TT]-r[tt] generates a contour plot of the average [GRK]omega[grk] angle",
 +        "as a function of the [GRK]phi[grk] and [GRK]psi[grk] angles, that is, in a Ramachandran plot",
 +        "the average [GRK]omega[grk] angle is plotted using color coding.",
 +
 +    };
 +
 +    const char *bugs[] = {
 +        "Produces MANY output files (up to about 4 times the number of residues in the protein, twice that if autocorrelation functions are calculated). Typically several hundred files are output.",
 +        "[GRK]phi[grk] and [GRK]psi[grk] dihedrals are calculated in a non-standard way, using H-N-CA-C for [GRK]phi[grk] instead of C(-)-N-CA-C, and N-CA-C-O for [GRK]psi[grk] instead of N-CA-C-N(+). This causes (usually small) discrepancies with the output of other tools like [TT]g_rama[tt].",
 +        "[TT]-r0[tt] option does not work properly",
 +        "Rotamers with multiplicity 2 are printed in [TT]chi.log[tt] as if they had multiplicity 3, with the 3rd (g(+)) always having probability 0"
 +    };
 +
 +    /* defaults */
 +    static int         r0          = 1, ndeg = 1, maxchi = 2;
 +    static gmx_bool    bAll        = FALSE;
 +    static gmx_bool    bPhi        = FALSE, bPsi = FALSE, bOmega = FALSE;
 +    static real        bfac_init   = -1.0, bfac_max = 0;
 +    static const char *maxchistr[] = { NULL, "0", "1", "2", "3",  "4", "5", "6", NULL };
 +    static gmx_bool    bRama       = FALSE, bShift = FALSE, bViol = FALSE, bRamOmega = FALSE;
 +    static gmx_bool    bNormHisto  = TRUE, bChiProduct = FALSE, bHChi = FALSE, bRAD = FALSE, bPBC = TRUE;
 +    static real        core_frac   = 0.5;
 +    t_pargs            pa[]        = {
 +        { "-r0",  FALSE, etINT, {&r0},
 +          "starting residue" },
 +        { "-phi",  FALSE, etBOOL, {&bPhi},
 +          "Output for [GRK]phi[grk] dihedral angles" },
 +        { "-psi",  FALSE, etBOOL, {&bPsi},
 +          "Output for [GRK]psi[grk] dihedral angles" },
 +        { "-omega", FALSE, etBOOL, {&bOmega},
 +          "Output for [GRK]omega[grk] dihedrals (peptide bonds)" },
 +        { "-rama", FALSE, etBOOL, {&bRama},
 +          "Generate [GRK]phi[grk]/[GRK]psi[grk] and [GRK]chi[grk][SUB]1[sub]/[GRK]chi[grk][SUB]2[sub] Ramachandran plots" },
 +        { "-viol", FALSE, etBOOL, {&bViol},
 +          "Write a file that gives 0 or 1 for violated Ramachandran angles" },
 +        { "-periodic", FALSE, etBOOL, {&bPBC},
 +          "Print dihedral angles modulo 360 degrees" },
 +        { "-all",  FALSE, etBOOL, {&bAll},
 +          "Output separate files for every dihedral." },
 +        { "-rad",  FALSE, etBOOL, {&bRAD},
 +          "in angle vs time files, use radians rather than degrees."},
 +        { "-shift", FALSE, etBOOL, {&bShift},
 +          "Compute chemical shifts from [GRK]phi[grk]/[GRK]psi[grk] angles" },
 +        { "-binwidth", FALSE, etINT, {&ndeg},
 +          "bin width for histograms (degrees)" },
 +        { "-core_rotamer", FALSE, etREAL, {&core_frac},
 +          "only the central [TT]-core_rotamer[tt]*(360/multiplicity) belongs to each rotamer (the rest is assigned to rotamer 0)" },
 +        { "-maxchi", FALSE, etENUM, {maxchistr},
 +          "calculate first ndih [GRK]chi[grk] dihedrals" },
 +        { "-normhisto", FALSE, etBOOL, {&bNormHisto},
 +          "Normalize histograms" },
 +        { "-ramomega", FALSE, etBOOL, {&bRamOmega},
 +          "compute average omega as a function of [GRK]phi[grk]/[GRK]psi[grk] and plot it in an [TT].xpm[tt] plot" },
 +        { "-bfact", FALSE, etREAL, {&bfac_init},
 +          "B-factor value for [TT].pdb[tt] file for atoms with no calculated dihedral order parameter"},
 +        { "-chi_prod", FALSE, etBOOL, {&bChiProduct},
 +          "compute a single cumulative rotamer for each residue"},
 +        { "-HChi", FALSE, etBOOL, {&bHChi},
 +          "Include dihedrals to sidechain hydrogens"},
 +        { "-bmax",  FALSE, etREAL, {&bfac_max},
 +          "Maximum B-factor on any of the atoms that make up a dihedral, for the dihedral angle to be considere in the statistics. Applies to database work where a number of X-Ray structures is analyzed. [TT]-bmax[tt] <= 0 means no limit." }
 +    };
 +
 +    FILE              *log;
 +    int                natoms, nlist, idum, nbin;
 +    t_atoms            atoms;
 +    rvec              *x;
 +    int                ePBC;
 +    matrix             box;
 +    char               title[256], grpname[256];
 +    t_dlist           *dlist;
 +    gmx_bool           bChi, bCorr, bSSHisto;
 +    gmx_bool           bDo_rt, bDo_oh, bDo_ot, bDo_jc;
 +    real               dt = 0, traj_t_ns;
 +    output_env_t       oenv;
 +    gmx_residuetype_t  rt;
 +
 +    atom_id            isize, *index;
 +    int                ndih, nactdih, nf;
 +    real             **dih, *trans_frac, *aver_angle, *time;
 +    int                i, j, **chi_lookup, *multiplicity;
 +
 +    t_filenm           fnm[] = {
 +        { efSTX, "-s",  NULL,     ffREAD  },
 +        { efTRX, "-f",  NULL,     ffREAD  },
 +        { efXVG, "-o",  "order",  ffWRITE },
 +        { efPDB, "-p",  "order",  ffOPTWR },
 +        { efDAT, "-ss", "ssdump", ffOPTRD },
 +        { efXVG, "-jc", "Jcoupling", ffWRITE },
 +        { efXVG, "-corr",  "dihcorr", ffOPTWR },
 +        { efLOG, "-g",  "chi",    ffWRITE },
 +        /* add two more arguments copying from g_angle */
 +        { efXVG, "-ot", "dihtrans", ffOPTWR },
 +        { efXVG, "-oh", "trhisto",  ffOPTWR },
 +        { efXVG, "-rt", "restrans",  ffOPTWR },
 +        { efXVG, "-cp", "chiprodhisto",  ffOPTWR }
 +    };
 +#define NFILE asize(fnm)
 +    int                npargs;
 +    t_pargs           *ppa;
 +
 +    npargs = asize(pa);
 +    ppa    = add_acf_pargs(&npargs, pa);
 +    parse_common_args(&argc, argv, PCA_CAN_VIEW | PCA_CAN_TIME | PCA_BE_NICE,
 +                      NFILE, fnm, npargs, ppa, asize(desc), desc, asize(bugs), bugs,
 +                      &oenv);
 +
 +    /* Handle result from enumerated type */
 +    sscanf(maxchistr[0], "%d", &maxchi);
 +    bChi = (maxchi > 0);
 +
 +    log = ffopen(ftp2fn(efLOG, NFILE, fnm), "w");
 +
 +    if (bRamOmega)
 +    {
 +        bOmega = TRUE;
 +        bPhi   = TRUE;
 +        bPsi   = TRUE;
 +    }
 +
 +    /* set some options */
 +    bDo_rt = (opt2bSet("-rt", NFILE, fnm));
 +    bDo_oh = (opt2bSet("-oh", NFILE, fnm));
 +    bDo_ot = (opt2bSet("-ot", NFILE, fnm));
 +    bDo_jc = (opt2bSet("-jc", NFILE, fnm));
 +    bCorr  = (opt2bSet("-corr", NFILE, fnm));
 +    if (bCorr)
 +    {
 +        fprintf(stderr, "Will calculate autocorrelation\n");
 +    }
 +
 +    if (core_frac > 1.0)
 +    {
 +        fprintf(stderr, "core_rotamer fraction > 1.0 ; will use 1.0\n");
 +        core_frac = 1.0;
 +    }
 +    if (core_frac < 0.0)
 +    {
 +        fprintf(stderr, "core_rotamer fraction < 0.0 ; will use 0.0\n");
 +        core_frac = 0.0;
 +    }
 +
 +    if (maxchi > MAXCHI)
 +    {
 +        fprintf(stderr,
 +                "Will only calculate first %d Chi dihedrals in stead of %d.\n",
 +                MAXCHI, maxchi);
 +        maxchi = MAXCHI;
 +    }
 +    bSSHisto = ftp2bSet(efDAT, NFILE, fnm);
 +    nbin     = 360/ndeg;
 +
 +    /* Find the chi angles using atoms struct and a list of amino acids */
 +    get_stx_coordnum(ftp2fn(efSTX, NFILE, fnm), &natoms);
 +    init_t_atoms(&atoms, natoms, TRUE);
 +    snew(x, natoms);
 +    read_stx_conf(ftp2fn(efSTX, NFILE, fnm), title, &atoms, x, NULL, &ePBC, box);
 +    fprintf(log, "Title: %s\n", title);
 +
 +    gmx_residuetype_init(&rt);
 +    dlist = mk_dlist(log, &atoms, &nlist, bPhi, bPsi, bChi, bHChi, maxchi, r0, rt);
 +    fprintf(stderr, "%d residues with dihedrals found\n", nlist);
 +
 +    if (nlist == 0)
 +    {
 +        gmx_fatal(FARGS, "No dihedrals in your structure!\n");
 +    }
 +
 +    /* Make a linear index for reading all. */
 +    index = make_chi_ind(nlist, dlist, &ndih);
 +    isize = 4*ndih;
 +    fprintf(stderr, "%d dihedrals found\n", ndih);
 +
 +    snew(dih, ndih);
 +
 +    /* COMPUTE ALL DIHEDRALS! */
 +    read_ang_dih(ftp2fn(efTRX, NFILE, fnm), FALSE, TRUE, FALSE, bPBC, 1, &idum,
 +                 &nf, &time, isize, index, &trans_frac, &aver_angle, dih, oenv);
 +
 +    dt = (time[nf-1]-time[0])/(nf-1); /* might want this for corr or n. transit*/
 +    if (bCorr)
 +    {
 +        if (nf < 2)
 +        {
 +            gmx_fatal(FARGS, "Need at least 2 frames for correlation");
 +        }
 +    }
 +
 +    /* put angles in -M_PI to M_PI ! and correct phase factor for phi and psi
 +     * pass nactdih instead of ndih to low_ana_dih_trans and get_chi_product_traj
 +     * to prevent accessing off end of arrays when maxchi < 5 or 6. */
 +    nactdih = reset_em_all(nlist, dlist, nf, dih, maxchi);
 +
 +    if (bAll)
 +    {
 +        dump_em_all(nlist, dlist, nf, time, dih, maxchi, bPhi, bPsi, bChi, bOmega, bRAD, oenv);
 +    }
 +
 +    /* Histogramming & J coupling constants & calc of S2 order params */
 +    histogramming(log, nbin, rt, nf, maxchi, dih, nlist, dlist, index,
 +                  bPhi, bPsi, bOmega, bChi,
 +                  bNormHisto, bSSHisto, ftp2fn(efDAT, NFILE, fnm), bfac_max, &atoms,
 +                  bDo_jc, opt2fn("-jc", NFILE, fnm), oenv);
 +
 +    /* transitions
 +     *
 +     * added multiplicity */
 +
 +    snew(multiplicity, ndih);
 +    mk_multiplicity_lookup(multiplicity, maxchi, dih, nlist, dlist, ndih);
 +
 +    strcpy(grpname, "All residues, ");
 +    if (bPhi)
 +    {
 +        strcat(grpname, "Phi ");
 +    }
 +    if (bPsi)
 +    {
 +        strcat(grpname, "Psi ");
 +    }
 +    if (bOmega)
 +    {
 +        strcat(grpname, "Omega ");
 +    }
 +    if (bChi)
 +    {
 +        strcat(grpname, "Chi 1-");
 +        sprintf(grpname + strlen(grpname), "%i", maxchi);
 +    }
 +
 +
 +    low_ana_dih_trans(bDo_ot, opt2fn("-ot", NFILE, fnm),
 +                      bDo_oh, opt2fn("-oh", NFILE, fnm), maxchi,
 +                      dih, nlist, dlist, nf, nactdih, grpname, multiplicity,
 +                      time, FALSE, core_frac, oenv);
 +
 +    /* Order parameters */
 +    order_params(log, opt2fn("-o", NFILE, fnm), maxchi, nlist, dlist,
 +                 ftp2fn_null(efPDB, NFILE, fnm), bfac_init,
 +                 &atoms, x, ePBC, box, bPhi, bPsi, bChi, oenv);
 +
 +    /* Print ramachandran maps! */
 +    if (bRama)
 +    {
 +        do_rama(nf, nlist, dlist, dih, bViol, bRamOmega, oenv);
 +    }
 +
 +    if (bShift)
 +    {
 +        do_pp2shifts(log, nf, nlist, dlist, dih);
 +    }
 +
 +    /* print S^2, transitions, and rotamer occupancies to log */
 +    traj_t_ns = 0.001 * (time[nf-1]-time[0]);
 +    pr_dlist(log, nlist, dlist, traj_t_ns, edPrintST, bPhi, bPsi, bChi, bOmega, maxchi);
 +    pr_dlist(log, nlist, dlist, traj_t_ns, edPrintRO, bPhi, bPsi, bChi, bOmega, maxchi);
 +    ffclose(log);
 +    /* transitions to xvg */
 +    if (bDo_rt)
 +    {
 +        print_transitions(opt2fn("-rt", NFILE, fnm), maxchi, nlist, dlist,
 +                          &atoms, x, box, bPhi, bPsi, bChi, traj_t_ns, oenv);
 +    }
 +
 +    /* chi_product trajectories (ie one "rotamer number" for each residue) */
 +    if (bChiProduct && bChi)
 +    {
 +        snew(chi_lookup, nlist);
 +        for (i = 0; i < nlist; i++)
 +        {
 +            snew(chi_lookup[i], maxchi);
 +        }
 +        mk_chi_lookup(chi_lookup, maxchi, dih, nlist, dlist);
 +
 +        get_chi_product_traj(dih, nf, nactdih, nlist,
 +                             maxchi, dlist, time, chi_lookup, multiplicity,
 +                             FALSE, bNormHisto, core_frac, bAll,
 +                             opt2fn("-cp", NFILE, fnm), oenv);
 +
 +        for (i = 0; i < nlist; i++)
 +        {
 +            sfree(chi_lookup[i]);
 +        }
 +    }
 +
 +    /* Correlation comes last because it messes up the angles */
 +    if (bCorr)
 +    {
 +        do_dihcorr(opt2fn("-corr", NFILE, fnm), nf, ndih, dih, dt, nlist, dlist, time,
 +                   maxchi, bPhi, bPsi, bChi, bOmega, oenv);
 +    }
 +
 +
 +    do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
 +    do_view(oenv, opt2fn("-jc", NFILE, fnm), "-nxy");
 +    if (bCorr)
 +    {
 +        do_view(oenv, opt2fn("-corr", NFILE, fnm), "-nxy");
 +    }
 +
 +    gmx_residuetype_destroy(rt);
 +
 +    thanx(stderr);
 +
 +    return 0;
 +}
diff --cc src/gromacs/gmxana/gmx_gyrate.c
index 1111e899ff,0000000000..66bf4d406c
mode 100644,000000..100644
--- a/src/gromacs/gmxana/gmx_gyrate.c
+++ b/src/gromacs/gmxana/gmx_gyrate.c
@@@ -1,395 -1,0 +1,395 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Green Red Orange Magenta Azure Cyan Skyblue
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +
 +#include "statutil.h"
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "vec.h"
 +#include "pbc.h"
 +#include "copyrite.h"
 +#include "futil.h"
 +#include "statutil.h"
 +#include "index.h"
 +#include "mshift.h"
 +#include "xvgr.h"
 +#include "princ.h"
 +#include "rmpbc.h"
 +#include "txtdump.h"
 +#include "tpxio.h"
 +#include "gstat.h"
 +#include "gmx_ana.h"
 +
 +
 +real calc_gyro(rvec x[], int gnx, atom_id index[], t_atom atom[], real tm,
 +               rvec gvec, rvec d, gmx_bool bQ, gmx_bool bRot, gmx_bool bMOI, matrix trans)
 +{ /* Returns sqrt(sum(w*|x|^2)/tm) over the gnx atoms in index[]; gvec receives the radii about each axis */
 +    int    i, ii, m;
 +    real   gyro, dx2, m0, Itot;
 +    rvec   comp; /* per-dimension sum of weight * coordinate^2 */
 +
 +    if (bRot)
 +    {
 +        principal_comp(gnx, index, atom, x, trans, d); /* fills trans and d; presumably principal axes and moments - confirm in princ.h */
 +        Itot = norm(d);
 +        if (bMOI)
 +        {
 +            return Itot; /* moment-of-inertia mode stops here; gvec is not written */
 +        }
 +        for (m = 0; (m < DIM); m++)
 +        {
 +            d[m] = sqrt(d[m]/tm); /* d is overwritten in place with sqrt(d/tm) */
 +        }
 +#ifdef DEBUG
 +        pr_rvecs(stderr, 0, "trans", trans, DIM);
 +#endif
 +        /* rotate_atoms(gnx,index,x,trans); */
 +    }
 +    clear_rvec(comp);
 +    for (i = 0; (i < gnx); i++)
 +    {
 +        ii = index[i];
 +        if (bQ)
 +        {
 +            m0 = fabs(atom[ii].q); /* weight by absolute charge */
 +        }
 +        else
 +        {
 +            m0 = atom[ii].m; /* weight by mass */
 +        }
 +        for (m = 0; (m < DIM); m++)
 +        {
 +            dx2      = x[ii][m]*x[ii][m]; /* x is used as given; no reference point is subtracted here */
 +            comp[m] += dx2*m0;
 +        }
 +    }
 +    gyro = comp[XX]+comp[YY]+comp[ZZ];
 +
 +    for (m = 0; (m < DIM); m++)
 +    {
 +        gvec[m] = sqrt((gyro-comp[m])/tm); /* radius about axis m: only the other two dimensions contribute */
 +    }
 +
 +    return sqrt(gyro/tm);
 +}
 +
 +void calc_gyro_z(rvec x[], matrix box,
 +                 int gnx, atom_id index[], t_atom atom[],
 +                 int nz, real time, FILE *out)
 +{ /* Writes one line to out: time, then two 2D (x-y) radii of gyration for each of the nz slices along z */
 +    static dvec   *inertia = NULL; /* per-slice accumulators; allocated on the first call and kept */
 +    static double *tm      = NULL; /* per-slice total weight */
 +    int            i, ii, j, zi;
 +    real           zf, w, sdet, e1, e2;
 +
 +    if (inertia == NULL)
 +    {
 +        snew(inertia, nz); /* sized by the nz of the first call; later calls must not pass a larger nz */
 +        snew(tm, nz);
 +    }
 +
 +    for (i = 0; i < nz; i++)
 +    {
 +        clear_dvec(inertia[i]);
 +        tm[i] = 0;
 +    }
 +
 +    for (i = 0; (i < gnx); i++)
 +    {
 +        ii = index[i];
 +        zf = nz*x[ii][ZZ]/box[ZZ][ZZ]; /* z position in units of slices */
 +        if (zf >= nz || zf < 0)
 +        {
 +            zf -= nz*floor(zf/nz); /* wrap by any number of box lengths, not just one, so zi stays in range */
 +        }
 +        if (zf >= nz)
 +        {
 +            zf = 0; /* rounding in the wrap above can land exactly on nz, which is slice 0 again */
 +        }
 +        for (j = 0; j < 2; j++)
 +        {
 +            zi = (int)zf + j; /* truncate before adding j so a float round-up cannot yield nz+1 */
 +            if (zi == nz)
 +            {
 +                zi = 0; /* the slice above the last one is the first one */
 +            }
 +            w               = atom[ii].m*(1 + cos(M_PI*(zf - zi))); /* mass spread over the two neighbouring slices */
 +            inertia[zi][0] += w*sqr(x[ii][YY]);
 +            inertia[zi][1] += w*sqr(x[ii][XX]);
 +            inertia[zi][2] -= w*x[ii][XX]*x[ii][YY];
 +            tm[zi]         += w;
 +        }
 +    }
 +    fprintf(out, "%10g", time);
 +    for (j = 0; j < nz; j++)
 +    {
 +        for (i = 0; i < 3; i++)
 +        {
 +            inertia[j][i] /= tm[j]; /* an empty slice has tm[j] == 0 and prints non-finite values */
 +        }
 +        sdet = sqrt(sqr(inertia[j][0] - inertia[j][1]) + 4*sqr(inertia[j][2])); /* eigenvalue gap of the symmetric 2x2 matrix */
 +        e1   = sqrt(0.5*(inertia[j][0] + inertia[j][1] + sdet));
 +        e2   = sqrt(0.5*(inertia[j][0] + inertia[j][1] - sdet));
 +        fprintf(out, " %5.3f %5.3f", e1, e2);
 +    }
 +    fprintf(out, "\n");
 +}
 +
 +int gmx_gyrate(int argc, char *argv[])
 +{ /* Entry point of g_gyrate: reads a structure and a trajectory and writes radii of gyration (or moments of inertia) per frame */
 +    const char     *desc[] = {
-         "[TT]g_gyrate[tt] computes the radius of gyration of a group of atoms",
++        "[TT]g_gyrate[tt] computes the radius of gyration of a molecule",
 +        "and the radii of gyration about the [IT]x[it]-, [IT]y[it]- and [IT]z[it]-axes,",
 +        "as a function of time. The atoms are explicitly mass weighted.[PAR]",
 +        "With the [TT]-nmol[tt] option the radius of gyration will be calculated",
 +        "for multiple molecules by splitting the analysis group in equally",
 +        "sized parts.[PAR]",
 +        "With the option [TT]-nz[tt] 2D radii of gyration in the [IT]x-y[it] plane",
 +        "of slices along the [IT]z[it]-axis are calculated."
 +    };
 +    static int      nmol = 1, nz = 0;
 +    static gmx_bool bQ   = FALSE, bRot = FALSE, bMOI = FALSE;
 +    t_pargs         pa[] = {
 +        { "-nmol", FALSE, etINT, {&nmol},
 +          "The number of molecules to analyze" },
 +        { "-q", FALSE, etBOOL, {&bQ},
 +          "Use absolute value of the charge of an atom as weighting factor instead of mass" },
 +        { "-p", FALSE, etBOOL, {&bRot},
 +          "Calculate the radii of gyration about the principal axes." },
 +        { "-moi", FALSE, etBOOL, {&bMOI},
 +          "Calculate the moments of inertia (defined by the principal axes)." },
 +        { "-nz", FALSE, etINT, {&nz},
 +          "Calculate the 2D radii of gyration of this number of slices along the z-axis" },
 +    };
 +    FILE           *out;
 +    t_trxstatus    *status;
 +    t_topology      top;
 +    int             ePBC;
 +    rvec           *x, *x_s;
 +    rvec            xcm, gvec, gvec1 = { 0 }; /* gvec1 zeroed: calc_gyro skips it with -moi and nothing writes it with -nz */
 +    matrix          box, trans;
 +    gmx_bool        bACF;
 +    real          **moi_trans = NULL;
 +    int             max_moi   = 0, delta_moi = 100; /* moi_trans capacity in frames and its growth step */
 +    rvec            d, d1 = { 0 }; /* eigenvalues of inertia tensor; d1 zeroed because calc_gyro only writes it when bRot is set */
 +    real            t, t0, tm, gyro;
 +    int             natoms;
 +    char           *grpname, title[256];
 +    int             i, j, m, gnx, nam, mol;
 +    atom_id        *index;
 +    output_env_t    oenv;
 +    gmx_rmpbc_t     gpbc   = NULL;
 +    const char     *leg[]  = { "Rg", "RgX", "RgY", "RgZ" };
 +    const char     *legI[] = { "Itot", "I1", "I2", "I3" };
 +#define NLEG asize(leg)
 +    t_filenm        fnm[] = {
 +        { efTRX, "-f",   NULL,       ffREAD },
 +        { efTPS, NULL,   NULL,       ffREAD },
 +        { efNDX, NULL,   NULL,       ffOPTRD },
 +        { efXVG, NULL,   "gyrate",   ffWRITE },
 +        { efXVG, "-acf", "moi-acf",  ffOPTWR },
 +    };
 +#define NFILE asize(fnm)
 +    int             npargs;
 +    t_pargs        *ppa;
 +
 +    npargs = asize(pa);
 +    ppa    = add_acf_pargs(&npargs, pa);
 +
 +    parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW | PCA_BE_NICE,
 +                      NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv);
 +    bACF = opt2bSet("-acf", NFILE, fnm);
 +    if (bACF && nmol != 1)
 +    {
 +        gmx_fatal(FARGS, "Can only do acf with nmol=1");
 +    }
 +    bRot = bRot || bMOI || bACF; /* -moi and -acf both need the principal-axes computation */
 +    /*
 +       if (nz > 0)
 +       bMOI = TRUE;
 +     */
 +    if (bRot)
 +    {
 +        printf("Will rotate system along principal axes\n");
 +        snew(moi_trans, DIM);
 +    }
 +    if (bMOI)
 +    {
 +        printf("Will print moments of inertia\n");
 +        bQ = FALSE; /* -moi overrides -q */
 +    }
 +    if (bQ)
 +    {
 +        printf("Will print radius normalised by charge\n");
 +    }
 +
 +    read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &x, NULL, box, TRUE);
 +    get_index(&top.atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpname);
 +
 +    if (nmol < 1 || nmol > gnx || gnx % nmol != 0) /* nmol < 1 is tested first so the modulo never divides by zero */
 +    {
 +        gmx_fatal(FARGS, "The number of atoms in the group (%d) is not a multiple of nmol (%d)", gnx, nmol);
 +    }
 +    nam = gnx/nmol; /* atoms per molecule */
 +
 +    natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
 +    snew(x_s, natoms);
 +
 +    j  = 0; /* frame counter */
 +    t0 = t;
 +    if (bQ)
 +    {
 +        out = xvgropen(ftp2fn(efXVG, NFILE, fnm),
 +                       "Radius of Charge", "Time (ps)", "Rg (nm)", oenv);
 +    }
 +    else if (bMOI)
 +    {
 +        out = xvgropen(ftp2fn(efXVG, NFILE, fnm),
 +                       "Moments of inertia", "Time (ps)", "I (a.m.u. nm\\S2\\N)", oenv);
 +    }
 +    else
 +    {
 +        out = xvgropen(ftp2fn(efXVG, NFILE, fnm),
 +                       "Radius of gyration", "Time (ps)", "Rg (nm)", oenv);
 +    }
 +    if (bMOI)
 +    {
 +        xvgr_legend(out, NLEG, legI, oenv);
 +    }
 +    else
 +    {
 +        if (bRot)
 +        {
 +            if (output_env_get_print_xvgr_codes(oenv))
 +            {
 +                fprintf(out, "@ subtitle \"Axes are principal component axes\"\n");
 +            }
 +        }
 +        xvgr_legend(out, NLEG, leg, oenv);
 +    }
 +    if (nz == 0)
 +    {
 +        gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms, box);
 +    }
 +    do
 +    {
 +        if (nz == 0)
 +        {
 +            gmx_rmpbc_copy(gpbc, natoms, box, x, x_s); /* x_s receives the coordinates processed by gmx_rmpbc_copy */
 +        }
 +        gyro = 0;
 +        clear_rvec(gvec);
 +        clear_rvec(d);
 +        for (mol = 0; mol < nmol; mol++)
 +        {
 +            tm    = sub_xcm(nz == 0 ? x_s : x, nam, index+mol*nam, top.atoms.atom, xcm, bQ);
 +            if (nz == 0)
 +            {
 +                gyro += calc_gyro(x_s, nam, index+mol*nam, top.atoms.atom,
 +                                  tm, gvec1, d1, bQ, bRot, bMOI, trans);
 +            }
 +            else
 +            {
 +                calc_gyro_z(x, box, nam, index+mol*nam, top.atoms.atom, nz, t, out); /* writes its own output line */
 +            }
 +            rvec_inc(gvec, gvec1);
 +            rvec_inc(d, d1);
 +        }
 +        if (nmol > 0)
 +        {
 +            gyro /= nmol; /* average over the molecules */
 +            svmul(1.0/nmol, gvec, gvec);
 +            svmul(1.0/nmol, d, d);
 +        }
 +
 +        if (nz == 0)
 +        {
 +            if (bRot)
 +            {
 +                if (j >= max_moi)
 +                {
 +                    max_moi += delta_moi;
 +                    for (m = 0; (m < DIM); m++)
 +                    {
 +                        srenew(moi_trans[m], max_moi*DIM);
 +                    }
 +                }
 +                for (m = 0; (m < DIM); m++)
 +                {
 +                    copy_rvec(trans[m], moi_trans[m]+DIM*j); /* store row m of trans for frame j, for the ACF */
 +                }
 +                fprintf(out, "%10g  %10g  %10g  %10g  %10g\n",
 +                        t, gyro, d[XX], d[YY], d[ZZ]);
 +            }
 +            else
 +            {
 +                fprintf(out, "%10g  %10g  %10g  %10g  %10g\n",
 +                        t, gyro, gvec[XX], gvec[YY], gvec[ZZ]);
 +            }
 +        }
 +        j++;
 +    }
 +    while (read_next_x(oenv, status, &t, natoms, x, box));
 +    close_trj(status);
 +    if (nz == 0)
 +    {
 +        gmx_rmpbc_done(gpbc);
 +    }
 +
 +    ffclose(out);
 +
 +    if (bACF)
 +    {
 +        int mode = eacVector;
 +
 +        do_autocorr(opt2fn("-acf", NFILE, fnm), oenv,
 +                    "Moment of inertia vector ACF",
 +                    j, 3, moi_trans, (t-t0)/j, mode, FALSE);
 +        do_view(oenv, opt2fn("-acf", NFILE, fnm), "-nxy");
 +    }
 +
 +    do_view(oenv, ftp2fn(efXVG, NFILE, fnm), "-nxy");
 +
 +    thanx(stderr);
 +
 +    return 0;
 +}
diff --cc src/gromacs/gmxlib/nonbonded/nonbonded.c
index ac5fe893d4,0000000000..d866862453
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/nonbonded/nonbonded.c
+++ b/src/gromacs/gmxlib/nonbonded/nonbonded.c
@@@ -1,685 -1,0 +1,686 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include "typedefs.h"
 +#include "txtdump.h"
 +#include "smalloc.h"
 +#include "ns.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "macros.h"
 +#include "string2.h"
 +#include "force.h"
 +#include "names.h"
 +#include "main.h"
 +#include "xvgr.h"
 +#include "gmx_fatal.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "nrnb.h"
 +#include "smalloc.h"
 +#include "nonbonded.h"
 +
 +#include "nb_kernel.h"
 +#include "nb_free_energy.h"
 +#include "nb_generic.h"
 +#include "nb_generic_cg.h"
 +#include "nb_generic_adress.h"
 +
 +/* Different default (c) and accelerated interaction-specific kernels */
 +#include "nb_kernel_c/nb_kernel_c.h"
 +
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 +#    include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 +#    include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 +#    include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 +#    include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +static gmx_bool            nonbonded_setup_done  = FALSE;
 +
 +
 +void
 +gmx_nonbonded_setup(FILE *         fplog,
 +                    t_forcerec *   fr,
 +                    gmx_bool       bGenericKernelOnly)
 +{ /* One-time registration of the nonbonded kernel lists; calls after the first are no-ops. fplog is not used here. */
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
 +#endif
 +    /* Here we are guaranteed only one thread made it. */
 +    if (nonbonded_setup_done == FALSE)
 +    {
 +        if (bGenericKernelOnly == FALSE) /* when TRUE no kernel list is added at all, only the hash is initialised */
 +        {
 +            /* Add the generic kernels to the structure stored statically in nb_kernel.c */
 +            nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
 +
 +            if (!(fr != NULL && fr->use_cpu_acceleration == FALSE)) /* skipped only when fr is given and has acceleration switched off */
 +            {
 +                /* Add interaction-specific kernels for different architectures */
 +                /* Single precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
 +#endif
 +                /* Double precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 +                nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double,kernellist_sparc64_hpc_ace_double_size);
 +#endif
 +                ; /* empty statement to avoid a completely empty block */
 +            }
 +        }
 +        /* Create a hash for faster lookups */
 +        nb_kernel_list_hash_init();
 +
 +        nonbonded_setup_done = TRUE; /* set while the mutex is still held when GMX_THREAD_MPI is defined */
 +    }
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
 +#endif
 +}
 +
 +
 +
 +/* Choose the kernel function pointers and the SIMD padding width for one
 + * neighborlist.
 + *
 + * log  Passed on to nb_kernel_list_findkernel() for most lookups.
 + * nl   Neighborlist whose kernelptr_vf, kernelptr_v, kernelptr_f and
 + *      simd_padding_width fields are written here. Its type, ielec,
 + *      ielecmod, ivdw, ivdwmod and igeometry fields select the kernel.
 + *
 + * kernelptr_v is only reset to NULL in this routine; it is never assigned
 + * a kernel.
 + */
 +void
 +gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
 +{
 +    const char *     elec;
 +    const char *     elec_mod;
 +    const char *     vdw;
 +    const char *     vdw_mod;
 +    const char *     geom;
 +    const char *     other;
 +    /* NOTE(review): vf is declared but not referenced in this function. */
 +    const char *     vf;
 +
 +    /* Architecture names to try, in order, together with the neighborlist
 +     * padding width stored when that entry is tried. Which entries exist
 +     * depends on the acceleration/precision macros below; the plain "c"
 +     * entry is always present and always last.
 +     */
 +    struct
 +    {
 +        const char *  arch;
 +        int           simd_padding_width;
 +    }
 +    arch_and_padding[] =
 +    {
 +        /* Single precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 +        { "avx_256_single", 8 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 +        { "avx_128_fma_single", 4 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 +        { "sse4_1_single", 4 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 +        { "sse2_single", 4 },
 +#endif
 +        /* Double precision */
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 +        { "avx_256_double", 4 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 +        /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
 +         * since the kernels execute a loop unrolled a factor 2, followed by
 +         * a possible single odd-element epilogue.
 +         */
 +        { "avx_128_fma_double", 1 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 +        /* No padding - see comment above */
 +        { "sse2_double", 1 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 +        /* No padding - see comment above */
 +        { "sse4_1_double", 1 },
 +#endif
 +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 +        /* No padding - see comment above */
 +        { "sparc64_hpc_ace_double", 1 },
 +#endif
 +        { "c", 1 },
 +    };
 +    int              narch = asize(arch_and_padding);
 +    int              i;
 +
 +    if (nonbonded_setup_done == FALSE)
 +    {
 +        /* We typically call this setup routine before starting timers,
 +         * but if that has not been done for whatever reason we do it now.
 +         */
 +        gmx_nonbonded_setup(NULL, NULL, FALSE);
 +    }
 +
 +    /* Not used yet */
 +    other = "";
 +
 +    /* Start from a clean slate; the search loops below rely on NULL
 +     * meaning "no kernel found yet".
 +     */
 +    nl->kernelptr_vf = NULL;
 +    nl->kernelptr_v  = NULL;
 +    nl->kernelptr_f  = NULL;
 +
 +    /* Translate the list's interaction settings into the name strings
 +     * used as lookup keys.
 +     */
 +    elec     = gmx_nbkernel_elec_names[nl->ielec];
 +    elec_mod = eintmod_names[nl->ielecmod];
 +    vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
 +    vdw_mod  = eintmod_names[nl->ivdwmod];
 +    geom     = gmx_nblist_geometry_names[nl->igeometry];
 +
 +    /* AdResS lists always use the generic AdResS kernel, without padding */
 +    if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
 +    {
 +        nl->kernelptr_vf       = (void *) gmx_nb_generic_adress_kernel;
 +        nl->kernelptr_f        = (void *) gmx_nb_generic_adress_kernel;
 +        nl->simd_padding_width = 1;
 +        return;
 +    }
 +
 +    if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
 +    {
 +        /* Free-energy lists use the single free-energy kernel for both pointers */
 +        nl->kernelptr_vf       = (void *) gmx_nb_free_energy_kernel;
 +        nl->kernelptr_f        = (void *) gmx_nb_free_energy_kernel;
 +        nl->simd_padding_width = 1;
 +    }
 +    else if (!gmx_strcasecmp_min(geom, "CG-CG"))
 +    {
 +        /* Geometry name compares equal to "CG-CG" (presumably a zero return
 +         * means a match, as with strcmp): use the generic charge-group kernel.
 +         */
 +        nl->kernelptr_vf       = (void *) gmx_nb_generic_cg_kernel;
 +        nl->kernelptr_f        = (void *) gmx_nb_generic_cg_kernel;
 +        nl->simd_padding_width = 1;
 +    }
 +    else
 +    {
 +        /* Try to find a specific kernel first */
 +
 +        /* Both loops stop at the first table entry that yields a kernel.
 +         * simd_padding_width is overwritten on every iteration, so after
 +         * the second loop it holds the width of the last entry tried for
 +         * the force kernel.
 +         */
 +        for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
 +        {
 +            nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 +            nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 +        }
 +        for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
 +        {
 +            nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
 +            nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 +
 +            /* If there is no force-only optimized kernel, is there a potential & force one? */
 +            if (nl->kernelptr_f == NULL)
 +            {
 +                nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 +                nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 +            }
 +        }
 +
 +        /* Give up, pick a generic one instead */
 +        if (nl->kernelptr_vf == NULL)
 +        {
 +            /* The generic kernel replaces the force pointer too, even if
 +             * the second loop above found a force-only kernel.
 +             */
 +            nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
 +            nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
 +            nl->simd_padding_width = 1;
 +            if (debug)
 +            {
 +                fprintf(debug,
 +                        "WARNING - Slow generic NB kernel used for neighborlist with\n"
 +                        "    Elec: '%s', Modifier: '%s'\n"
 +                        "    Vdw:  '%s', Modifier: '%s'\n"
 +                        "    Geom: '%s', Other: '%s'\n\n",
 +                        elec, elec_mod, vdw, vdw_mod, geom, other);
 +            }
 +        }
 +    }
 +
 +    return;
 +}
 +
 +/* Run the nonbonded kernels over the requested neighborlists.
 + *
 + * nls    Index of the neighborlist set in fr->nblists to process, or a
 + *        negative value to process all fr->nnblists sets.
 + * eNL    Index of the list inside each set to process, or a negative
 + *        value to process all eNL_NR lists.
 + * flags  Bit mask; GMX_NONBONDED_DO_SR / GMX_NONBONDED_DO_LR select the
 + *        short- and long-range lists, GMX_NONBONDED_DO_POTENTIAL selects
 + *        the potential-and-force kernel instead of the force-only one, and
 + *        GMX_NONBONDED_DO_FOREIGNLAMBDA restricts the work to free-energy
 + *        lists.
 + *
 + * Short-range forces are passed to the kernels as f_shortrange, long-range
 + * forces as f_longrange. The cr and box_size arguments are not referenced
 + * in this function.
 + */
 +void do_nonbonded(t_commrec *cr, t_forcerec *fr,
 +                  rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
 +                  gmx_grppairener_t *grppener, rvec box_size,
 +                  t_nrnb *nrnb, real *lambda, real *dvdl,
 +                  int nls, int eNL, int flags)
 +{
 +    nb_kernel_data_t  kernel_data;
 +    nb_kernel_t *     kernelptr;
 +    t_nblists *       nblists;
 +    t_nblist *        nlist;
 +    rvec *            f;
 +    int               n, n0, n1;
 +    int               i, i0, i1;
 +    int               range;
 +
 +    /* Settings shared by every kernel call */
 +    kernel_data.flags      = flags;
 +    kernel_data.exclusions = excl;
 +    kernel_data.lambda     = lambda;
 +    kernel_data.dvdl       = dvdl;
 +
 +    if (fr->bAllvsAll)
 +    {
++        gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
 +        return;
 +    }
 +
 +    /* Range of lists inside each set: a single one, or all of them */
 +    i0 = (eNL >= 0) ? eNL : 0;
 +    i1 = (eNL >= 0) ? eNL + 1 : eNL_NR;
 +
 +    /* Range of neighborlist sets: a single one, or all of them */
 +    n0 = (nls >= 0) ? nls : 0;
 +    n1 = (nls >= 0) ? nls + 1 : fr->nnblists;
 +
 +    for (n = n0; n < n1; n++)
 +    {
 +        nblists = &fr->nblists[n];
 +
 +        kernel_data.table_elec     = &nblists->table_elec;
 +        kernel_data.table_vdw      = &nblists->table_vdw;
 +        kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
 +
 +        /* range 0 handles the short-range lists, range 1 the long-range ones */
 +        for (range = 0; range < 2; range++)
 +        {
 +            if (range == 0)
 +            {
 +                if (!(flags & GMX_NONBONDED_DO_SR))
 +                {
 +                    continue;
 +                }
 +                kernel_data.energygrp_elec         = grppener->ener[egCOULSR];
 +                kernel_data.energygrp_vdw          = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
 +                kernel_data.energygrp_polarization = grppener->ener[egGB];
 +                nlist                              = nblists->nlist_sr;
 +                f                                  = f_shortrange;
 +            }
 +            else
 +            {
 +                if (!(flags & GMX_NONBONDED_DO_LR))
 +                {
 +                    continue;
 +                }
 +                kernel_data.energygrp_elec         = grppener->ener[egCOULLR];
 +                kernel_data.energygrp_vdw          = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
 +                kernel_data.energygrp_polarization = grppener->ener[egGB];
 +                nlist                              = nblists->nlist_lr;
 +                f                                  = f_longrange;
 +            }
 +
 +            for (i = i0; i < i1; i++)
 +            {
 +                /* Nothing to do for empty lists */
 +                if (nlist[i].nri <= 0)
 +                {
 +                    continue;
 +                }
 +
 +                /* We don't need the non-perturbed interactions
 +                 * when evaluating foreign lambdas.
 +                 */
 +                if ((flags & GMX_NONBONDED_DO_FOREIGNLAMBDA) &&
 +                    nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY)
 +                {
 +                    continue;
 +                }
 +
 +                /* Potential and force, or force only */
 +                kernelptr = (nb_kernel_t *)((flags & GMX_NONBONDED_DO_POTENTIAL) ?
 +                                            nlist[i].kernelptr_vf :
 +                                            nlist[i].kernelptr_f);
 +
 +                (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
 +            }
 +        }
 +    }
 +}
 +
 +/* Warn that the listed pair (ai, aj) is at distance r, beyond the table
 + * limit rlimit. When the debug stream is open, the coordinates of both
 + * atoms are written there as well. Atom numbers are reported through
 + * glatnr().
 + */
 +static void
 +nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
 +{
 +    const int   nr_i = glatnr(global_atom_index, ai);
 +    const int   nr_j = glatnr(global_atom_index, aj);
 +    const real *xi   = x[ai];
 +    const real *xj   = x[aj];
 +
 +    gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
 +                "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
 +                "This is likely either a 1,4 interaction, or a listed interaction inside\n"
 +                "a smaller molecule you are decoupling during a free energy calculation.\n"
 +                "Since interactions at distances beyond the table cannot be computed,\n"
 +                "they are skipped until they are inside the table limit again. You will\n"
 +                "only see this message once, even if it occurs for several interactions.\n\n"
 +                "IMPORTANT: This should not happen in a stable simulation, so there is\n"
 +                "probably something wrong with your system. Only change the table-extension\n"
 +                "distance in the mdp file if you are really sure that is the reason.\n",
 +                nr_i, nr_j, r, rlimit);
 +
 +    if (!debug)
 +    {
 +        return;
 +    }
 +
 +    fprintf(debug,
 +            "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
 +            xi[XX], xi[YY], xi[ZZ], xj[XX], xj[YY], xj[ZZ],
 +            nr_i, nr_j, r);
 +}
 +
 +
 +
 +/* Evaluate one tabulated pair interaction at squared distance r2.
 + *
 + * Each table point in vftab holds 12 values: four spline coefficients
 + * (Y, F, G, H) for electrostatics, then four for dispersion, then four
 + * for repulsion. The scaled potentials are returned through velec (qq
 + * times the electrostatics table) and vvdw (c6 times dispersion plus c12
 + * times repulsion); the return value is the scalar force factor.
 + *
 + * This might logically belong better in the nb_generic.c module, but it is
 + * only used in do_nonbonded_listed(), and we want it to be inlined there to
 + * avoid an extra function call for every single pair listed in the topology.
 + */
 +static real
 +nb_evaluate_single(real r2, real tabscale, real *vftab,
 +                   real qq, real c6, real c12, real *velec, real *vvdw)
 +{
 +    real        rinv, r, rtab, eps, eps2;
 +    real        Y, F, Geps, Heps2, Fp;
 +    real        VVe, FFe, VVd, FFd, VVr, FFr;
 +    real        fscal;
 +    const real *tab_elec;
 +    const real *tab_disp;
 +    const real *tab_rep;
 +    int         ntab;
 +
 +    /* Locate the table point and the fractional offset eps inside it */
 +    rinv     = gmx_invsqrt(r2);
 +    r        = r2*rinv;
 +    rtab     = r*tabscale;
 +    ntab     = rtab;
 +    eps      = rtab-ntab;
 +    eps2     = eps*eps;
 +    ntab     = 12*ntab;
 +
 +    tab_elec = vftab + ntab;
 +    tab_disp = tab_elec + 4;
 +    tab_rep  = tab_elec + 8;
 +
 +    /* Electrostatics */
 +    Y        = tab_elec[0];
 +    F        = tab_elec[1];
 +    Geps     = eps*tab_elec[2];
 +    Heps2    = eps2*tab_elec[3];
 +    Fp       = F+Geps+Heps2;
 +    VVe      = Y+eps*Fp;
 +    FFe      = Fp+Geps+2.0*Heps2;
 +
 +    /* Dispersion */
 +    Y        = tab_disp[0];
 +    F        = tab_disp[1];
 +    Geps     = eps*tab_disp[2];
 +    Heps2    = eps2*tab_disp[3];
 +    Fp       = F+Geps+Heps2;
 +    VVd      = Y+eps*Fp;
 +    FFd      = Fp+Geps+2.0*Heps2;
 +
 +    /* Repulsion */
 +    Y        = tab_rep[0];
 +    F        = tab_rep[1];
 +    Geps     = eps*tab_rep[2];
 +    Heps2    = eps2*tab_rep[3];
 +    Fp       = F+Geps+Heps2;
 +    VVr      = Y+eps*Fp;
 +    FFr      = Fp+Geps+2.0*Heps2;
 +
 +    /* Scale the table values with the pair parameters */
 +    *velec   = qq*VVe;
 +    *vvdw    = c6*VVd+c12*VVr;
 +
 +    fscal    = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
 +
 +    return fscal;
 +}
 +
 +
 +/* Evaluate the pair interactions listed in iatoms[] using the tab14 table
 + * in fr.
 + *
 + * ftype   F_LJ14, F_LJC14_Q or F_LJC_PAIRS_NB; anything else is fatal.
 + * nbonds  Number of entries in iatoms[]; each interaction takes three
 + *         consecutive entries (parameter type, atom i, atom j).
 + * x       Coordinates.
 + * f       Forces; the pair force is added for atom i and subtracted for j.
 + * fshift  Shift forces, updated when the pair is not in the central shift.
 + * pbc     Used for the distance vector when fr->bMolPBC is set.
 + * g       Optional graph; when non-NULL it determines the shift index.
 + * lambda, dvdl  Free-energy coupling values and derivative accumulator,
 + *         only used for perturbed F_LJ14 pairs.
 + *
 + * Energies are accumulated per energy-group pair in grppener: the 1-4
 + * terms for F_LJ14/F_LJC14_Q, the short-range terms for F_LJC_PAIRS_NB.
 + * Pairs at or beyond the table limit fr->tab14.r are skipped, with a
 + * warning issued once (tracked in a function-static flag).
 + *
 + * Always returns 0.0; the energies are only reported through grppener.
 + */
 +real
 +do_nonbonded_listed(int ftype, int nbonds,
 +                    const t_iatom iatoms[], const t_iparams iparams[],
 +                    const rvec x[], rvec f[], rvec fshift[],
 +                    const t_pbc *pbc, const t_graph *g,
 +                    real *lambda, real *dvdl,
 +                    const t_mdatoms *md,
 +                    const t_forcerec *fr, gmx_grppairener_t *grppener,
 +                    int *global_atom_index)
 +{
 +    real             qq, c6, c12;
 +    rvec             dx;
 +    ivec             dt;
 +    int              i, itype, ai, aj, gid;
 +    int              fshift_index;
 +    real             r2;
 +    real             fscal, velec, vvdw;
 +    real *           energygrp_elec;
 +    real *           energygrp_vdw;
 +    static gmx_bool  warned_rlimit = FALSE;
 +    /* Free energy stuff */
 +    gmx_bool         bFreeEnergy;
 +    real             LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 +    real             qqB, c6B, c12B, sigma2_def, sigma2_min;
 +
 +
 +    /* Select the energy-group matrices the pair energies are added to */
 +    switch (ftype)
 +    {
 +        case F_LJ14:
 +        case F_LJC14_Q:
 +            energygrp_elec = grppener->ener[egCOUL14];
 +            energygrp_vdw  = grppener->ener[egLJ14];
 +            break;
 +        case F_LJC_PAIRS_NB:
 +            energygrp_elec = grppener->ener[egCOULSR];
 +            energygrp_vdw  = grppener->ener[egLJSR];
 +            break;
 +        default:
 +            energygrp_elec = NULL; /* Keep compiler happy */
 +            energygrp_vdw  = NULL; /* Keep compiler happy */
 +            gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded_listed", ftype);
 +            break;
 +    }
 +
 +    if (fr->efep != efepNO)
 +    {
 +        /* Lambda factor for state A=1-lambda and B=lambda */
 +        LFC[0] = 1.0 - lambda[efptCOUL];
 +        LFV[0] = 1.0 - lambda[efptVDW];
 +        LFC[1] = lambda[efptCOUL];
 +        LFV[1] = lambda[efptVDW];
 +
 +        /* Derivative of the lambda factor for state A and B */
 +        DLF[0] = -1;
 +        DLF[1] = 1;
 +
 +        /* Precalculate the soft-core factors, which are the same for all pairs */
 +        sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
 +        sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
 +
 +        for (i = 0; i < 2; i++)
 +        {
 +            lfac_coul[i]  = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 +            dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
 +            lfac_vdw[i]   = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 +            dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
 +        }
 +    }
 +    else
 +    {
 +        /* The lambda arrays stay unset here; they are only read for
 +         * perturbed pairs, which require fr->efep != efepNO.
 +         */
 +        sigma2_min = sigma2_def = 0;
 +    }
 +
 +    bFreeEnergy = FALSE;
 +    /* i is advanced by the three reads at the top of the loop body */
 +    for (i = 0; (i < nbonds); )
 +    {
 +        itype = iatoms[i++];
 +        ai    = iatoms[i++];
 +        aj    = iatoms[i++];
 +        gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
 +
 +        /* Get parameters */
 +        switch (ftype)
 +        {
 +            case F_LJ14:
 +                /* Perturbed when free energy is on and either an atom is
 +                 * perturbed or the A and B pair parameters differ.
 +                 */
 +                bFreeEnergy =
 +                    (fr->efep != efepNO &&
 +                     ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
 +                      iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
 +                      iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 +                qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
 +                c6               = iparams[itype].lj14.c6A;
 +                c12              = iparams[itype].lj14.c12A;
 +                break;
 +            case F_LJC14_Q:
 +                qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
 +                c6               = iparams[itype].ljc14.c6;
 +                c12              = iparams[itype].ljc14.c12;
 +                break;
 +            case F_LJC_PAIRS_NB:
 +                qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
 +                c6               = iparams[itype].ljcnb.c6;
 +                c12              = iparams[itype].ljcnb.c12;
 +                break;
 +            default:
 +                /* Cannot happen since we called gmx_fatal() above in this case */
 +                qq = c6 = c12 = 0; /* Keep compiler happy */
 +                break;
 +        }
 +
 +        /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 +         * included in the general nfbp array now. This means the tables are scaled down by the
 +         * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 +         * be scaled up.
 +         */
 +        c6  *= 6.0;
 +        c12 *= 12.0;
 +
 +        /* Do we need to apply full periodic boundary conditions? */
 +        if (fr->bMolPBC)
 +        {
 +            fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 +        }
 +        else
 +        {
 +            fshift_index = CENTRAL;
 +            rvec_sub(x[ai], x[aj], dx);
 +        }
 +        r2           = norm2(dx);
 +
 +        /* Pairs at or beyond the table limit cannot be evaluated: skip them */
 +        if (r2 >= fr->tab14.r*fr->tab14.r)
 +        {
 +            if (warned_rlimit == FALSE)
 +            {
 +                nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
 +                warned_rlimit = TRUE;
 +            }
 +            continue;
 +        }
 +
 +        if (bFreeEnergy)
 +        {
 +            /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 +            qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
 +            c6B              = iparams[itype].lj14.c6B*6.0;
 +            c12B             = iparams[itype].lj14.c12B*12.0;
 +
 +            fscal            = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
 +                                                              fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
 +                                                              LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
 +                                                              fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
 +        }
 +        else
 +        {
 +            /* Evaluate tabulated interaction without free energy */
 +            fscal            = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
 +        }
 +
 +        energygrp_elec[gid]  += velec;
 +        energygrp_vdw[gid]   += vvdw;
 +        /* From here on dx holds the force vector on atom ai */
 +        svmul(fscal, dx, dx);
 +
 +        /* Add the forces */
 +        rvec_inc(f[ai], dx);
 +        rvec_dec(f[aj], dx);
 +
 +        if (g)
 +        {
 +            /* Correct the shift forces using the graph */
 +            ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 +            fshift_index = IVEC2IS(dt);
 +        }
 +        if (fshift_index != CENTRAL)
 +        {
 +            rvec_inc(fshift[fshift_index], dx);
 +            rvec_dec(fshift[CENTRAL], dx);
 +        }
 +    }
 +    return 0.0;
 +}
diff --cc src/gromacs/gmxlib/tpxio.c
index 80dac300ba,0000000000..e8d5d2c73b
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/tpxio.c
+++ b/src/gromacs/gmxlib/tpxio.c
@@@ -1,3538 -1,0 +1,3538 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +/* This file is completely threadsafe - keep it that way! */
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +
 +#include <ctype.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "string2.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "names.h"
 +#include "symtab.h"
 +#include "futil.h"
 +#include "filenm.h"
 +#include "gmxfio.h"
 +#include "topsort.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "confio.h"
 +#include "atomprop.h"
 +#include "copyrite.h"
 +#include "vec.h"
 +#include "mtop_util.h"
 +
 +/* Default tag string; tpx_tag below is initialized to it. */
 +#define TPX_TAG_RELEASE  "release"
 +
 +/* This is the tag string which is stored in the tpx file.
 + * Change this if you want to change the tpx format in a feature branch.
 + * This ensures that there will not be different tpx formats around which
 + * cannot be distinguished.
 + */
 +static const char *tpx_tag = TPX_TAG_RELEASE;
 +
 +/* This number should be increased whenever the file format changes! */
 +static const int tpx_version = 92;
 +
 +/* This number should only be increased when you edit the TOPOLOGY section
 + * or the HEADER of the tpx format.
 + * This way we can maintain forward compatibility too for all analysis tools
 + * and/or external programs that only need to know the atom/residue names,
 + * charges, and bond connectivity.
 + *
 + * It first appeared in tpx version 26, when the inputrecord was also moved
 + * to the end of the tpx file, so we can just skip it if we only
 + * want the topology.
 + */
 +static const int tpx_generation = 25;
 +
 +/* This number should be the most recent backwards incompatible version,
 + * i.e., if this number is 9, we cannot read tpx version 9 with this code.
 + */
 +static const int tpx_incompatible_version = 9;
 +
 +
 +
 +/* Struct used to maintain tpx compatibility when function types are added.
 + * Each entry pairs a function type with the tpx file version that
 + * introduced it.
 + */
 +typedef struct {
 +    int fvnr;  /* file version number in which the function type first appeared */
 +    int ftype; /* function type */
 +} t_ftupd;
 +
 +/*
 + * The entries should be ordered in:
 + * 1. ascending file version number
 + * 2. ascending function type number
 + */
 +/*static const t_ftupd ftupd[] = {
 +   { 20, F_CUBICBONDS        },
 +   { 20, F_CONNBONDS         },
 +   { 20, F_HARMONIC          },
 +   { 20, F_EQM,              },
 +   { 22, F_DISRESVIOL        },
 +   { 22, F_ORIRES            },
 +   { 22, F_ORIRESDEV         },
 +   { 26, F_FOURDIHS          },
 +   { 26, F_PIDIHS            },
 +   { 26, F_DIHRES            },
 +   { 26, F_DIHRESVIOL        },
 +   { 30, F_CROSS_BOND_BONDS  },
 +   { 30, F_CROSS_BOND_ANGLES },
 +   { 30, F_UREY_BRADLEY      },
 +   { 30, F_POLARIZATION      },
 +   { 54, F_DHDL_CON          },
 +   };*/
 +/*
 + * Table of function types together with the tpx file version in which
 + * each first appeared.
 + *
 + * The entries should be ordered in:
 + * 1. ascending function type number
 + * 2. ascending file version number
 + */
 +/* NOTE(review): the commented-out table above is ordered by file version
 + * instead and lists fewer entries; it looks like an older revision of this
 + * table. Its purpose is unclear - confirm whether it can be removed.
 + */
 +static const t_ftupd ftupd[] = {
 +    { 20, F_CUBICBONDS        },
 +    { 20, F_CONNBONDS         },
 +    { 20, F_HARMONIC          },
 +    { 34, F_FENEBONDS         },
 +    { 43, F_TABBONDS          },
 +    { 43, F_TABBONDSNC        },
 +    { 70, F_RESTRBONDS        },
 +    { 76, F_LINEAR_ANGLES     },
 +    { 30, F_CROSS_BOND_BONDS  },
 +    { 30, F_CROSS_BOND_ANGLES },
 +    { 30, F_UREY_BRADLEY      },
 +    { 34, F_QUARTIC_ANGLES    },
 +    { 43, F_TABANGLES         },
 +    { 26, F_FOURDIHS          },
 +    { 26, F_PIDIHS            },
 +    { 43, F_TABDIHS           },
 +    { 65, F_CMAP              },
 +    { 60, F_GB12              },
 +    { 61, F_GB13              },
 +    { 61, F_GB14              },
 +    { 72, F_GBPOL             },
 +    { 72, F_NPSOLVATION       },
 +    { 41, F_LJC14_Q           },
 +    { 41, F_LJC_PAIRS_NB      },
 +    { 32, F_BHAM_LR           },
 +    { 32, F_RF_EXCL           },
 +    { 32, F_COUL_RECIP        },
 +    { 46, F_DPD               },
 +    { 30, F_POLARIZATION      },
 +    { 36, F_THOLE_POL         },
 +    { 90, F_FBPOSRES          },
 +    { 22, F_DISRESVIOL        },
 +    { 22, F_ORIRES            },
 +    { 22, F_ORIRESDEV         },
 +    { 26, F_DIHRES            },
 +    { 26, F_DIHRESVIOL        },
 +    { 49, F_VSITE4FDN         },
 +    { 50, F_VSITEN            },
 +    { 46, F_COM_PULL          },
 +    { 20, F_EQM               },
 +    { 46, F_ECONSERVED        },
 +    { 69, F_VTEMP_NOLONGERUSED},
 +    { 66, F_PDISPCORR         },
 +    { 54, F_DVDL_CONSTR       },
 +    { 76, F_ANHARM_POL        },
 +    { 79, F_DVDL_COUL         },
 +    { 79, F_DVDL_VDW,         },
 +    { 79, F_DVDL_BONDED,      },
 +    { 79, F_DVDL_RESTRAINT    },
 +    { 79, F_DVDL_TEMPERATURE  },
 +};
 +/* Number of entries in ftupd[] */
 +#define NFTUPD asize(ftupd)
 +
 +/* Needed for backward compatibility */
 +#define MAXNODES 256
 +
 +/* Write or locate a section heading in an ASCII (efTPA) tpx file; does
 + * nothing for any other file type.
 + *
 + * When writing, the heading itemstr[key] is written, followed by
 + * comment_str[key] with the file's debug flag temporarily switched off.
 + * When reading, strings are consumed until one compares equal (ignoring
 + * case) to itemstr[key]. src and line identify the caller and are only
 + * used in the debug message.
 + */
 +static void _do_section(t_fileio *fio, int key, gmx_bool bRead, const char *src,
 +                        int line)
 +{
 +    char     buf[STRLEN];
 +    gmx_bool bDbg;
 +
 +    /* Section headings only exist in the ASCII format */
 +    if (gmx_fio_getftp(fio) != efTPA)
 +    {
 +        return;
 +    }
 +
 +    if (!bRead)
 +    {
 +        gmx_fio_write_string(fio, itemstr[key]);
 +
 +        /* Write the comment with debugging off, then restore the flag */
 +        bDbg = gmx_fio_getdebug(fio);
 +        gmx_fio_setdebug(fio, FALSE);
 +        gmx_fio_write_string(fio, comment_str[key]);
 +        gmx_fio_setdebug(fio, bDbg);
 +
 +        return;
 +    }
 +
 +    if (gmx_fio_getdebug(fio))
 +    {
 +        fprintf(stderr, "Looking for section %s (%s, %d)",
 +                itemstr[key], src, line);
 +    }
 +
 +    /* Skip strings until the requested heading shows up */
 +    do
 +    {
 +        gmx_fio_do_string(fio, buf);
 +    }
 +    while ((gmx_strcasecmp(buf, itemstr[key]) != 0));
 +
 +    /* NOTE(review): the loop above only exits on a match, so the fatal
 +     * branch below cannot be reached as written.
 +     */
 +    if (gmx_strcasecmp(buf, itemstr[key]) != 0)
 +    {
 +        gmx_fatal(FARGS, "\nCould not find section heading %s", itemstr[key]);
 +    }
 +    else if (gmx_fio_getdebug(fio))
 +    {
 +        fprintf(stderr, " and found it\n");
 +    }
 +}
 +
 +#define do_section(fio, key, bRead) _do_section(fio, key, bRead, __FILE__, __LINE__)
 +
 +/**************************************************************
 + *
 + * Now the higher level routines that do io of the structures and arrays
 + *
 + **************************************************************/
 +/* Read or write one pull group.
 + *
 + * The fields are transferred in a fixed order: nat, ind[], nweight,
 + * weight[], pbcatom, vec, init, rate, k and, for file_version >= 56, kB.
 + * When reading, the ind and weight arrays are allocated here with the
 + * sizes just read.
 + */
 +static void do_pullgrp(t_fileio *fio, t_pullgrp *pgrp, gmx_bool bRead,
 +                       int file_version)
 +{
 +    /* bDum only receives the return values of the array I/O calls; it is
 +     * not checked. NOTE(review): i is not referenced in the visible code.
 +     */
 +    gmx_bool bDum = TRUE;
 +    int      i;
 +
 +    gmx_fio_do_int(fio, pgrp->nat);
 +    if (bRead)
 +    {
 +        snew(pgrp->ind, pgrp->nat);
 +    }
 +    bDum = gmx_fio_ndo_int(fio, pgrp->ind, pgrp->nat);
 +    gmx_fio_do_int(fio, pgrp->nweight);
 +    if (bRead)
 +    {
 +        snew(pgrp->weight, pgrp->nweight);
 +    }
 +    bDum = gmx_fio_ndo_real(fio, pgrp->weight, pgrp->nweight);
 +    gmx_fio_do_int(fio, pgrp->pbcatom);
 +    gmx_fio_do_rvec(fio, pgrp->vec);
 +    gmx_fio_do_rvec(fio, pgrp->init);
 +    gmx_fio_do_real(fio, pgrp->rate);
 +    gmx_fio_do_real(fio, pgrp->k);
 +    if (file_version >= 56)
 +    {
 +        gmx_fio_do_real(fio, pgrp->kB);
 +    }
 +    else
 +    {
 +        /* Older files carry no kB: use k for it */
 +        pgrp->kB = pgrp->k;
 +    }
 +}
 +
 +/* Read or write the expanded-ensemble settings.
 + *
 + * The lambda calculation window in fepvals is always reset to cover
 + * [0, n_lambda). The expanded-ensemble fields themselves are only
 + * transferred for file_version >= 79; for older versions expand is left
 + * untouched. When reading with n_lambda > 0, init_lambda_weights is
 + * allocated here.
 + */
 +static void do_expandedvals(t_fileio *fio, t_expanded *expand, t_lambda *fepvals, gmx_bool bRead, int file_version)
 +{
 +    /* i is used in the ndo_double macro*/
 +    int      i;
 +    /* NOTE(review): fv and rdum are not referenced in this function;
 +     * bDum only receives an unchecked return value.
 +     */
 +    real     fv;
 +    gmx_bool bDum = TRUE;
 +    real     rdum;
 +    int      n_lambda = fepvals->n_lambda;
 +
 +    /* reset the lambda calculation window */
 +    fepvals->lambda_start_n = 0;
 +    fepvals->lambda_stop_n  = n_lambda;
 +    if (file_version >= 79)
 +    {
 +        /* The weights and bInit_weights are only present in the file
 +         * when there is at least one lambda state.
 +         */
 +        if (n_lambda > 0)
 +        {
 +            if (bRead)
 +            {
 +                snew(expand->init_lambda_weights, n_lambda);
 +            }
 +            bDum = gmx_fio_ndo_real(fio, expand->init_lambda_weights, n_lambda);
 +            gmx_fio_do_gmx_bool(fio, expand->bInit_weights);
 +        }
 +
 +        gmx_fio_do_int(fio, expand->nstexpanded);
 +        gmx_fio_do_int(fio, expand->elmcmove);
 +        gmx_fio_do_int(fio, expand->elamstats);
 +        gmx_fio_do_int(fio, expand->lmc_repeats);
 +        gmx_fio_do_int(fio, expand->gibbsdeltalam);
 +        gmx_fio_do_int(fio, expand->lmc_forced_nstart);
 +        gmx_fio_do_int(fio, expand->lmc_seed);
 +        gmx_fio_do_real(fio, expand->mc_temp);
 +        gmx_fio_do_int(fio, expand->bSymmetrizedTMatrix);
 +        gmx_fio_do_int(fio, expand->nstTij);
 +        gmx_fio_do_int(fio, expand->minvarmin);
 +        gmx_fio_do_int(fio, expand->c_range);
 +        gmx_fio_do_real(fio, expand->wl_scale);
 +        gmx_fio_do_real(fio, expand->wl_ratio);
 +        gmx_fio_do_real(fio, expand->init_wl_delta);
 +        gmx_fio_do_gmx_bool(fio, expand->bWLoneovert);
 +        gmx_fio_do_int(fio, expand->elmceq);
 +        gmx_fio_do_int(fio, expand->equil_steps);
 +        gmx_fio_do_int(fio, expand->equil_samples);
 +        gmx_fio_do_int(fio, expand->equil_n_at_lam);
 +        gmx_fio_do_real(fio, expand->equil_wl_delta);
 +        gmx_fio_do_real(fio, expand->equil_ratio);
 +    }
 +}
 +
 +/* Read or write the simulated-tempering settings.
 + *
 + * Nothing is transferred for file_version < 79. Otherwise the scaling
 + * type and the high/low temperatures are transferred, followed by
 + * n_lambda temperatures when n_lambda > 0; when reading, that array is
 + * allocated here.
 + */
 +static void do_simtempvals(t_fileio *fio, t_simtemp *simtemp, int n_lambda, gmx_bool bRead,
 +                           int file_version)
 +{
 +    /* Only receives an unchecked return value */
 +    gmx_bool bDum = TRUE;
 +
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_int(fio, simtemp->eSimTempScale);
 +        gmx_fio_do_real(fio, simtemp->simtemp_high);
 +        gmx_fio_do_real(fio, simtemp->simtemp_low);
 +        if (n_lambda > 0)
 +        {
 +            if (bRead)
 +            {
 +                snew(simtemp->temperatures, n_lambda);
 +            }
 +            bDum = gmx_fio_ndo_real(fio, simtemp->temperatures, n_lambda);
 +        }
 +    }
 +}
 +
 +static void do_fepvals(t_fileio *fio, t_lambda *fepvals, gmx_bool bRead, int file_version)
 +{
 +    /* i is defined in the ndo_double macro; use g to iterate. */
 +    int      i, g;
 +    real     fv;
 +    gmx_bool bDum = TRUE;
 +    real     rdum;
 +
 +    /* free energy values */
 +
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_int(fio, fepvals->init_fep_state);
 +        gmx_fio_do_double(fio, fepvals->init_lambda);
 +        gmx_fio_do_double(fio, fepvals->delta_lambda);
 +    }
 +    else if (file_version >= 59)
 +    {
 +        gmx_fio_do_double(fio, fepvals->init_lambda);
 +        gmx_fio_do_double(fio, fepvals->delta_lambda);
 +    }
 +    else
 +    {
 +        gmx_fio_do_real(fio, rdum);
 +        fepvals->init_lambda = rdum;
 +        gmx_fio_do_real(fio, rdum);
 +        fepvals->delta_lambda = rdum;
 +    }
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_int(fio, fepvals->n_lambda);
 +        if (bRead)
 +        {
 +            snew(fepvals->all_lambda, efptNR);
 +        }
 +        for (g = 0; g < efptNR; g++)
 +        {
 +            if (fepvals->n_lambda > 0)
 +            {
 +                if (bRead)
 +                {
 +                    snew(fepvals->all_lambda[g], fepvals->n_lambda);
 +                }
 +                bDum = gmx_fio_ndo_double(fio, fepvals->all_lambda[g], fepvals->n_lambda);
 +                bDum = gmx_fio_ndo_int(fio, fepvals->separate_dvdl, efptNR);
 +            }
 +            else if (fepvals->init_lambda >= 0)
 +            {
 +                fepvals->separate_dvdl[efptFEP] = TRUE;
 +            }
 +        }
 +    }
 +    else if (file_version >= 64)
 +    {
 +        gmx_fio_do_int(fio, fepvals->n_lambda);
 +        if (bRead)
 +        {
 +            int g;
 +
 +            snew(fepvals->all_lambda, efptNR);
 +            /* still allocate the all_lambda array's contents. */
 +            for (g = 0; g < efptNR; g++)
 +            {
 +                if (fepvals->n_lambda > 0)
 +                {
 +                    snew(fepvals->all_lambda[g], fepvals->n_lambda);
 +                }
 +            }
 +        }
 +        bDum = gmx_fio_ndo_double(fio, fepvals->all_lambda[efptFEP],
 +                                  fepvals->n_lambda);
 +        if (fepvals->init_lambda >= 0)
 +        {
 +            int g, h;
 +
 +            fepvals->separate_dvdl[efptFEP] = TRUE;
 +
 +            if (bRead)
 +            {
 +                /* copy the contents of the efptFEP lambda component to all
 +                   the other components */
 +                for (g = 0; g < efptNR; g++)
 +                {
 +                    for (h = 0; h < fepvals->n_lambda; h++)
 +                    {
 +                        if (g != efptFEP)
 +                        {
 +                            fepvals->all_lambda[g][h] =
 +                                fepvals->all_lambda[efptFEP][h];
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        fepvals->n_lambda     = 0;
 +        fepvals->all_lambda   = NULL;
 +        if (fepvals->init_lambda >= 0)
 +        {
 +            fepvals->separate_dvdl[efptFEP] = TRUE;
 +        }
 +    }
 +    if (file_version >= 13)
 +    {
 +        gmx_fio_do_real(fio, fepvals->sc_alpha);
 +    }
 +    else
 +    {
 +        fepvals->sc_alpha = 0;
 +    }
 +    if (file_version >= 38)
 +    {
 +        gmx_fio_do_int(fio, fepvals->sc_power);
 +    }
 +    else
 +    {
 +        fepvals->sc_power = 2;
 +    }
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_real(fio, fepvals->sc_r_power);
 +    }
 +    else
 +    {
 +        fepvals->sc_r_power = 6.0;
 +    }
 +    if (file_version >= 15)
 +    {
 +        gmx_fio_do_real(fio, fepvals->sc_sigma);
 +    }
 +    else
 +    {
 +        fepvals->sc_sigma = 0.3;
 +    }
 +    if (bRead)
 +    {
 +        if (file_version >= 71)
 +        {
 +            fepvals->sc_sigma_min = fepvals->sc_sigma;
 +        }
 +        else
 +        {
 +            fepvals->sc_sigma_min = 0;
 +        }
 +    }
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_int(fio, fepvals->bScCoul);
 +    }
 +    else
 +    {
 +        fepvals->bScCoul = TRUE;
 +    }
 +    if (file_version >= 64)
 +    {
 +        gmx_fio_do_int(fio, fepvals->nstdhdl);
 +    }
 +    else
 +    {
 +        fepvals->nstdhdl = 1;
 +    }
 +
 +    if (file_version >= 73)
 +    {
 +        gmx_fio_do_int(fio, fepvals->separate_dhdl_file);
 +        gmx_fio_do_int(fio, fepvals->dhdl_derivatives);
 +    }
 +    else
 +    {
 +        fepvals->separate_dhdl_file = esepdhdlfileYES;
 +        fepvals->dhdl_derivatives   = edhdlderivativesYES;
 +    }
 +    if (file_version >= 71)
 +    {
 +        gmx_fio_do_int(fio, fepvals->dh_hist_size);
 +        gmx_fio_do_double(fio, fepvals->dh_hist_spacing);
 +    }
 +    else
 +    {
 +        fepvals->dh_hist_size    = 0;
 +        fepvals->dh_hist_spacing = 0.1;
 +    }
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_int(fio, fepvals->bPrintEnergy);
 +    }
 +    else
 +    {
 +        fepvals->bPrintEnergy = FALSE;
 +    }
 +
 +    /* handle lambda_neighbors */
 +    if ((file_version >= 83 && file_version < 90) || file_version >= 92)
 +    {
 +        gmx_fio_do_int(fio, fepvals->lambda_neighbors);
 +        if ( (fepvals->lambda_neighbors >= 0) && (fepvals->init_fep_state >= 0) &&
 +             (fepvals->init_lambda < 0) )
 +        {
 +            fepvals->lambda_start_n = (fepvals->init_fep_state -
 +                                       fepvals->lambda_neighbors);
 +            fepvals->lambda_stop_n = (fepvals->init_fep_state +
 +                                      fepvals->lambda_neighbors + 1);
 +            if (fepvals->lambda_start_n < 0)
 +            {
 +                fepvals->lambda_start_n = 0;;
 +            }
 +            if (fepvals->lambda_stop_n >= fepvals->n_lambda)
 +            {
 +                fepvals->lambda_stop_n = fepvals->n_lambda;
 +            }
 +        }
 +        else
 +        {
 +            fepvals->lambda_start_n = 0;
 +            fepvals->lambda_stop_n  = fepvals->n_lambda;
 +        }
 +    }
 +    else
 +    {
 +        fepvals->lambda_start_n = 0;
 +        fepvals->lambda_stop_n  = fepvals->n_lambda;
 +    }
 +}
 +
 +/* Transfers the pull settings in *pull through fio: first the scalar
 + * fields, then one do_pullgrp() record per group.
 + *
 + * The group array holds pull->ngrp+1 entries. When bRead is set it is
 + * allocated here, after pull->ngrp has been transferred, so the count is
 + * known before the groups are processed. file_version is not inspected
 + * here; it is only forwarded to do_pullgrp().
 + */
 +static void do_pull(t_fileio *fio, t_pull *pull, gmx_bool bRead, int file_version)
 +{
 +    int g;
 +
 +    gmx_fio_do_int(fio, pull->ngrp);
 +    gmx_fio_do_int(fio, pull->eGeom);
 +    gmx_fio_do_ivec(fio, pull->dim);
 +    gmx_fio_do_real(fio, pull->cyl_r1);
 +    gmx_fio_do_real(fio, pull->cyl_r0);
 +    gmx_fio_do_real(fio, pull->constr_tol);
 +    gmx_fio_do_int(fio, pull->nstxout);
 +    gmx_fio_do_int(fio, pull->nstfout);
 +    if (bRead)
 +    {
 +        /* one entry more than ngrp, matching the loop bound below */
 +        snew(pull->grp, pull->ngrp+1);
 +    }
 +    for (g = 0; g < pull->ngrp+1; g++)
 +    {
 +        do_pullgrp(fio, &pull->grp[g], bRead, file_version);
 +    }
 +}
 +
 +
 +/* Transfers one enforced-rotation group *rotg through fio.
 + *
 + * The atom count rotg->nat is transferred first; when bRead is set, the
 + * index array rotg->ind and the reference positions rotg->x_ref are then
 + * allocated with rotg->nat entries each before they are filled. The
 + * remaining scalar and vector fields follow in a fixed order.
 + *
 + * file_version is accepted for signature symmetry with the other do_*
 + * routines but is not used here.
 + */
 +static void do_rotgrp(t_fileio *fio, t_rotgrp *rotg, gmx_bool bRead, int file_version)
 +{
 +    gmx_fio_do_int(fio, rotg->eType);
 +    gmx_fio_do_int(fio, rotg->bMassW);
 +    gmx_fio_do_int(fio, rotg->nat);
 +    if (bRead)
 +    {
 +        snew(rotg->ind, rotg->nat);
 +    }
 +    gmx_fio_ndo_int(fio, rotg->ind, rotg->nat);
 +    if (bRead)
 +    {
 +        snew(rotg->x_ref, rotg->nat);
 +    }
 +    gmx_fio_ndo_rvec(fio, rotg->x_ref, rotg->nat);
 +    gmx_fio_do_rvec(fio, rotg->vec);
 +    gmx_fio_do_rvec(fio, rotg->pivot);
 +    gmx_fio_do_real(fio, rotg->rate);
 +    gmx_fio_do_real(fio, rotg->k);
 +    gmx_fio_do_real(fio, rotg->slab_dist);
 +    gmx_fio_do_real(fio, rotg->min_gaussian);
 +    gmx_fio_do_real(fio, rotg->eps);
 +    gmx_fio_do_int(fio, rotg->eFittype);
 +    gmx_fio_do_int(fio, rotg->PotAngle_nstep);
 +    gmx_fio_do_real(fio, rotg->PotAngle_step);
 +}
 +
 +/* Transfers the enforced-rotation settings in *rot through fio: the group
 + * count and the two output intervals, followed by one do_rotgrp() record
 + * for each of the rot->ngrp groups.
 + *
 + * When bRead is set, the group array is allocated here once rot->ngrp has
 + * been transferred. file_version is only forwarded to do_rotgrp().
 + */
 +static void do_rot(t_fileio *fio, t_rot *rot, gmx_bool bRead, int file_version)
 +{
 +    int g;
 +
 +    gmx_fio_do_int(fio, rot->ngrp);
 +    gmx_fio_do_int(fio, rot->nstrout);
 +    gmx_fio_do_int(fio, rot->nstsout);
 +    if (bRead)
 +    {
 +        snew(rot->grp, rot->ngrp);
 +    }
 +    for (g = 0; g < rot->ngrp; g++)
 +    {
 +        do_rotgrp(fio, &rot->grp[g], bRead, file_version);
 +    }
 +}
 +
 +
 +/* Transfers the input record *ir through fio, field by field. The order
 + * of the calls below is the order of the fields in the stream, so
 + * statements must not be reordered.
 + *
 + * file_version selects which fields are present. For most fields that a
 + * given version does not store, a fixed default is assigned or the value
 + * is derived from fields handled earlier. A few fields (ir->eDisre,
 + * ir->bPrintNHChains, ir->bExpanded, and the ones guarded by
 + * "else if (bRead)") are left untouched in that case.
 + *
 + * When bRead is set, ir is reset with init_inputrec() first and every
 + * variable-length array is allocated with snew() before it is filled.
 + * A file_version of 0 returns right after that optional reset.
 + *
 + * fudgeQQ is only accessed for file_version < 54.
 + */
 +static void do_inputrec(t_fileio *fio, t_inputrec *ir, gmx_bool bRead,
 +                        int file_version, real *fudgeQQ)
 +{
 +    int      i, j, k, *tmp, idum = 0;
 +    gmx_bool bDum = TRUE;
 +    real     rdum, bd_temp;
 +    rvec     vdum;
 +    gmx_bool bSimAnn;
 +    real     zerotemptime, finish_t, init_temp, finish_temp;
 +
 +    if (file_version != tpx_version)
 +    {
 +        /* Give a warning about features that are not accessible */
 +        fprintf(stderr, "Note: file tpx version %d, software tpx version %d\n",
 +                file_version, tpx_version);
 +    }
 +
 +    if (bRead)
 +    {
 +        init_inputrec(ir);
 +    }
 +
 +    /* Version 0: no input record fields are transferred at all */
 +    if (file_version == 0)
 +    {
 +        return;
 +    }
 +
 +    /* Basic inputrec stuff */
 +    gmx_fio_do_int(fio, ir->eI);
 +    /* From version 62 on the step counters use the large integer type;
 +     * older files hold a plain int that is widened through idum.
 +     */
 +    if (file_version >= 62)
 +    {
 +        gmx_fio_do_gmx_large_int(fio, ir->nsteps);
 +    }
 +    else
 +    {
 +        gmx_fio_do_int(fio, idum);
 +        ir->nsteps = idum;
 +    }
 +    if (file_version > 25)
 +    {
 +        if (file_version >= 62)
 +        {
 +            gmx_fio_do_gmx_large_int(fio, ir->init_step);
 +        }
 +        else
 +        {
 +            gmx_fio_do_int(fio, idum);
 +            ir->init_step = idum;
 +        }
 +    }
 +    else
 +    {
 +        ir->init_step = 0;
 +    }
 +
 +    if (file_version >= 58)
 +    {
 +        gmx_fio_do_int(fio, ir->simulation_part);
 +    }
 +    else
 +    {
 +        ir->simulation_part = 1;
 +    }
 +
 +    if (file_version >= 67)
 +    {
 +        gmx_fio_do_int(fio, ir->nstcalcenergy);
 +    }
 +    else
 +    {
 +        ir->nstcalcenergy = 1;
 +    }
 +    if (file_version < 53)
 +    {
 +        /* The pbc info has been moved out of do_inputrec,
 +         * since we always want it, also without reading the inputrec.
 +         */
 +        gmx_fio_do_int(fio, ir->ePBC);
 +        if ((file_version <= 15) && (ir->ePBC == 2))
 +        {
 +            ir->ePBC = epbcNONE;
 +        }
 +        if (file_version >= 45)
 +        {
 +            gmx_fio_do_int(fio, ir->bPeriodicMols);
 +        }
 +        else
 +        {
 +            /* Versions before 45 have no separate bPeriodicMols field;
 +             * an ePBC value of 2 is mapped to epbcXYZ with periodic
 +             * molecules switched on.
 +             */
 +            if (ir->ePBC == 2)
 +            {
 +                ir->ePBC          = epbcXYZ;
 +                ir->bPeriodicMols = TRUE;
 +            }
 +            else
 +            {
 +                ir->bPeriodicMols = FALSE;
 +            }
 +        }
 +    }
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_int(fio, ir->cutoff_scheme);
 +    }
 +    else
 +    {
 +        ir->cutoff_scheme = ecutsGROUP;
 +    }
 +    gmx_fio_do_int(fio, ir->ns_type);
 +    gmx_fio_do_int(fio, ir->nstlist);
 +    gmx_fio_do_int(fio, ir->ndelta);
 +    if (file_version < 41)
 +    {
 +        /* two ints present in old files only; transferred into a dummy */
 +        gmx_fio_do_int(fio, idum);
 +        gmx_fio_do_int(fio, idum);
 +    }
 +    if (file_version >= 45)
 +    {
 +        gmx_fio_do_real(fio, ir->rtpi);
 +    }
 +    else
 +    {
 +        ir->rtpi = 0.05;
 +    }
 +    gmx_fio_do_int(fio, ir->nstcomm);
 +    if (file_version > 34)
 +    {
 +        gmx_fio_do_int(fio, ir->comm_mode);
 +    }
 +    else if (ir->nstcomm < 0)
 +    {
 +        ir->comm_mode = ecmANGULAR;
 +    }
 +    else
 +    {
 +        ir->comm_mode = ecmLINEAR;
 +    }
 +    /* The sign of nstcomm selected the mode in files up to version 34
 +     * (see above); only the magnitude is kept.
 +     */
 +    ir->nstcomm = abs(ir->nstcomm);
 +
 +    if (file_version > 25)
 +    {
 +        gmx_fio_do_int(fio, ir->nstcheckpoint);
 +    }
 +    else
 +    {
 +        ir->nstcheckpoint = 0;
 +    }
 +
 +    gmx_fio_do_int(fio, ir->nstcgsteep);
 +
 +    if (file_version >= 30)
 +    {
 +        gmx_fio_do_int(fio, ir->nbfgscorr);
 +    }
 +    else if (bRead)
 +    {
 +        ir->nbfgscorr = 10;
 +    }
 +
 +    gmx_fio_do_int(fio, ir->nstlog);
 +    gmx_fio_do_int(fio, ir->nstxout);
 +    gmx_fio_do_int(fio, ir->nstvout);
 +    gmx_fio_do_int(fio, ir->nstfout);
 +    gmx_fio_do_int(fio, ir->nstenergy);
 +    gmx_fio_do_int(fio, ir->nstxtcout);
 +    /* init_t and delta_t are doubles from version 59 on, reals before */
 +    if (file_version >= 59)
 +    {
 +        gmx_fio_do_double(fio, ir->init_t);
 +        gmx_fio_do_double(fio, ir->delta_t);
 +    }
 +    else
 +    {
 +        gmx_fio_do_real(fio, rdum);
 +        ir->init_t = rdum;
 +        gmx_fio_do_real(fio, rdum);
 +        ir->delta_t = rdum;
 +    }
 +    gmx_fio_do_real(fio, ir->xtcprec);
 +    if (file_version < 19)
 +    {
 +        gmx_fio_do_int(fio, idum);
 +        gmx_fio_do_int(fio, idum);
 +    }
 +    if (file_version < 18)
 +    {
 +        gmx_fio_do_int(fio, idum);
 +    }
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_real(fio, ir->verletbuf_drift);
 +    }
 +    else
 +    {
 +        ir->verletbuf_drift = 0;
 +    }
 +    gmx_fio_do_real(fio, ir->rlist);
 +    /* For file_version < 67 rlistlong is derived further down, once rvdw
 +     * and rcoulomb are known.
 +     */
 +    if (file_version >= 67)
 +    {
 +        gmx_fio_do_real(fio, ir->rlistlong);
 +    }
 +    if (file_version >= 82 && file_version != 90)
 +    {
 +        gmx_fio_do_int(fio, ir->nstcalclr);
 +    }
 +    else
 +    {
 +        /* Calculate at NS steps */
 +        ir->nstcalclr = ir->nstlist;
 +    }
 +    gmx_fio_do_int(fio, ir->coulombtype);
 +    if (file_version < 32 && ir->coulombtype == eelRF)
 +    {
 +        ir->coulombtype = eelRF_NEC;
 +    }
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_int(fio, ir->coulomb_modifier);
 +    }
 +    else
 +    {
 +        ir->coulomb_modifier = (ir->cutoff_scheme == ecutsVERLET ? eintmodPOTSHIFT : eintmodNONE);
 +    }
 +    gmx_fio_do_real(fio, ir->rcoulomb_switch);
 +    gmx_fio_do_real(fio, ir->rcoulomb);
 +    gmx_fio_do_int(fio, ir->vdwtype);
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_int(fio, ir->vdw_modifier);
 +    }
 +    else
 +    {
 +        ir->vdw_modifier = (ir->cutoff_scheme == ecutsVERLET ? eintmodPOTSHIFT : eintmodNONE);
 +    }
 +    gmx_fio_do_real(fio, ir->rvdw_switch);
 +    gmx_fio_do_real(fio, ir->rvdw);
 +    if (file_version < 67)
 +    {
 +        /* not stored before version 67: use the largest of the cut-offs */
 +        ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb));
 +    }
 +    gmx_fio_do_int(fio, ir->eDispCorr);
 +    gmx_fio_do_real(fio, ir->epsilon_r);
 +    if (file_version >= 37)
 +    {
 +        gmx_fio_do_real(fio, ir->epsilon_rf);
 +    }
 +    else
 +    {
 +        /* Before version 37 there is only one epsilon in the stream; for
 +         * reaction-field types it is moved to epsilon_rf.
 +         */
 +        if (EEL_RF(ir->coulombtype))
 +        {
 +            ir->epsilon_rf = ir->epsilon_r;
 +            ir->epsilon_r  = 1.0;
 +        }
 +        else
 +        {
 +            ir->epsilon_rf = 1.0;
 +        }
 +    }
 +    if (file_version >= 29)
 +    {
 +        gmx_fio_do_real(fio, ir->tabext);
 +    }
 +    else
 +    {
 +        ir->tabext = 1.0;
 +    }
 +
 +    /* Generalized Born / implicit solvent settings */
 +    if (file_version > 25)
 +    {
 +        gmx_fio_do_int(fio, ir->gb_algorithm);
 +        gmx_fio_do_int(fio, ir->nstgbradii);
 +        gmx_fio_do_real(fio, ir->rgbradii);
 +        gmx_fio_do_real(fio, ir->gb_saltconc);
 +        gmx_fio_do_int(fio, ir->implicit_solvent);
 +    }
 +    else
 +    {
 +        ir->gb_algorithm     = egbSTILL;
 +        ir->nstgbradii       = 1;
 +        ir->rgbradii         = 1.0;
 +        ir->gb_saltconc      = 0;
 +        ir->implicit_solvent = eisNO;
 +    }
 +    if (file_version >= 55)
 +    {
 +        gmx_fio_do_real(fio, ir->gb_epsilon_solvent);
 +        gmx_fio_do_real(fio, ir->gb_obc_alpha);
 +        gmx_fio_do_real(fio, ir->gb_obc_beta);
 +        gmx_fio_do_real(fio, ir->gb_obc_gamma);
 +        if (file_version >= 60)
 +        {
 +            gmx_fio_do_real(fio, ir->gb_dielectric_offset);
 +            gmx_fio_do_int(fio, ir->sa_algorithm);
 +        }
 +        else
 +        {
 +            ir->gb_dielectric_offset = 0.009;
 +            ir->sa_algorithm         = esaAPPROX;
 +        }
 +        gmx_fio_do_real(fio, ir->sa_surface_tension);
 +
 +        /* Override sa_surface_tension if it is not changed in the mdp-file */
 +        if (ir->sa_surface_tension < 0)
 +        {
 +            if (ir->gb_algorithm == egbSTILL)
 +            {
 +                ir->sa_surface_tension = 0.0049 * 100 * CAL2JOULE;
 +            }
 +            else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC)
 +            {
 +                ir->sa_surface_tension = 0.0054 * 100 * CAL2JOULE;
 +            }
 +        }
 +
 +    }
 +    else
 +    {
 +        /* Better use sensible values than insane (0.0) ones... */
 +        ir->gb_epsilon_solvent = 80;
 +        ir->gb_obc_alpha       = 1.0;
 +        ir->gb_obc_beta        = 0.8;
 +        ir->gb_obc_gamma       = 4.85;
 +        ir->sa_surface_tension = 2.092;
 +    }
 +
 +
 +    if (file_version >= 81)
 +    {
 +        gmx_fio_do_real(fio, ir->fourier_spacing);
 +    }
 +    else
 +    {
 +        ir->fourier_spacing = 0.0;
 +    }
 +    gmx_fio_do_int(fio, ir->nkx);
 +    gmx_fio_do_int(fio, ir->nky);
 +    gmx_fio_do_int(fio, ir->nkz);
 +    gmx_fio_do_int(fio, ir->pme_order);
 +    gmx_fio_do_real(fio, ir->ewald_rtol);
 +
 +    if (file_version >= 24)
 +    {
 +        gmx_fio_do_int(fio, ir->ewald_geometry);
 +    }
 +    else
 +    {
 +        ir->ewald_geometry = eewg3D;
 +    }
 +
 +    if (file_version <= 17)
 +    {
 +        ir->epsilon_surface = 0;
 +        if (file_version == 17)
 +        {
 +            /* version 17 stores an int at this position; it is discarded */
 +            gmx_fio_do_int(fio, idum);
 +        }
 +    }
 +    else
 +    {
 +        gmx_fio_do_real(fio, ir->epsilon_surface);
 +    }
 +
 +    gmx_fio_do_gmx_bool(fio, ir->bOptFFT);
 +
 +    gmx_fio_do_gmx_bool(fio, ir->bContinuation);
 +    gmx_fio_do_int(fio, ir->etc);
 +    /* before version 18, ir->etc was a gmx_bool (ir->btc),
 +     * but the values 0 and 1 still mean no and
 +     * berendsen temperature coupling, respectively.
 +     */
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_gmx_bool(fio, ir->bPrintNHChains);
 +    }
 +    if (file_version >= 71)
 +    {
 +        gmx_fio_do_int(fio, ir->nsttcouple);
 +    }
 +    else
 +    {
 +        ir->nsttcouple = ir->nstcalcenergy;
 +    }
 +    if (file_version <= 15)
 +    {
 +        gmx_fio_do_int(fio, idum);
 +    }
 +    if (file_version <= 17)
 +    {
 +        /* Up to version 17 only epct is stored and ir->epc is derived
 +         * from it below.
 +         */
 +        gmx_fio_do_int(fio, ir->epct);
 +        if (file_version <= 15)
 +        {
 +            if (ir->epct == 5)
 +            {
 +                ir->epct = epctSURFACETENSION;
 +            }
 +            gmx_fio_do_int(fio, idum);
 +        }
 +        ir->epct -= 1;
 +        /* we have removed the NO alternative at the beginning */
 +        if (ir->epct == -1)
 +        {
 +            ir->epc  = epcNO;
 +            ir->epct = epctISOTROPIC;
 +        }
 +        else
 +        {
 +            ir->epc = epcBERENDSEN;
 +        }
 +    }
 +    else
 +    {
 +        gmx_fio_do_int(fio, ir->epc);
 +        gmx_fio_do_int(fio, ir->epct);
 +    }
 +    if (file_version >= 71)
 +    {
 +        gmx_fio_do_int(fio, ir->nstpcouple);
 +    }
 +    else
 +    {
 +        ir->nstpcouple = ir->nstcalcenergy;
 +    }
 +    gmx_fio_do_real(fio, ir->tau_p);
 +    /* Up to version 15 ref_p and compress are stored as a single vector
 +     * each, which becomes the diagonal of the matrix; later versions
 +     * store the full matrix row by row.
 +     */
 +    if (file_version <= 15)
 +    {
 +        gmx_fio_do_rvec(fio, vdum);
 +        clear_mat(ir->ref_p);
 +        for (i = 0; i < DIM; i++)
 +        {
 +            ir->ref_p[i][i] = vdum[i];
 +        }
 +    }
 +    else
 +    {
 +        gmx_fio_do_rvec(fio, ir->ref_p[XX]);
 +        gmx_fio_do_rvec(fio, ir->ref_p[YY]);
 +        gmx_fio_do_rvec(fio, ir->ref_p[ZZ]);
 +    }
 +    if (file_version <= 15)
 +    {
 +        gmx_fio_do_rvec(fio, vdum);
 +        clear_mat(ir->compress);
 +        for (i = 0; i < DIM; i++)
 +        {
 +            ir->compress[i][i] = vdum[i];
 +        }
 +    }
 +    else
 +    {
 +        gmx_fio_do_rvec(fio, ir->compress[XX]);
 +        gmx_fio_do_rvec(fio, ir->compress[YY]);
 +        gmx_fio_do_rvec(fio, ir->compress[ZZ]);
 +    }
 +    if (file_version >= 47)
 +    {
 +        gmx_fio_do_int(fio, ir->refcoord_scaling);
 +        gmx_fio_do_rvec(fio, ir->posres_com);
 +        gmx_fio_do_rvec(fio, ir->posres_comB);
 +    }
 +    else
 +    {
 +        ir->refcoord_scaling = erscNO;
 +        clear_rvec(ir->posres_com);
 +        clear_rvec(ir->posres_comB);
 +    }
 +    if ((file_version > 25) && (file_version < 79))
 +    {
 +        gmx_fio_do_int(fio, ir->andersen_seed);
 +    }
 +    else
 +    {
 +        ir->andersen_seed = 0;
 +    }
 +    if (file_version < 26)
 +    {
 +        /* Single annealing switch and zero-temperature time of old
 +         * files; they are converted to per-group annealing settings
 +         * after the group options have been handled.
 +         */
 +        gmx_fio_do_gmx_bool(fio, bSimAnn);
 +        gmx_fio_do_real(fio, zerotemptime);
 +    }
 +
 +    if (file_version < 37)
 +    {
 +        /* a real present in old files only; the value is not used */
 +        gmx_fio_do_real(fio, rdum);
 +    }
 +
 +    gmx_fio_do_real(fio, ir->shake_tol);
 +    if (file_version < 54)
 +    {
 +        gmx_fio_do_real(fio, *fudgeQQ);
 +    }
 +
 +    /* Free energy settings */
 +    gmx_fio_do_int(fio, ir->efep);
 +    if (file_version <= 14 && ir->efep != efepNO)
 +    {
 +        ir->efep = efepYES;
 +    }
 +    do_fepvals(fio, ir->fepvals, bRead, file_version);
 +
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_gmx_bool(fio, ir->bSimTemp);
 +        /* map any non-zero value to TRUE */
 +        if (ir->bSimTemp)
 +        {
 +            ir->bSimTemp = TRUE;
 +        }
 +    }
 +    else
 +    {
 +        ir->bSimTemp = FALSE;
 +    }
 +    if (ir->bSimTemp)
 +    {
 +        do_simtempvals(fio, ir->simtempvals, ir->fepvals->n_lambda, bRead, file_version);
 +    }
 +
 +    /* NOTE(review): unlike bSimTemp above, bExpanded gets no explicit
 +     * default for file_version < 79; it keeps whatever value ir holds,
 +     * presumably the one left by init_inputrec() when reading - confirm.
 +     */
 +    if (file_version >= 79)
 +    {
 +        gmx_fio_do_gmx_bool(fio, ir->bExpanded);
 +        if (ir->bExpanded)
 +        {
 +            ir->bExpanded = TRUE;
 +        }
 +        else
 +        {
 +            ir->bExpanded = FALSE;
 +        }
 +    }
 +    if (ir->bExpanded)
 +    {
 +        do_expandedvals(fio, ir->expandedvals, ir->fepvals, bRead, file_version);
 +    }
 +    /* Distance, orientation and dihedral restraint settings */
 +    if (file_version >= 57)
 +    {
 +        gmx_fio_do_int(fio, ir->eDisre);
 +    }
 +    gmx_fio_do_int(fio, ir->eDisreWeighting);
 +    if (file_version < 22)
 +    {
 +        if (ir->eDisreWeighting == 0)
 +        {
 +            ir->eDisreWeighting = edrwEqual;
 +        }
 +        else
 +        {
 +            ir->eDisreWeighting = edrwConservative;
 +        }
 +    }
 +    gmx_fio_do_gmx_bool(fio, ir->bDisreMixed);
 +    gmx_fio_do_real(fio, ir->dr_fc);
 +    gmx_fio_do_real(fio, ir->dr_tau);
 +    gmx_fio_do_int(fio, ir->nstdisreout);
 +    if (file_version >= 22)
 +    {
 +        gmx_fio_do_real(fio, ir->orires_fc);
 +        gmx_fio_do_real(fio, ir->orires_tau);
 +        gmx_fio_do_int(fio, ir->nstorireout);
 +    }
 +    else
 +    {
 +        ir->orires_fc   = 0;
 +        ir->orires_tau  = 0;
 +        ir->nstorireout = 0;
 +    }
 +    if (file_version >= 26 && file_version < 79)
 +    {
 +        gmx_fio_do_real(fio, ir->dihre_fc);
 +        if (file_version < 56)
 +        {
 +            gmx_fio_do_real(fio, rdum);
 +            gmx_fio_do_int(fio, idum);
 +        }
 +    }
 +    else
 +    {
 +        ir->dihre_fc = 0;
 +    }
 +
 +    gmx_fio_do_real(fio, ir->em_stepsize);
 +    gmx_fio_do_real(fio, ir->em_tol);
 +    if (file_version >= 22)
 +    {
 +        gmx_fio_do_gmx_bool(fio, ir->bShakeSOR);
 +    }
 +    else if (bRead)
 +    {
 +        ir->bShakeSOR = TRUE;
 +    }
 +    if (file_version >= 11)
 +    {
 +        gmx_fio_do_int(fio, ir->niter);
 +    }
 +    else if (bRead)
 +    {
 +        ir->niter = 25;
 +        fprintf(stderr, "Note: niter not in run input file, setting it to %d\n",
 +                ir->niter);
 +    }
 +    if (file_version >= 21)
 +    {
 +        gmx_fio_do_real(fio, ir->fc_stepsize);
 +    }
 +    else
 +    {
 +        ir->fc_stepsize = 0;
 +    }
 +    gmx_fio_do_int(fio, ir->eConstrAlg);
 +    gmx_fio_do_int(fio, ir->nProjOrder);
 +    gmx_fio_do_real(fio, ir->LincsWarnAngle);
 +    if (file_version <= 14)
 +    {
 +        gmx_fio_do_int(fio, idum);
 +    }
 +    if (file_version >= 26)
 +    {
 +        gmx_fio_do_int(fio, ir->nLincsIter);
 +    }
 +    else if (bRead)
 +    {
 +        ir->nLincsIter = 1;
 +        fprintf(stderr, "Note: nLincsIter not in run input file, setting it to %d\n",
 +                ir->nLincsIter);
 +    }
 +    if (file_version < 33)
 +    {
 +        /* kept in bd_temp; copied into tau_t below when ir->eI is eiBD */
 +        gmx_fio_do_real(fio, bd_temp);
 +    }
 +    gmx_fio_do_real(fio, ir->bd_fric);
 +    gmx_fio_do_int(fio, ir->ld_seed);
 +    if (file_version >= 33)
 +    {
 +        for (i = 0; i < DIM; i++)
 +        {
 +            gmx_fio_do_rvec(fio, ir->deform[i]);
 +        }
 +    }
 +    else
 +    {
 +        for (i = 0; i < DIM; i++)
 +        {
 +            clear_rvec(ir->deform[i]);
 +        }
 +    }
 +    if (file_version >= 14)
 +    {
 +        gmx_fio_do_real(fio, ir->cos_accel);
 +    }
 +    else if (bRead)
 +    {
 +        ir->cos_accel = 0;
 +    }
 +    gmx_fio_do_int(fio, ir->userint1);
 +    gmx_fio_do_int(fio, ir->userint2);
 +    gmx_fio_do_int(fio, ir->userint3);
 +    gmx_fio_do_int(fio, ir->userint4);
 +    gmx_fio_do_real(fio, ir->userreal1);
 +    gmx_fio_do_real(fio, ir->userreal2);
 +    gmx_fio_do_real(fio, ir->userreal3);
 +    gmx_fio_do_real(fio, ir->userreal4);
 +
 +    /* AdResS stuff */
 +    if (file_version >= 77)
 +    {
 +        gmx_fio_do_gmx_bool(fio, ir->bAdress);
 +        if (ir->bAdress)
 +        {
 +            if (bRead)
 +            {
 +                snew(ir->adress, 1);
 +            }
 +            gmx_fio_do_int(fio, ir->adress->type);
 +            gmx_fio_do_real(fio, ir->adress->const_wf);
 +            gmx_fio_do_real(fio, ir->adress->ex_width);
 +            gmx_fio_do_real(fio, ir->adress->hy_width);
 +            gmx_fio_do_int(fio, ir->adress->icor);
 +            gmx_fio_do_int(fio, ir->adress->site);
 +            gmx_fio_do_rvec(fio, ir->adress->refs);
 +            gmx_fio_do_int(fio, ir->adress->n_tf_grps);
 +            gmx_fio_do_real(fio, ir->adress->ex_forcecap);
 +            gmx_fio_do_int(fio, ir->adress->n_energy_grps);
 +            gmx_fio_do_int(fio, ir->adress->do_hybridpairs);
 +
 +            if (bRead)
 +            {
 +                snew(ir->adress->tf_table_index, ir->adress->n_tf_grps);
 +            }
 +            if (ir->adress->n_tf_grps > 0)
 +            {
 +                bDum = gmx_fio_ndo_int(fio, ir->adress->tf_table_index, ir->adress->n_tf_grps);
 +            }
 +            if (bRead)
 +            {
 +                snew(ir->adress->group_explicit, ir->adress->n_energy_grps);
 +            }
 +            if (ir->adress->n_energy_grps > 0)
 +            {
 +                bDum = gmx_fio_ndo_int(fio, ir->adress->group_explicit, ir->adress->n_energy_grps);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        ir->bAdress = FALSE;
 +    }
 +
 +    /* pull stuff */
 +    if (file_version >= 48)
 +    {
 +        gmx_fio_do_int(fio, ir->ePull);
 +        if (ir->ePull != epullNO)
 +        {
 +            if (bRead)
 +            {
 +                snew(ir->pull, 1);
 +            }
 +            do_pull(fio, ir->pull, bRead, file_version);
 +        }
 +    }
 +    else
 +    {
 +        ir->ePull = epullNO;
 +    }
 +
 +    /* Enforced rotation */
 +    if (file_version >= 74)
 +    {
 +        gmx_fio_do_int(fio, ir->bRot);
 +        if (ir->bRot == TRUE)
 +        {
 +            if (bRead)
 +            {
 +                snew(ir->rot, 1);
 +            }
 +            do_rot(fio, ir->rot, bRead, file_version);
 +        }
 +    }
 +    else
 +    {
 +        ir->bRot = FALSE;
 +    }
 +
 +    /* grpopts stuff */
 +    gmx_fio_do_int(fio, ir->opts.ngtc);
 +    if (file_version >= 69)
 +    {
 +        gmx_fio_do_int(fio, ir->opts.nhchainlength);
 +    }
 +    else
 +    {
 +        ir->opts.nhchainlength = 1;
 +    }
 +    gmx_fio_do_int(fio, ir->opts.ngacc);
 +    gmx_fio_do_int(fio, ir->opts.ngfrz);
 +    gmx_fio_do_int(fio, ir->opts.ngener);
 +
 +    if (bRead)
 +    {
 +        /* all group counts are known now; allocate the per-group arrays */
 +        snew(ir->opts.nrdf,   ir->opts.ngtc);
 +        snew(ir->opts.ref_t,  ir->opts.ngtc);
 +        snew(ir->opts.annealing, ir->opts.ngtc);
 +        snew(ir->opts.anneal_npoints, ir->opts.ngtc);
 +        snew(ir->opts.anneal_time, ir->opts.ngtc);
 +        snew(ir->opts.anneal_temp, ir->opts.ngtc);
 +        snew(ir->opts.tau_t,  ir->opts.ngtc);
 +        snew(ir->opts.nFreeze, ir->opts.ngfrz);
 +        snew(ir->opts.acc,    ir->opts.ngacc);
 +        snew(ir->opts.egp_flags, ir->opts.ngener*ir->opts.ngener);
 +    }
 +    if (ir->opts.ngtc > 0)
 +    {
 +        if (bRead && file_version < 13)
 +        {
 +            /* before version 13 nrdf is stored as ints; read them into a
 +             * temporary array and convert.
 +             */
 +            snew(tmp, ir->opts.ngtc);
 +            bDum = gmx_fio_ndo_int(fio, tmp, ir->opts.ngtc);
 +            for (i = 0; i < ir->opts.ngtc; i++)
 +            {
 +                ir->opts.nrdf[i] = tmp[i];
 +            }
 +            sfree(tmp);
 +        }
 +        else
 +        {
 +            bDum = gmx_fio_ndo_real(fio, ir->opts.nrdf, ir->opts.ngtc);
 +        }
 +        bDum = gmx_fio_ndo_real(fio, ir->opts.ref_t, ir->opts.ngtc);
 +        bDum = gmx_fio_ndo_real(fio, ir->opts.tau_t, ir->opts.ngtc);
 +        if (file_version < 33 && ir->eI == eiBD)
 +        {
 +            for (i = 0; i < ir->opts.ngtc; i++)
 +            {
 +                ir->opts.tau_t[i] = bd_temp;
 +            }
 +        }
 +    }
 +    if (ir->opts.ngfrz > 0)
 +    {
 +        bDum = gmx_fio_ndo_ivec(fio, ir->opts.nFreeze, ir->opts.ngfrz);
 +    }
 +    if (ir->opts.ngacc > 0)
 +    {
 +        gmx_fio_ndo_rvec(fio, ir->opts.acc, ir->opts.ngacc);
 +    }
 +    if (file_version >= 12)
 +    {
 +        bDum = gmx_fio_ndo_int(fio, ir->opts.egp_flags,
 +                               ir->opts.ngener*ir->opts.ngener);
 +    }
 +
 +    if (bRead && file_version < 26)
 +    {
 +        /* Build per-group annealing data from the single bSimAnn /
 +         * zerotemptime pair read earlier.
 +         */
 +        for (i = 0; i < ir->opts.ngtc; i++)
 +        {
 +            if (bSimAnn)
 +            {
 +                ir->opts.annealing[i]      = eannSINGLE;
 +                ir->opts.anneal_npoints[i] = 2;
 +                snew(ir->opts.anneal_time[i], 2);
 +                snew(ir->opts.anneal_temp[i], 2);
 +                /* calculate the starting/ending temperatures from reft, zerotemptime, and nsteps */
 +                finish_t                   = ir->init_t + ir->nsteps * ir->delta_t;
 +                init_temp                  = ir->opts.ref_t[i]*(1-ir->init_t/zerotemptime);
 +                finish_temp                = ir->opts.ref_t[i]*(1-finish_t/zerotemptime);
 +                ir->opts.anneal_time[i][0] = ir->init_t;
 +                ir->opts.anneal_time[i][1] = finish_t;
 +                ir->opts.anneal_temp[i][0] = init_temp;
 +                ir->opts.anneal_temp[i][1] = finish_temp;
 +            }
 +            else
 +            {
 +                ir->opts.annealing[i]      = eannNO;
 +                ir->opts.anneal_npoints[i] = 0;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* file version 26 or later */
 +        /* First read the lists with annealing and npoints for each group */
 +        bDum = gmx_fio_ndo_int(fio, ir->opts.annealing, ir->opts.ngtc);
 +        bDum = gmx_fio_ndo_int(fio, ir->opts.anneal_npoints, ir->opts.ngtc);
 +        for (j = 0; j < (ir->opts.ngtc); j++)
 +        {
 +            k = ir->opts.anneal_npoints[j];
 +            if (bRead)
 +            {
 +                snew(ir->opts.anneal_time[j], k);
 +                snew(ir->opts.anneal_temp[j], k);
 +            }
 +            bDum = gmx_fio_ndo_real(fio, ir->opts.anneal_time[j], k);
 +            bDum = gmx_fio_ndo_real(fio, ir->opts.anneal_temp[j], k);
 +        }
 +    }
 +    /* Walls */
 +    if (file_version >= 45)
 +    {
 +        gmx_fio_do_int(fio, ir->nwall);
 +        gmx_fio_do_int(fio, ir->wall_type);
 +        if (file_version >= 50)
 +        {
 +            gmx_fio_do_real(fio, ir->wall_r_linpot);
 +        }
 +        else
 +        {
 +            ir->wall_r_linpot = -1;
 +        }
 +        gmx_fio_do_int(fio, ir->wall_atomtype[0]);
 +        gmx_fio_do_int(fio, ir->wall_atomtype[1]);
 +        gmx_fio_do_real(fio, ir->wall_density[0]);
 +        gmx_fio_do_real(fio, ir->wall_density[1]);
 +        gmx_fio_do_real(fio, ir->wall_ewald_zfac);
 +    }
 +    else
 +    {
 +        ir->nwall            = 0;
 +        ir->wall_type        = 0;
 +        ir->wall_atomtype[0] = -1;
 +        ir->wall_atomtype[1] = -1;
 +        ir->wall_density[0]  = 0;
 +        ir->wall_density[1]  = 0;
 +        ir->wall_ewald_zfac  = 3;
 +    }
 +    /* Cosine stuff for electric fields */
 +    for (j = 0; (j < DIM); j++)
 +    {
 +        gmx_fio_do_int(fio, ir->ex[j].n);
 +        gmx_fio_do_int(fio, ir->et[j].n);
 +        if (bRead)
 +        {
 +            snew(ir->ex[j].a,  ir->ex[j].n);
 +            snew(ir->ex[j].phi, ir->ex[j].n);
 +            snew(ir->et[j].a,  ir->et[j].n);
 +            snew(ir->et[j].phi, ir->et[j].n);
 +        }
 +        bDum = gmx_fio_ndo_real(fio, ir->ex[j].a,  ir->ex[j].n);
 +        bDum = gmx_fio_ndo_real(fio, ir->ex[j].phi, ir->ex[j].n);
 +        bDum = gmx_fio_ndo_real(fio, ir->et[j].a,  ir->et[j].n);
 +        bDum = gmx_fio_ndo_real(fio, ir->et[j].phi, ir->et[j].n);
 +    }
 +
 +    /* QMMM stuff */
 +    if (file_version >= 39)
 +    {
 +        gmx_fio_do_gmx_bool(fio, ir->bQMMM);
 +        gmx_fio_do_int(fio, ir->QMMMscheme);
 +        gmx_fio_do_real(fio, ir->scalefactor);
 +        gmx_fio_do_int(fio, ir->opts.ngQM);
 +        if (bRead)
 +        {
 +            snew(ir->opts.QMmethod,    ir->opts.ngQM);
 +            snew(ir->opts.QMbasis,     ir->opts.ngQM);
 +            snew(ir->opts.QMcharge,    ir->opts.ngQM);
 +            snew(ir->opts.QMmult,      ir->opts.ngQM);
 +            snew(ir->opts.bSH,         ir->opts.ngQM);
 +            snew(ir->opts.CASorbitals, ir->opts.ngQM);
 +            snew(ir->opts.CASelectrons, ir->opts.ngQM);
 +            snew(ir->opts.SAon,        ir->opts.ngQM);
 +            snew(ir->opts.SAoff,       ir->opts.ngQM);
 +            snew(ir->opts.SAsteps,     ir->opts.ngQM);
 +            snew(ir->opts.bOPT,        ir->opts.ngQM);
 +            snew(ir->opts.bTS,         ir->opts.ngQM);
 +        }
 +        if (ir->opts.ngQM > 0)
 +        {
 +            bDum = gmx_fio_ndo_int(fio, ir->opts.QMmethod, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_int(fio, ir->opts.QMbasis, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_int(fio, ir->opts.QMcharge, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_int(fio, ir->opts.QMmult, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_gmx_bool(fio, ir->opts.bSH, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_int(fio, ir->opts.CASorbitals, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_int(fio, ir->opts.CASelectrons, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_real(fio, ir->opts.SAon, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_real(fio, ir->opts.SAoff, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_int(fio, ir->opts.SAsteps, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_gmx_bool(fio, ir->opts.bOPT, ir->opts.ngQM);
 +            bDum = gmx_fio_ndo_gmx_bool(fio, ir->opts.bTS, ir->opts.ngQM);
 +        }
 +        /* end of QMMM stuff */
 +    }
 +}
 +
 +
 +/* Read or write the four harmonic parameters of an interaction,
 + * in the fixed order rA, krA, rB, krB.
 + *
 + * NOTE(review): bRead is not referenced by name in this body; the
 + * gmx_fio_do_* calls are presumably macros that pick it up from the
 + * enclosing scope - confirm before dropping the parameter.
 + */
 +static void do_harm(t_fileio *fio, t_iparams *iparams, gmx_bool bRead)
 +{
 +    gmx_fio_do_real(fio, iparams->harmonic.rA);
 +    gmx_fio_do_real(fio, iparams->harmonic.krA);
 +    gmx_fio_do_real(fio, iparams->harmonic.rB);
 +    gmx_fio_do_real(fio, iparams->harmonic.krB);
 +}
 +
 +/* Read or write the parameters of one interaction of type ftype.
 + *
 + * The members of iparams that belong to ftype are passed through fio in a
 + * fixed order per function type. For older file_version values some fields
 + * are passed through scratch variables instead of being stored, or are
 + * filled in from their A-state counterparts. While writing (!bRead) the
 + * name of the interaction function is set as the fio comment for the
 + * duration of the call. An ftype without a case label below ends in
 + * gmx_fatal().
 + */
 +void do_iparams(t_fileio *fio, t_functype ftype, t_iparams *iparams,
 +                gmx_bool bRead, int file_version)
 +{
 +    int      idum;
 +    gmx_bool bDum;
 +    real     rdum;
 +
 +    if (!bRead)
 +    {
 +        gmx_fio_set_comment(fio, interaction_function[ftype].name);
 +    }
 +    switch (ftype)
 +    {
 +        case F_ANGLES:
 +        case F_G96ANGLES:
 +        case F_BONDS:
 +        case F_G96BONDS:
 +        case F_HARMONIC:
 +        case F_IDIHS:
 +            do_harm(fio, iparams, bRead);
 +            /* NOTE(review): ftype is one of the six labels above here, so
 +             * this F_ANGRES/F_ANGRESZ test looks unreachable; those types
 +             * have their own case group together with F_PDIHS below.
 +             */
 +            if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && bRead)
 +            {
 +                /* Correct incorrect storage of parameters */
 +                iparams->pdihs.phiB = iparams->pdihs.phiA;
 +                iparams->pdihs.cpB  = iparams->pdihs.cpA;
 +            }
 +            break;
 +        case F_LINEAR_ANGLES:
 +            gmx_fio_do_real(fio, iparams->linangle.klinA);
 +            gmx_fio_do_real(fio, iparams->linangle.aA);
 +            gmx_fio_do_real(fio, iparams->linangle.klinB);
 +            gmx_fio_do_real(fio, iparams->linangle.aB);
 +            break;
 +        case F_FENEBONDS:
 +            gmx_fio_do_real(fio, iparams->fene.bm);
 +            gmx_fio_do_real(fio, iparams->fene.kb);
 +            break;
 +        case F_RESTRBONDS:
 +            gmx_fio_do_real(fio, iparams->restraint.lowA);
 +            gmx_fio_do_real(fio, iparams->restraint.up1A);
 +            gmx_fio_do_real(fio, iparams->restraint.up2A);
 +            gmx_fio_do_real(fio, iparams->restraint.kA);
 +            gmx_fio_do_real(fio, iparams->restraint.lowB);
 +            gmx_fio_do_real(fio, iparams->restraint.up1B);
 +            gmx_fio_do_real(fio, iparams->restraint.up2B);
 +            gmx_fio_do_real(fio, iparams->restraint.kB);
 +            break;
 +        case F_TABBONDS:
 +        case F_TABBONDSNC:
 +        case F_TABANGLES:
 +        case F_TABDIHS:
 +            gmx_fio_do_real(fio, iparams->tab.kA);
 +            gmx_fio_do_int(fio, iparams->tab.table);
 +            gmx_fio_do_real(fio, iparams->tab.kB);
 +            break;
 +        case F_CROSS_BOND_BONDS:
 +            gmx_fio_do_real(fio, iparams->cross_bb.r1e);
 +            gmx_fio_do_real(fio, iparams->cross_bb.r2e);
 +            gmx_fio_do_real(fio, iparams->cross_bb.krr);
 +            break;
 +        case F_CROSS_BOND_ANGLES:
 +            gmx_fio_do_real(fio, iparams->cross_ba.r1e);
 +            gmx_fio_do_real(fio, iparams->cross_ba.r2e);
 +            gmx_fio_do_real(fio, iparams->cross_ba.r3e);
 +            gmx_fio_do_real(fio, iparams->cross_ba.krt);
 +            break;
 +        case F_UREY_BRADLEY:
 +            gmx_fio_do_real(fio, iparams->u_b.thetaA);
 +            gmx_fio_do_real(fio, iparams->u_b.kthetaA);
 +            gmx_fio_do_real(fio, iparams->u_b.r13A);
 +            gmx_fio_do_real(fio, iparams->u_b.kUBA);
 +            if (file_version >= 79)
 +            {
 +                gmx_fio_do_real(fio, iparams->u_b.thetaB);
 +                gmx_fio_do_real(fio, iparams->u_b.kthetaB);
 +                gmx_fio_do_real(fio, iparams->u_b.r13B);
 +                gmx_fio_do_real(fio, iparams->u_b.kUBB);
 +            }
 +            else
 +            {
 +                /* No B-state values before version 79: reuse the A state */
 +                iparams->u_b.thetaB  = iparams->u_b.thetaA;
 +                iparams->u_b.kthetaB = iparams->u_b.kthetaA;
 +                iparams->u_b.r13B    = iparams->u_b.r13A;
 +                iparams->u_b.kUBB    = iparams->u_b.kUBA;
 +            }
 +            break;
 +        case F_QUARTIC_ANGLES:
 +            gmx_fio_do_real(fio, iparams->qangle.theta);
 +            bDum = gmx_fio_ndo_real(fio, iparams->qangle.c, 5);
 +            break;
 +        case F_BHAM:
 +            gmx_fio_do_real(fio, iparams->bham.a);
 +            gmx_fio_do_real(fio, iparams->bham.b);
 +            gmx_fio_do_real(fio, iparams->bham.c);
 +            break;
 +        case F_MORSE:
 +            gmx_fio_do_real(fio, iparams->morse.b0A);
 +            gmx_fio_do_real(fio, iparams->morse.cbA);
 +            gmx_fio_do_real(fio, iparams->morse.betaA);
 +            if (file_version >= 79)
 +            {
 +                gmx_fio_do_real(fio, iparams->morse.b0B);
 +                gmx_fio_do_real(fio, iparams->morse.cbB);
 +                gmx_fio_do_real(fio, iparams->morse.betaB);
 +            }
 +            else
 +            {
 +                /* No B-state values before version 79: reuse the A state */
 +                iparams->morse.b0B   = iparams->morse.b0A;
 +                iparams->morse.cbB   = iparams->morse.cbA;
 +                iparams->morse.betaB = iparams->morse.betaA;
 +            }
 +            break;
 +        case F_CUBICBONDS:
 +            gmx_fio_do_real(fio, iparams->cubic.b0);
 +            gmx_fio_do_real(fio, iparams->cubic.kb);
 +            gmx_fio_do_real(fio, iparams->cubic.kcub);
 +            break;
 +        case F_CONNBONDS:
 +            /* Nothing is stored for this type */
 +            break;
 +        case F_POLARIZATION:
 +            gmx_fio_do_real(fio, iparams->polarize.alpha);
 +            break;
 +        case F_ANHARM_POL:
 +            gmx_fio_do_real(fio, iparams->anharm_polarize.alpha);
 +            gmx_fio_do_real(fio, iparams->anharm_polarize.drcut);
 +            gmx_fio_do_real(fio, iparams->anharm_polarize.khyp);
 +            break;
 +        case F_WATER_POL:
 +            if (file_version < 31)
 +            {
 +                gmx_fatal(FARGS, "Old tpr files with water_polarization not supported. Make a new.");
 +            }
 +            gmx_fio_do_real(fio, iparams->wpol.al_x);
 +            gmx_fio_do_real(fio, iparams->wpol.al_y);
 +            gmx_fio_do_real(fio, iparams->wpol.al_z);
 +            gmx_fio_do_real(fio, iparams->wpol.rOH);
 +            gmx_fio_do_real(fio, iparams->wpol.rHH);
 +            gmx_fio_do_real(fio, iparams->wpol.rOD);
 +            break;
 +        case F_THOLE_POL:
 +            gmx_fio_do_real(fio, iparams->thole.a);
 +            gmx_fio_do_real(fio, iparams->thole.alpha1);
 +            gmx_fio_do_real(fio, iparams->thole.alpha2);
 +            gmx_fio_do_real(fio, iparams->thole.rfac);
 +            break;
 +        case F_LJ:
 +            gmx_fio_do_real(fio, iparams->lj.c6);
 +            gmx_fio_do_real(fio, iparams->lj.c12);
 +            break;
 +        case F_LJ14:
 +            gmx_fio_do_real(fio, iparams->lj14.c6A);
 +            gmx_fio_do_real(fio, iparams->lj14.c12A);
 +            gmx_fio_do_real(fio, iparams->lj14.c6B);
 +            gmx_fio_do_real(fio, iparams->lj14.c12B);
 +            break;
 +        case F_LJC14_Q:
 +            gmx_fio_do_real(fio, iparams->ljc14.fqq);
 +            gmx_fio_do_real(fio, iparams->ljc14.qi);
 +            gmx_fio_do_real(fio, iparams->ljc14.qj);
 +            gmx_fio_do_real(fio, iparams->ljc14.c6);
 +            gmx_fio_do_real(fio, iparams->ljc14.c12);
 +            break;
 +        case F_LJC_PAIRS_NB:
 +            gmx_fio_do_real(fio, iparams->ljcnb.qi);
 +            gmx_fio_do_real(fio, iparams->ljcnb.qj);
 +            gmx_fio_do_real(fio, iparams->ljcnb.c6);
 +            gmx_fio_do_real(fio, iparams->ljcnb.c12);
 +            break;
 +        case F_PDIHS:
 +        case F_PIDIHS:
 +        case F_ANGRES:
 +        case F_ANGRESZ:
 +            gmx_fio_do_real(fio, iparams->pdihs.phiA);
 +            gmx_fio_do_real(fio, iparams->pdihs.cpA);
 +            if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && file_version < 42)
 +            {
 +                /* Read the incorrectly stored multiplicity */
 +                gmx_fio_do_real(fio, iparams->harmonic.rB);
 +                gmx_fio_do_real(fio, iparams->harmonic.krB);
 +                iparams->pdihs.phiB = iparams->pdihs.phiA;
 +                iparams->pdihs.cpB  = iparams->pdihs.cpA;
 +            }
 +            else
 +            {
 +                gmx_fio_do_real(fio, iparams->pdihs.phiB);
 +                gmx_fio_do_real(fio, iparams->pdihs.cpB);
 +                gmx_fio_do_int(fio, iparams->pdihs.mult);
 +            }
 +            break;
 +        case F_DISRES:
 +            gmx_fio_do_int(fio, iparams->disres.label);
 +            gmx_fio_do_int(fio, iparams->disres.type);
 +            gmx_fio_do_real(fio, iparams->disres.low);
 +            gmx_fio_do_real(fio, iparams->disres.up1);
 +            gmx_fio_do_real(fio, iparams->disres.up2);
 +            gmx_fio_do_real(fio, iparams->disres.kfac);
 +            break;
 +        case F_ORIRES:
 +            gmx_fio_do_int(fio, iparams->orires.ex);
 +            gmx_fio_do_int(fio, iparams->orires.label);
 +            gmx_fio_do_int(fio, iparams->orires.power);
 +            gmx_fio_do_real(fio, iparams->orires.c);
 +            gmx_fio_do_real(fio, iparams->orires.obs);
 +            gmx_fio_do_real(fio, iparams->orires.kfac);
 +            break;
 +        case F_DIHRES:
 +            /* Older files carry two leading integers that go through the
 +             * scratch variable idum and are not stored in iparams.
 +             */
-             if (file_version < 72)
++            if (file_version < 82)
 +            {
 +                gmx_fio_do_int(fio, idum);
 +                gmx_fio_do_int(fio, idum);
 +            }
 +            gmx_fio_do_real(fio, iparams->dihres.phiA);
 +            gmx_fio_do_real(fio, iparams->dihres.dphiA);
 +            gmx_fio_do_real(fio, iparams->dihres.kfacA);
-             if (file_version >= 72)
++            if (file_version >= 82)
 +            {
 +                gmx_fio_do_real(fio, iparams->dihres.phiB);
 +                gmx_fio_do_real(fio, iparams->dihres.dphiB);
 +                gmx_fio_do_real(fio, iparams->dihres.kfacB);
 +            }
 +            else
 +            {
 +                /* No B-state values in older files: reuse the A state */
 +                iparams->dihres.phiB  = iparams->dihres.phiA;
 +                iparams->dihres.dphiB = iparams->dihres.dphiA;
 +                iparams->dihres.kfacB = iparams->dihres.kfacA;
 +            }
 +            break;
 +        case F_POSRES:
 +            gmx_fio_do_rvec(fio, iparams->posres.pos0A);
 +            gmx_fio_do_rvec(fio, iparams->posres.fcA);
 +            if (bRead && file_version < 27)
 +            {
 +                /* Reading a file without B-state values: copy the A state */
 +                copy_rvec(iparams->posres.pos0A, iparams->posres.pos0B);
 +                copy_rvec(iparams->posres.fcA, iparams->posres.fcB);
 +            }
 +            else
 +            {
 +                gmx_fio_do_rvec(fio, iparams->posres.pos0B);
 +                gmx_fio_do_rvec(fio, iparams->posres.fcB);
 +            }
 +            break;
 +        case F_FBPOSRES:
 +            gmx_fio_do_int(fio, iparams->fbposres.geom);
 +            gmx_fio_do_rvec(fio, iparams->fbposres.pos0);
 +            gmx_fio_do_real(fio, iparams->fbposres.r);
 +            gmx_fio_do_real(fio, iparams->fbposres.k);
 +            break;
 +        case F_RBDIHS:
 +            bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcA, NR_RBDIHS);
 +            /* NOTE(review): for file_version < 25 rbcB is left untouched
 +             * here - confirm that callers do not rely on it being set.
 +             */
 +            if (file_version >= 25)
 +            {
 +                bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcB, NR_RBDIHS);
 +            }
 +            break;
 +        case F_FOURDIHS:
 +            /* Fourier dihedrals are internally represented
 +             * as Ryckaert-Bellemans since those are faster to compute.
 +             */
 +            bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcA, NR_RBDIHS);
 +            bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcB, NR_RBDIHS);
 +            break;
 +        case F_CONSTR:
 +        case F_CONSTRNC:
 +            gmx_fio_do_real(fio, iparams->constr.dA);
 +            gmx_fio_do_real(fio, iparams->constr.dB);
 +            break;
 +        case F_SETTLE:
 +            gmx_fio_do_real(fio, iparams->settle.doh);
 +            gmx_fio_do_real(fio, iparams->settle.dhh);
 +            break;
 +        case F_VSITE2:
 +            gmx_fio_do_real(fio, iparams->vsite.a);
 +            break;
 +        case F_VSITE3:
 +        case F_VSITE3FD:
 +        case F_VSITE3FAD:
 +            gmx_fio_do_real(fio, iparams->vsite.a);
 +            gmx_fio_do_real(fio, iparams->vsite.b);
 +            break;
 +        case F_VSITE3OUT:
 +        case F_VSITE4FD:
 +        case F_VSITE4FDN:
 +            gmx_fio_do_real(fio, iparams->vsite.a);
 +            gmx_fio_do_real(fio, iparams->vsite.b);
 +            gmx_fio_do_real(fio, iparams->vsite.c);
 +            break;
 +        case F_VSITEN:
 +            gmx_fio_do_int(fio, iparams->vsiten.n);
 +            gmx_fio_do_real(fio, iparams->vsiten.a);
 +            break;
 +        case F_GB12:
 +        case F_GB13:
 +        case F_GB14:
 +            /* We got rid of some parameters in version 68 */
 +            if (bRead && file_version < 68)
 +            {
 +                gmx_fio_do_real(fio, rdum);
 +                gmx_fio_do_real(fio, rdum);
 +                gmx_fio_do_real(fio, rdum);
 +                gmx_fio_do_real(fio, rdum);
 +            }
 +            gmx_fio_do_real(fio, iparams->gb.sar);
 +            gmx_fio_do_real(fio, iparams->gb.st);
 +            gmx_fio_do_real(fio, iparams->gb.pi);
 +            gmx_fio_do_real(fio, iparams->gb.gbr);
 +            gmx_fio_do_real(fio, iparams->gb.bmlt);
 +            break;
 +        case F_CMAP:
 +            gmx_fio_do_int(fio, iparams->cmap.cmapA);
 +            gmx_fio_do_int(fio, iparams->cmap.cmapB);
 +            break;
 +        default:
 +            gmx_fatal(FARGS, "unknown function type %d (%s) in %s line %d",
 +                      ftype, interaction_function[ftype].name, __FILE__, __LINE__);
 +    }
 +    if (!bRead)
 +    {
 +        gmx_fio_unset_comment(fio);
 +    }
 +}
 +
 +/* Read or write one interaction list: its length followed by its iatoms
 + * array. Files older than version 44 carry MAXNODES extra integers in
 + * front, which are passed through idum. On read the iatoms array is
 + * allocated here; on write the interaction name is used as fio comment.
 + */
 +static void do_ilist(t_fileio *fio, t_ilist *ilist, gmx_bool bRead, int file_version,
 +                     int ftype)
 +{
 +    int            i, k, idum;
 +    gmx_bool       bDum   = TRUE;
 +    const gmx_bool bWrite = !bRead;
 +
 +    if (bWrite)
 +    {
 +        gmx_fio_set_comment(fio, interaction_function[ftype].name);
 +    }
 +
 +    /* Leading per-node integers of old files */
 +    for (i = (file_version < 44) ? MAXNODES : 0; i > 0; i--)
 +    {
 +        gmx_fio_do_int(fio, idum);
 +    }
 +
 +    gmx_fio_do_int(fio, ilist->nr);
 +    if (bRead)
 +    {
 +        snew(ilist->iatoms, ilist->nr);
 +    }
 +    bDum = gmx_fio_ndo_int(fio, ilist->iatoms, ilist->nr);
 +
 +    if (bWrite)
 +    {
 +        gmx_fio_unset_comment(fio);
 +    }
 +}
 +
 +/* Read or write the force-field parameter table: atnr, the number of
 + * entries, the function type of every entry, reppow, fudgeQQ and then the
 + * parameters of each entry through do_iparams(). On read the functype and
 + * iparams arrays are allocated here, and function types from older files
 + * are shifted according to the ftupd table before their parameters are
 + * read.
 + */
 +static void do_ffparams(t_fileio *fio, gmx_ffparams_t *ffparams,
 +                        gmx_bool bRead, int file_version)
 +{
 +    int          idum, i, j;
 +    gmx_bool     bDum = TRUE;
 +    unsigned int k;
 +
 +    gmx_fio_do_int(fio, ffparams->atnr);
 +    if (file_version < 57)
 +    {
 +        gmx_fio_do_int(fio, idum);
 +    }
 +    gmx_fio_do_int(fio, ffparams->ntypes);
 +
 +    if (bRead)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "ffparams->atnr = %d, ntypes = %d\n",
 +                    ffparams->atnr, ffparams->ntypes);
 +        }
 +        snew(ffparams->functype, ffparams->ntypes);
 +        snew(ffparams->iparams, ffparams->ntypes);
 +    }
 +
 +    /* All function types go first, as one block */
 +    bDum = gmx_fio_ndo_int(fio, ffparams->functype, ffparams->ntypes);
 +    if (bRead && debug)
 +    {
 +        pr_ivec(debug, 0, "functype", ffparams->functype, ffparams->ntypes, TRUE);
 +    }
 +
 +    if (file_version < 66)
 +    {
 +        /* Not stored in these files: use a repulsion power of 12 */
 +        ffparams->reppow = 12.0;
 +    }
 +    else
 +    {
 +        gmx_fio_do_double(fio, ffparams->reppow);
 +    }
 +
 +    if (file_version >= 57)
 +    {
 +        gmx_fio_do_real(fio, ffparams->fudgeQQ);
 +    }
 +
 +    for (i = 0; i < ffparams->ntypes; i++)
 +    {
 +        /* When reading, walk the update table: every entry that is newer
 +         * than the file and not above the stored type number shifts the
 +         * type up by one, mapping old numbering onto the current one.
 +         */
 +        for (k = 0; bRead && (k < NFTUPD); k++)
 +        {
 +            if ((file_version >= ftupd[k].fvnr) ||
 +                (ffparams->functype[i] < ftupd[k].ftype))
 +            {
 +                continue;
 +            }
 +            ffparams->functype[i] += 1;
 +            if (debug)
 +            {
 +                fprintf(debug, "Incrementing function type %d to %d (due to %s)\n",
 +                        i, ffparams->functype[i],
 +                        interaction_function[ftupd[k].ftype].longname);
 +                fflush(debug);
 +            }
 +        }
 +
 +        do_iparams(fio, ffparams->functype[i], &ffparams->iparams[i], bRead,
 +                   file_version);
 +        if (bRead && debug)
 +        {
 +            pr_iparams(debug, ffparams->functype[i], &ffparams->iparams[i]);
 +        }
 +    }
 +}
 +
 +/* Expand a settle list from two entries per settle (the first one plus one
 + * atom index) to four entries, appending the two atom indices that follow
 + * the stored one. The array is grown in place and ilist->nr is doubled.
 + */
 +static void add_settle_atoms(t_ilist *ilist)
 +{
 +    int s;
 +
 +    /* Settle used to only store the first atom: add the other two */
 +    srenew(ilist->iatoms, 2*ilist->nr);
 +
 +    /* Work from the last settle down to the first, so that an expanded
 +     * record never overwrites a compact record that is still to be read.
 +     */
 +    s = ilist->nr/2;
 +    while (s-- > 0)
 +    {
 +        const int src = 2*s;
 +        const int dst = 4*s;
 +
 +        ilist->iatoms[dst]   = ilist->iatoms[src];
 +        ilist->iatoms[dst+1] = ilist->iatoms[src+1];
 +        ilist->iatoms[dst+2] = ilist->iatoms[src+1] + 1;
 +        ilist->iatoms[dst+3] = ilist->iatoms[src+1] + 2;
 +    }
 +    ilist->nr *= 2;
 +}
 +
 +/* Read or write all F_NRE interaction lists of ilist[].
 + *
 + * When reading, a list whose function type is listed in ftupd with a
 + * version number above file_version is not read from the file; it is set
 + * to empty (nr = 0, iatoms = NULL) instead. Settle lists from files older
 + * than version 78 are expanded with add_settle_atoms() after reading.
 + */
 +static void do_ilists(t_fileio *fio, t_ilist *ilist, gmx_bool bRead,
 +                      int file_version)
 +{
 +    int          j;
 +    gmx_bool     bClear;
 +    unsigned int k;
 +
 +    for (j = 0; (j < F_NRE); j++)
 +    {
 +        /* Decide whether this list can be present in the file at all */
 +        bClear = FALSE;
 +        if (bRead)
 +        {
 +            for (k = 0; k < NFTUPD; k++)
 +            {
 +                if ((file_version < ftupd[k].fvnr) && (j == ftupd[k].ftype))
 +                {
 +                    bClear = TRUE;
 +                }
 +            }
 +        }
 +        if (bClear)
 +        {
 +            ilist[j].nr     = 0;
 +            ilist[j].iatoms = NULL;
 +        }
 +        else
 +        {
 +            do_ilist(fio, &ilist[j], bRead, file_version, j);
 +            if (file_version < 78 && j == F_SETTLE && ilist[j].nr > 0)
 +            {
 +                add_settle_atoms(&ilist[j]);
 +            }
 +        }
 +    }
 +}
 +
 +/* Read or write the force-field parameters followed by the interaction
 + * lists of molt.
 + *
 + * For file_version >= 54 an fudgeQQ value sits between the two parts; note
 + * that do_ffparams() itself already handles fudgeQQ for file_version >= 57.
 + * NOTE(review): presumably this function is only used for files older than
 + * version 57 - confirm against the caller.
 + */
 +static void do_idef(t_fileio *fio, gmx_ffparams_t *ffparams, gmx_moltype_t *molt,
 +                    gmx_bool bRead, int file_version)
 +{
 +    do_ffparams(fio, ffparams, bRead, file_version);
 +
 +    if (file_version >= 54)
 +    {
 +        gmx_fio_do_real(fio, ffparams->fudgeQQ);
 +    }
 +
 +    do_ilists(fio, molt->ilist, bRead, file_version);
 +}
 +
 +/* Read or write a t_block: the number of blocks followed by nr+1 index
 + * entries.
 + *
 + * Older layouts are handled as follows: files before version 44 carry
 + * MAXNODES leading integers (passed through idum), and files before
 + * version 51 carry an extra count plus that many integers, which go
 + * through a temporary array that is freed again. On read an existing
 + * index array is freed and a new one of nr+1 entries is allocated.
 + */
 +static void do_block(t_fileio *fio, t_block *block, gmx_bool bRead, int file_version)
 +{
 +    /* dum_nra starts at 0 so that it has a defined value on every path,
 +     * not only when file_version < 51 fills it in.
 +     */
 +    int      i, idum, dum_nra = 0, *dum_a;
 +    gmx_bool bDum = TRUE;
 +
 +    if (file_version < 44)
 +    {
 +        for (i = 0; i < MAXNODES; i++)
 +        {
 +            gmx_fio_do_int(fio, idum);
 +        }
 +    }
 +    gmx_fio_do_int(fio, block->nr);
 +    if (file_version < 51)
 +    {
 +        gmx_fio_do_int(fio, dum_nra);
 +    }
 +    if (bRead)
 +    {
 +        if ((block->nalloc_index > 0) && (NULL != block->index))
 +        {
 +            sfree(block->index);
 +        }
 +        block->nalloc_index = block->nr+1;
 +        snew(block->index, block->nalloc_index);
 +    }
 +    bDum = gmx_fio_ndo_int(fio, block->index, block->nr+1);
 +
 +    if (file_version < 51 && dum_nra > 0)
 +    {
 +        /* Consume the obsolete array; its contents are not kept */
 +        snew(dum_a, dum_nra);
 +        bDum = gmx_fio_ndo_int(fio, dum_a, dum_nra);
 +        sfree(dum_a);
 +    }
 +}
 +
 +/* Read or write a t_blocka: the counts nr and nra, then nr+1 index entries
 + * and nra entries of the a array. Files older than version 44 carry
 + * MAXNODES leading integers that are passed through idum. On read both
 + * arrays are allocated here.
 + */
 +static void do_blocka(t_fileio *fio, t_blocka *block, gmx_bool bRead,
 +                      int file_version)
 +{
 +    int      i, idum;
 +    gmx_bool bDum = TRUE;
 +
 +    /* Leading per-node integers of old files */
 +    for (i = (file_version < 44) ? MAXNODES : 0; i > 0; i--)
 +    {
 +        gmx_fio_do_int(fio, idum);
 +    }
 +
 +    gmx_fio_do_int(fio, block->nr);
 +    gmx_fio_do_int(fio, block->nra);
 +
 +    if (bRead)
 +    {
 +        block->nalloc_index = block->nr+1;
 +        snew(block->index, block->nalloc_index);
 +        block->nalloc_a = block->nra;
 +        snew(block->a, block->nalloc_a);
 +    }
 +
 +    bDum = gmx_fio_ndo_int(fio, block->index, block->nr+1);
 +    bDum = gmx_fio_ndo_int(fio, block->a, block->nra);
 +}
 +
 +/* Read or write a single atom record (m, q, mB, qB, type, typeB, ptype,
 + * resind and, from version 52 on, atomnumber).
 + *
 + * Files older than version 57 additionally store the group numbers of the
 + * atom inline; those are transferred to groups->grpnr[..][atnr], with
 + * groups that the file does not contain set to 0.
 + */
 +static void do_atom(t_fileio *fio, t_atom *atom, int ngrp, gmx_bool bRead,
 +                    int file_version, gmx_groups_t *groups, int atnr)
 +{
 +    int i, myngrp;
 +
 +    gmx_fio_do_real(fio, atom->m);
 +    gmx_fio_do_real(fio, atom->q);
 +    gmx_fio_do_real(fio, atom->mB);
 +    gmx_fio_do_real(fio, atom->qB);
 +    gmx_fio_do_ushort(fio, atom->type);
 +    gmx_fio_do_ushort(fio, atom->typeB);
 +    gmx_fio_do_int(fio, atom->ptype);
 +    gmx_fio_do_int(fio, atom->resind);
 +
 +    if (file_version < 52)
 +    {
 +        if (bRead)
 +        {
 +            atom->atomnumber = NOTSET;
 +        }
 +    }
 +    else
 +    {
 +        gmx_fio_do_int(fio, atom->atomnumber);
 +    }
 +
 +    /* Number of groups actually stored per atom in this file version */
 +    myngrp = (file_version < 23) ? 8 : ((file_version < 39) ? 9 : ngrp);
 +
 +    if (file_version < 57)
 +    {
 +        unsigned char uchar[egcNR];
 +
 +        gmx_fio_ndo_uchar(fio, uchar, myngrp);
 +        /* Copy the old data format to the groups struct */
 +        for (i = 0; i < ngrp; i++)
 +        {
 +            groups->grpnr[i][atnr] = (i < myngrp) ? uchar[i] : 0;
 +        }
 +    }
 +}
 +
 +/* Read or write the ngrp group descriptions in grps[]: for each group its
 + * size followed by its nm_ind array. Older files hold fewer groups (8
 + * before version 23, 9 before version 39); the remaining groups get a
 + * single, freshly allocated entry.
 + */
 +static void do_grps(t_fileio *fio, int ngrp, t_grps grps[], gmx_bool bRead,
 +                    int file_version)
 +{
 +    int      i, j, myngrp;
 +    gmx_bool bDum = TRUE;
 +
 +    /* Number of groups present in this file version */
 +    myngrp = (file_version < 23) ? 8 : ((file_version < 39) ? 9 : ngrp);
 +
 +    for (j = 0; j < ngrp; j++)
 +    {
 +        if (j >= myngrp)
 +        {
 +            /* Group not in the file */
 +            grps[j].nr = 1;
 +            snew(grps[j].nm_ind, grps[j].nr);
 +            continue;
 +        }
 +
 +        gmx_fio_do_int(fio, grps[j].nr);
 +        if (bRead)
 +        {
 +            snew(grps[j].nm_ind, grps[j].nr);
 +        }
 +        bDum = gmx_fio_ndo_int(fio, grps[j].nm_ind, grps[j].nr);
 +    }
 +}
 +
 +/* Read or write one symbol-table reference. The file holds an integer;
 + * on read it is turned into a handle with get_symtab_handle(), on write
 + * it is obtained from the handle with lookup_symtab().
 + */
 +static void do_symstr(t_fileio *fio, char ***nm, gmx_bool bRead, t_symtab *symtab)
 +{
 +    int ls;
 +
 +    if (!bRead)
 +    {
 +        ls = lookup_symtab(symtab, *nm);
 +    }
 +
 +    gmx_fio_do_int(fio, ls);
 +
 +    if (bRead)
 +    {
 +        *nm = get_symtab_handle(symtab, ls);
 +    }
 +}
 +
 +/* Read or write nstr consecutive symbol-table references, nm[0] up to
 + * nm[nstr-1], each through do_symstr().
 + */
 +static void do_strstr(t_fileio *fio, int nstr, char ***nm, gmx_bool bRead,
 +                      t_symtab *symtab)
 +{
 +    char ***entry = nm;
 +    int     left;
 +
 +    for (left = nstr; left > 0; left--, entry++)
 +    {
 +        do_symstr(fio, entry, bRead, symtab);
 +    }
 +}
 +
 +/* Read or write n residue records: the residue name and, from version 63
 + * on, the residue number and insertion code. For older files the number
 + * is set to the 1-based position in the array and the insertion code to
 + * a blank.
 + */
 +static void do_resinfo(t_fileio *fio, int n, t_resinfo *ri, gmx_bool bRead,
 +                       t_symtab *symtab, int file_version)
 +{
 +    const gmx_bool bHasNumbering = (file_version >= 63);
 +    int            j;
 +
 +    for (j = 0; j < n; j++)
 +    {
 +        do_symstr(fio, &(ri[j].name), bRead, symtab);
 +        if (!bHasNumbering)
 +        {
 +            ri[j].nr = j + 1;
 +            ri[j].ic = ' ';
 +            continue;
 +        }
 +        gmx_fio_do_int(fio, ri[j].nr);
 +        gmx_fio_do_uchar(fio, ri[j].ic);
 +    }
 +}
 +
 +/* Read or write a t_atoms structure: the atom and residue counts, every
 + * atom record, the atom names, both atom-type name arrays and the residue
 + * records. On read all arrays are allocated here and pdbinfo is set to
 + * NULL.
 + *
 + * Files older than version 57 also keep the group data in this section:
 + * the number of group names, the per-atom group numbers (handled inside
 + * do_atom()), the group names and the group descriptions; these are put
 + * into groups.
 + */
 +static void do_atoms(t_fileio *fio, t_atoms *atoms, gmx_bool bRead, t_symtab *symtab,
 +                     int file_version,
 +                     gmx_groups_t *groups)
 +{
 +    int i;
 +
 +    gmx_fio_do_int(fio, atoms->nr);
 +    gmx_fio_do_int(fio, atoms->nres);
 +    if (file_version < 57)
 +    {
 +        gmx_fio_do_int(fio, groups->ngrpname);
 +        /* One group number per atom for each of the egcNR group types.
 +         * NOTE(review): this allocation is not guarded by bRead; it looks
 +         * like this path is only expected when reading - confirm.
 +         */
 +        for (i = 0; i < egcNR; i++)
 +        {
 +            groups->ngrpnr[i] = atoms->nr;
 +            snew(groups->grpnr[i], groups->ngrpnr[i]);
 +        }
 +    }
 +    if (bRead)
 +    {
 +        snew(atoms->atom, atoms->nr);
 +        snew(atoms->atomname, atoms->nr);
 +        snew(atoms->atomtype, atoms->nr);
 +        snew(atoms->atomtypeB, atoms->nr);
 +        snew(atoms->resinfo, atoms->nres);
 +        if (file_version < 57)
 +        {
 +            snew(groups->grpname, groups->ngrpname);
 +        }
 +        atoms->pdbinfo = NULL;
 +    }
 +    for (i = 0; (i < atoms->nr); i++)
 +    {
 +        do_atom(fio, &atoms->atom[i], egcNR, bRead, file_version, groups, i);
 +    }
 +    do_strstr(fio, atoms->nr, atoms->atomname, bRead, symtab);
 +    if (bRead && (file_version <= 20))
 +    {
 +        /* No type names are read for these files: use "?" for every atom */
 +        for (i = 0; i < atoms->nr; i++)
 +        {
 +            atoms->atomtype[i]  = put_symtab(symtab, "?");
 +            atoms->atomtypeB[i] = put_symtab(symtab, "?");
 +        }
 +    }
 +    else
 +    {
 +        do_strstr(fio, atoms->nr, atoms->atomtype, bRead, symtab);
 +        do_strstr(fio, atoms->nr, atoms->atomtypeB, bRead, symtab);
 +    }
 +    do_resinfo(fio, atoms->nres, atoms->resinfo, bRead, symtab, file_version);
 +
 +    if (file_version < 57)
 +    {
 +        do_strstr(fio, groups->ngrpname, groups->grpname, bRead, symtab);
 +
 +        do_grps(fio, egcNR, groups->grps, bRead, file_version);
 +    }
 +}
 +
 +/* Read or write a gmx_groups_t: the group descriptions, the group names
 + * and, for each of the egcNR group types, the number of per-atom group
 + * numbers followed by the numbers themselves. A count of zero means no
 + * array is stored; on read the pointer is then set to NULL.
 + */
 +static void do_groups(t_fileio *fio, gmx_groups_t *groups,
 +                      gmx_bool bRead, t_symtab *symtab,
 +                      int file_version)
 +{
 +    int      g, n, i;
 +    gmx_bool bDum = TRUE;
 +
 +    do_grps(fio, egcNR, groups->grps, bRead, file_version);
 +
 +    gmx_fio_do_int(fio, groups->ngrpname);
 +    if (bRead)
 +    {
 +        snew(groups->grpname, groups->ngrpname);
 +    }
 +    do_strstr(fio, groups->ngrpname, groups->grpname, bRead, symtab);
 +
 +    for (g = 0; g < egcNR; g++)
 +    {
 +        gmx_fio_do_int(fio, groups->ngrpnr[g]);
 +        if (groups->ngrpnr[g] != 0)
 +        {
 +            if (bRead)
 +            {
 +                snew(groups->grpnr[g], groups->ngrpnr[g]);
 +            }
 +            bDum = gmx_fio_ndo_uchar(fio, groups->grpnr[g], groups->ngrpnr[g]);
 +        }
 +        else if (bRead)
 +        {
 +            groups->grpnr[g] = NULL;
 +        }
 +    }
 +}
 +
 +/* Read or write the per-atom-type arrays of atomtypes.
 + *
 + * For file_version > 25 the number of types is followed by the radius,
 + * vol and surftens arrays, by atomnumber from version 40 on, and by
 + * gb_radius and S_hct from version 60 on. On read all six arrays are
 + * allocated regardless of which of them the file contains. For older
 + * files the structure is set to empty (nr = 0, all pointers NULL).
 + */
 +static void do_atomtypes(t_fileio *fio, t_atomtypes *atomtypes, gmx_bool bRead,
 +                         t_symtab *symtab, int file_version)
 +{
 +    int      i, j;
 +    gmx_bool bDum = TRUE;
 +
 +    if (file_version > 25)
 +    {
 +        gmx_fio_do_int(fio, atomtypes->nr);
 +        /* j holds the number of atom types for the rest of this branch */
 +        j = atomtypes->nr;
 +        if (bRead)
 +        {
 +            snew(atomtypes->radius, j);
 +            snew(atomtypes->vol, j);
 +            snew(atomtypes->surftens, j);
 +            snew(atomtypes->atomnumber, j);
 +            snew(atomtypes->gb_radius, j);
 +            snew(atomtypes->S_hct, j);
 +        }
 +        bDum = gmx_fio_ndo_real(fio, atomtypes->radius, j);
 +        bDum = gmx_fio_ndo_real(fio, atomtypes->vol, j);
 +        bDum = gmx_fio_ndo_real(fio, atomtypes->surftens, j);
 +        if (file_version >= 40)
 +        {
 +            bDum = gmx_fio_ndo_int(fio, atomtypes->atomnumber, j);
 +        }
 +        if (file_version >= 60)
 +        {
 +            bDum = gmx_fio_ndo_real(fio, atomtypes->gb_radius, j);
 +            bDum = gmx_fio_ndo_real(fio, atomtypes->S_hct, j);
 +        }
 +    }
 +    else
 +    {
 +        /* File versions prior to 26 cannot do GBSA,
 +         * so they don't use this structure
 +         */
 +        atomtypes->nr         = 0;
 +        atomtypes->radius     = NULL;
 +        atomtypes->vol        = NULL;
 +        atomtypes->surftens   = NULL;
 +        atomtypes->atomnumber = NULL;
 +        atomtypes->gb_radius  = NULL;
 +        atomtypes->S_hct      = NULL;
 +    }
 +}
 +
 +/* Read or write the symbol table: the number of strings followed by the
 + * strings themselves.
 + *
 + * On read a single symbuf holding all nr strings is allocated and every
 + * string is duplicated into it; running out of memory while duplicating
 + * is a fatal error. On write the chain of symbufs is walked until nr
 + * strings have been written; it is a fatal error if the chain ends before
 + * that.
 + */
 +static void do_symtab(t_fileio *fio, t_symtab *symtab, gmx_bool bRead)
 +{
 +    int       i, nr;
 +    t_symbuf *symbuf;
 +    char      buf[STRLEN];
 +
 +    gmx_fio_do_int(fio, symtab->nr);
 +    nr     = symtab->nr;
 +    if (bRead)
 +    {
 +        snew(symtab->symbuf, 1);
 +        symbuf          = symtab->symbuf;
 +        symbuf->bufsize = nr;
 +        snew(symbuf->buf, nr);
 +        for (i = 0; (i < nr); i++)
 +        {
 +            gmx_fio_do_string(fio, buf);
 +            symbuf->buf[i] = strdup(buf);
 +            /* strdup() reports allocation failure by returning NULL; do
 +             * not leave a NULL entry behind for later lookups to trip on.
 +             */
 +            if (symbuf->buf[i] == NULL)
 +            {
 +                gmx_fatal(FARGS, "Out of memory while reading symtab string %d of %d", i+1, nr);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        symbuf = symtab->symbuf;
 +        while (symbuf != NULL)
 +        {
 +            /* Write at most the strings that are still outstanding */
 +            for (i = 0; (i < symbuf->bufsize) && (i < nr); i++)
 +            {
 +                gmx_fio_do_string(fio, symbuf->buf[i]);
 +            }
 +            nr    -= i;
 +            symbuf = symbuf->next;
 +        }
 +        if (nr != 0)
 +        {
 +            gmx_fatal(FARGS, "nr of symtab strings left: %d", nr);
 +        }
 +    }
 +}
 +
 +/* Read or write the cmap grids: the number of grids and the grid spacing,
 + * followed for every grid by grid_spacing*grid_spacing groups of four
 + * reals. On read the grid array and the data of each grid are allocated
 + * before any value is read.
 + */
 +static void do_cmap(t_fileio *fio, gmx_cmap_t *cmap_grid, gmx_bool bRead)
 +{
 +    int i, j, ngrid, gs, nelem;
 +
 +    gmx_fio_do_int(fio, cmap_grid->ngrid);
 +    gmx_fio_do_int(fio, cmap_grid->grid_spacing);
 +
 +    ngrid = cmap_grid->ngrid;
 +    gs    = cmap_grid->grid_spacing;
 +    nelem = gs * gs;
 +
 +    if (bRead)
 +    {
 +        snew(cmap_grid->cmapdata, ngrid);
 +        for (i = 0; i < ngrid; i++)
 +        {
 +            snew(cmap_grid->cmapdata[i].cmap, 4*nelem);
 +        }
 +    }
 +
 +    for (i = 0; i < ngrid; i++)
 +    {
 +        /* Four consecutive values per grid point */
 +        for (j = 0; j < nelem; j++)
 +        {
 +            gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4]);
 +            gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4+1]);
 +            gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4+2]);
 +            gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4+3]);
 +        }
 +    }
 +}
 +
 +
 +/* Assign chain numbers and chain identifiers to the residues of atoms,
 + * one molecule of mols at a time.
 + *
 + * Every molecule gets its own chain number, counting from 0. Molecules of
 + * at least CHAIN_MIN_ATOMS atoms get the next letter from 'A' to 'Z' as
 + * chain id while letters last; all other molecules get a blank. When
 + * exactly one letter was handed out, all chain ids are blanked again.
 + */
 +void tpx_make_chain_identifiers(t_atoms *atoms, t_block *mols)
 +{
 +    int        mol, at, first, end, res;
 +    char       id, next_id;
 +    t_resinfo *ri;
 +
 +    /* We always assign a new chain number, but save the chain id characters
 +     * for larger molecules.
 +     */
 +#define CHAIN_MIN_ATOMS 15
 +
 +    next_id = 'A';
 +    for (mol = 0; mol < mols->nr; mol++)
 +    {
 +        first = mols->index[mol];
 +        end   = mols->index[mol+1];
 +
 +        id = ' ';
 +        if ((end - first >= CHAIN_MIN_ATOMS) && (next_id <= 'Z'))
 +        {
 +            id = next_id;
 +            next_id++;
 +        }
 +
 +        /* The chain number is simply the molecule index */
 +        for (at = first; at < end; at++)
 +        {
 +            ri           = &atoms->resinfo[atoms->atom[at].resind];
 +            ri->chainnum = mol;
 +            ri->chainid  = id;
 +        }
 +    }
 +
 +    /* Blank out the chain id if there was only one chain */
 +    if (next_id != 'B')
 +    {
 +        return;
 +    }
 +    for (res = 0; res < atoms->nres; res++)
 +    {
 +        atoms->resinfo[res].chainid = ' ';
 +    }
 +}
 +
 +/* Read or write one molecule type: its atoms and its exclusions and, for
 + * file_version >= 57, also its name, interaction lists and charge groups.
 + * When reading with gmx_debug_at set, the atoms and charge groups are
 + * printed to the debug file.
 + */
 +static void do_moltype(t_fileio *fio, gmx_moltype_t *molt, gmx_bool bRead,
 +                       t_symtab *symtab, int file_version,
 +                       gmx_groups_t *groups)
 +{
 +    int i;
 +
 +    if (file_version >= 57)
 +    {
 +        do_symstr(fio, &(molt->name), bRead, symtab);
 +    }
 +
 +    do_atoms(fio, &molt->atoms, bRead, symtab, file_version, groups);
 +
 +    if (bRead && gmx_debug_at)
 +    {
 +        pr_atoms(debug, 0, "atoms", &molt->atoms, TRUE);
 +    }
 +
 +    if (file_version >= 57)
 +    {
 +        do_ilists(fio, molt->ilist, bRead, file_version);
 +
 +        do_block(fio, &molt->cgs, bRead, file_version);
 +        if (bRead && gmx_debug_at)
 +        {
 +            pr_block(debug, 0, "cgs", &molt->cgs, TRUE);
 +        }
 +    }
 +
 +    /* This used to be in the atoms struct */
 +    do_blocka(fio, &molt->excls, bRead, file_version);
 +}
 +
 +/* Reads (bRead) or writes one molecule block: its type, molecule count,
 + * atoms per molecule and the optional A- and B-state position restraint
 + * reference coordinates. On reading, the coordinate arrays are allocated
 + * here when their stored count is positive.
 + */
 +static void do_molblock(t_fileio *fio, gmx_molblock_t *molb, gmx_bool bRead,
 +                        int file_version)
 +{
 +    /* Fixed-size part of the record */
 +    gmx_fio_do_int(fio, molb->type);
 +    gmx_fio_do_int(fio, molb->nmol);
 +    gmx_fio_do_int(fio, molb->natoms_mol);
 +
 +    /* Position restraint coordinates, A state */
 +    gmx_fio_do_int(fio, molb->nposres_xA);
 +    if (bRead && molb->nposres_xA > 0)
 +    {
 +        snew(molb->posres_xA, molb->nposres_xA);
 +    }
 +    if (molb->nposres_xA > 0)
 +    {
 +        gmx_fio_ndo_rvec(fio, molb->posres_xA, molb->nposres_xA);
 +    }
 +
 +    /* Position restraint coordinates, B state */
 +    gmx_fio_do_int(fio, molb->nposres_xB);
 +    if (bRead && molb->nposres_xB > 0)
 +    {
 +        snew(molb->posres_xB, molb->nposres_xB);
 +    }
 +    if (molb->nposres_xB > 0)
 +    {
 +        gmx_fio_ndo_rvec(fio, molb->posres_xB, molb->nposres_xB);
 +    }
 +}
 +
 +/* Builds and returns a block with one entry per molecule in mtop:
 + * index[m] is the first atom of molecule m and index[nr] the total
 + * number of atoms, accumulated over all molecule blocks in order.
 + * The index array is allocated here.
 + */
 +static t_block mtop_mols(gmx_mtop_t *mtop)
 +{
 +    t_block mols;
 +    int     mb, mol, m, a, natoms_mol;
 +
 +    /* First pass: count the molecules to size the index array */
 +    mols.nr = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        mols.nr += mtop->molblock[mb].nmol;
 +    }
 +    mols.nalloc_index = mols.nr + 1;
 +    snew(mols.index, mols.nalloc_index);
 +
 +    /* Second pass: running atom offsets, one boundary per molecule */
 +    a             = 0;
 +    m             = 0;
 +    mols.index[0] = a;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        natoms_mol = mtop->molblock[mb].natoms_mol;
 +        for (mol = 0; mol < mtop->molblock[mb].nmol; mol++)
 +        {
 +            a              += natoms_mol;
 +            mols.index[++m] = a;
 +        }
 +    }
 +
 +    return mols;
 +}
 +
 +/* Copies the position restraint reference positions of molecule type 0
 + * into the coordinate arrays of molecule block 0.
 + *
 + * The reference positions live in the interaction parameters: in
 + * posres (normal restraints) and in fbposres (flat-bottomed restraints).
 + * If both kinds are present, posres.pos0A is identical to fbposres.pos0,
 + * so the flat-bottomed list is only consulted when there are no normal
 + * restraints. Nothing is done when neither list has entries.
 + */
 +static void add_posres_molblock(gmx_mtop_t *mtop)
 +{
 +    t_ilist        *posres, *fbposres;
 +    t_iparams      *par;
 +    gmx_molblock_t *molb;
 +    gmx_bool        bHaveBState;
 +    int             max_atom, k, imol, atom;
 +
 +    posres   = &mtop->moltype[0].ilist[F_POSRES];
 +    fbposres = &mtop->moltype[0].ilist[F_FBPOSRES];
 +    if (posres->nr == 0 && fbposres->nr == 0)
 +    {
 +        return;
 +    }
 +
 +    /* Find the highest restrained atom and whether any B-state
 +     * reference position differs from its A-state counterpart.
 +     */
 +    max_atom    = 0;
 +    bHaveBState = FALSE;
 +    for (k = 0; k < posres->nr; k += 2)
 +    {
 +        par      = &mtop->ffparams.iparams[posres->iatoms[k]];
 +        max_atom = max(max_atom, posres->iatoms[k+1]);
 +        if (par->posres.pos0B[XX] != par->posres.pos0A[XX] ||
 +            par->posres.pos0B[YY] != par->posres.pos0A[YY] ||
 +            par->posres.pos0B[ZZ] != par->posres.pos0A[ZZ])
 +        {
 +            bHaveBState = TRUE;
 +        }
 +    }
 +    if (posres->nr == 0)
 +    {
 +        /* Only flat-bottomed restraints: they determine the highest atom,
 +         * and there is no B state for them.
 +         */
 +        for (k = 0; k < fbposres->nr; k += 2)
 +        {
 +            max_atom = max(max_atom, fbposres->iatoms[k+1]);
 +        }
 +    }
 +
 +    /* Extend the coordinate block to the end of the molecule
 +     * that contains the highest restrained atom.
 +     */
 +    for (imol = 0; max_atom >= mtop->mols.index[imol+1]; imol++)
 +    {
 +    }
 +
 +    molb             = &mtop->molblock[0];
 +    molb->nposres_xA = mtop->mols.index[imol+1];
 +    snew(molb->posres_xA, molb->nposres_xA);
 +    if (!bHaveBState)
 +    {
 +        molb->nposres_xB = 0;
 +    }
 +    else
 +    {
 +        molb->nposres_xB = molb->nposres_xA;
 +        snew(molb->posres_xB, molb->nposres_xB);
 +    }
 +
 +    /* Fill in the reference positions of the normal restraints */
 +    for (k = 0; k < posres->nr; k += 2)
 +    {
 +        par  = &mtop->ffparams.iparams[posres->iatoms[k]];
 +        atom = posres->iatoms[k+1];
 +
 +        molb->posres_xA[atom][XX] = par->posres.pos0A[XX];
 +        molb->posres_xA[atom][YY] = par->posres.pos0A[YY];
 +        molb->posres_xA[atom][ZZ] = par->posres.pos0A[ZZ];
 +        if (bHaveBState)
 +        {
 +            molb->posres_xB[atom][XX] = par->posres.pos0B[XX];
 +            molb->posres_xB[atom][YY] = par->posres.pos0B[YY];
 +            molb->posres_xB[atom][ZZ] = par->posres.pos0B[ZZ];
 +        }
 +    }
 +
 +    if (posres->nr == 0)
 +    {
 +        /* Only flat-bottomed restraints are present, so the reference
 +         * positions come from them (no B state in this case).
 +         */
 +        for (k = 0; k < fbposres->nr; k += 2)
 +        {
 +            par  = &mtop->ffparams.iparams[fbposres->iatoms[k]];
 +            atom = fbposres->iatoms[k+1];
 +
 +            molb->posres_xA[atom][XX] = par->fbposres.pos0[XX];
 +            molb->posres_xA[atom][YY] = par->fbposres.pos0[YY];
 +            molb->posres_xA[atom][ZZ] = par->fbposres.pos0[ZZ];
 +        }
 +    }
 +}
 +
 +/* Sets disres.npair in the distance restraint parameters.
 + *
 + * For every molecule type the F_DISRES list is walked in steps of three
 + * entries. Consecutive entries whose parameters share the same label are
 + * counted together, and the count is stored in the parameters of the
 + * last entry of each such run.
 + */
 +static void set_disres_npair(gmx_mtop_t *mtop)
 +{
 +    t_iparams *params;
 +    t_ilist   *disres;
 +    t_iatom   *ia;
 +    gmx_bool   bRunEnds;
 +    int        mt, k, count;
 +
 +    params = mtop->ffparams.iparams;
 +
 +    for (mt = 0; mt < mtop->nmoltype; mt++)
 +    {
 +        disres = &mtop->moltype[mt].ilist[F_DISRES];
 +        if (disres->nr <= 0)
 +        {
 +            continue;
 +        }
 +
 +        ia    = disres->iatoms;
 +        count = 0;
 +        for (k = 0; k < disres->nr; k += 3)
 +        {
 +            count++;
 +
 +            /* A run ends at the last entry or where the label changes;
 +             * the next entry is only inspected when there is one.
 +             */
 +            bRunEnds = (k + 3 == disres->nr);
 +            if (!bRunEnds)
 +            {
 +                bRunEnds = (params[ia[k]].disres.label != params[ia[k+3]].disres.label);
 +            }
 +            if (bRunEnds)
 +            {
 +                params[ia[k]].disres.npair = count;
 +                count                      = 0;
 +            }
 +        }
 +    }
 +}
 +
 +/* Reads (bRead) or writes the complete topology in mtop.
 + *
 + * The layout depends on file_version:
 + *  - version >= 57: force field parameters, a list of molecule types and
 + *    a list of molecule blocks are stored explicitly, followed by the
 + *    atom count, the atom types, the groups and (>= 65) the cmap grid.
 + *  - version < 57: a single molecule type and a single molecule block
 + *    are set up here, the interactions come from do_idef and the
 + *    molecule block is stored in the file.
 + *  - version < 51: an additional blocka (the former shake blocks) is
 + *    read and discarded.
 + * The order of the calls below is the order of the data in the file.
 + */
 +static void do_mtop(t_fileio *fio, gmx_mtop_t *mtop, gmx_bool bRead,
 +                    int file_version)
 +{
 +    int      mt, mb, i;
 +    t_blocka dumb;
 +
 +    if (bRead)
 +    {
 +        init_mtop(mtop);
 +    }
 +    do_symtab(fio, &(mtop->symtab), bRead);
 +    if (bRead && debug)
 +    {
 +        pr_symtab(debug, 0, "symtab", &mtop->symtab);
 +    }
 +
 +    do_symstr(fio, &(mtop->name), bRead, &(mtop->symtab));
 +
 +    /* Number of molecule types: stored from version 57, otherwise one */
 +    if (file_version >= 57)
 +    {
 +        do_ffparams(fio, &mtop->ffparams, bRead, file_version);
 +
 +        gmx_fio_do_int(fio, mtop->nmoltype);
 +    }
 +    else
 +    {
 +        mtop->nmoltype = 1;
 +    }
 +    if (bRead)
 +    {
 +        snew(mtop->moltype, mtop->nmoltype);
 +        if (file_version < 57)
 +        {
 +            /* Old files have no molecule type name, use the topology name */
 +            mtop->moltype[0].name = mtop->name;
 +        }
 +    }
 +    for (mt = 0; mt < mtop->nmoltype; mt++)
 +    {
 +        do_moltype(fio, &mtop->moltype[mt], bRead, &mtop->symtab, file_version,
 +                   &mtop->groups);
 +    }
 +
 +    /* Number of molecule blocks: stored from version 57, otherwise one */
 +    if (file_version >= 57)
 +    {
 +        gmx_fio_do_int(fio, mtop->nmolblock);
 +    }
 +    else
 +    {
 +        mtop->nmolblock = 1;
 +    }
 +    if (bRead)
 +    {
 +        snew(mtop->molblock, mtop->nmolblock);
 +    }
 +    if (file_version >= 57)
 +    {
 +        for (mb = 0; mb < mtop->nmolblock; mb++)
 +        {
 +            do_molblock(fio, &mtop->molblock[mb], bRead, file_version);
 +        }
 +        gmx_fio_do_int(fio, mtop->natoms);
 +    }
 +    else
 +    {
 +        /* Single block holding one copy of the single molecule type;
 +         * the posres counts are filled in by add_posres_molblock below.
 +         */
 +        mtop->molblock[0].type       = 0;
 +        mtop->molblock[0].nmol       = 1;
 +        mtop->molblock[0].natoms_mol = mtop->moltype[0].atoms.nr;
 +        mtop->molblock[0].nposres_xA = 0;
 +        mtop->molblock[0].nposres_xB = 0;
 +    }
 +
 +    do_atomtypes (fio, &(mtop->atomtypes), bRead, &(mtop->symtab), file_version);
 +    if (bRead && debug)
 +    {
 +        pr_atomtypes(debug, 0, "atomtypes", &mtop->atomtypes, TRUE);
 +    }
 +
 +    if (file_version < 57)
 +    {
 +        /* Debug statements are inside do_idef */
 +        do_idef (fio, &mtop->ffparams, &mtop->moltype[0], bRead, file_version);
 +        mtop->natoms = mtop->moltype[0].atoms.nr;
 +    }
 +
 +    /* The cmap grid is stored from version 65, otherwise it is empty */
 +    if (file_version >= 65)
 +    {
 +        do_cmap(fio, &mtop->ffparams.cmap_grid, bRead);
 +    }
 +    else
 +    {
 +        mtop->ffparams.cmap_grid.ngrid        = 0;
 +        mtop->ffparams.cmap_grid.grid_spacing = 0;
 +        mtop->ffparams.cmap_grid.cmapdata     = NULL;
 +    }
 +
 +    if (file_version >= 57)
 +    {
 +        do_groups(fio, &mtop->groups, bRead, &(mtop->symtab), file_version);
 +    }
 +
 +    if (file_version < 57)
 +    {
 +        do_block(fio, &mtop->moltype[0].cgs, bRead, file_version);
 +        if (bRead && gmx_debug_at)
 +        {
 +            pr_block(debug, 0, "cgs", &mtop->moltype[0].cgs, TRUE);
 +        }
 +        do_block(fio, &mtop->mols, bRead, file_version);
 +        /* Add the posres coordinates to the molblock */
 +        add_posres_molblock(mtop);
 +    }
 +    if (bRead)
 +    {
 +        if (file_version >= 57)
 +        {
 +            /* The molecule block is not in the file: rebuild it from
 +             * the molecule blocks.
 +             */
 +            done_block(&mtop->mols);
 +            mtop->mols = mtop_mols(mtop);
 +        }
 +        if (gmx_debug_at)
 +        {
 +            pr_block(debug, 0, "mols", &mtop->mols, TRUE);
 +        }
 +    }
 +
 +    if (file_version < 51)
 +    {
 +        /* Here used to be the shake blocks */
 +        do_blocka(fio, &dumb, bRead, file_version);
 +        /* The data is not kept: release whatever was allocated for it */
 +        if (dumb.nr > 0)
 +        {
 +            sfree(dumb.index);
 +        }
 +        if (dumb.nra > 0)
 +        {
 +            sfree(dumb.a);
 +        }
 +    }
 +
 +    if (bRead)
 +    {
 +        close_symtab(&(mtop->symtab));
 +    }
 +}
 +
 +/* If TopOnlyOK is TRUE then we can read even future versions
 + * of tpx files, provided the file_generation hasn't changed.
 + * If it is FALSE, we need the inputrecord too, and bail out
 + * if the file is newer than the program.
 + *
 + * The version and generation of the topology (see top of this file)
 + * are returned in the two last arguments.
 + *
 + * If possible, we will read the inputrec even when TopOnlyOK is TRUE.
 + */
 +/* Reads (bRead) or writes the tpx header.
 + *
 + * fio             - open tpx file
 + * bRead           - TRUE to read the header into tpx, FALSE to write it
 + * tpx             - header contents (atom count, flags for the sections
 + *                   present in the file, lambda, ...)
 + * TopOnlyOK       - when TRUE, a file with a version newer than
 + *                   tpx_version is accepted as long as its generation is
 + *                   not newer than tpx_generation
 + * file_version    - if not NULL, receives the file version
 + * file_generation - if not NULL, receives the file generation
 + *
 + * Calls gmx_fatal on files that are too old, have an unknown precision,
 + * or whose version, generation or tag is not supported by this code.
 + */
 +static void do_tpxheader(t_fileio *fio, gmx_bool bRead, t_tpxheader *tpx,
 +                         gmx_bool TopOnlyOK, int *file_version,
 +                         int *file_generation)
 +{
 +    char      buf[STRLEN];
 +    char      file_tag[STRLEN];
 +    gmx_bool  bDouble;
 +    int       precision;
 +    int       fver, fgen;
 +    int       idum = 0;
 +    real      rdum = 0;
 +
 +    /* For file version 80 none of the branches below reads or sets the
 +     * tag, so start from an empty string instead of uninitialized memory.
 +     */
 +    file_tag[0] = '\0';
 +
 +    gmx_fio_checktype(fio);
 +    gmx_fio_setdebug(fio, bDebugMode());
 +
 +    /* NEW! XDR tpb file */
 +    precision = sizeof(real);
 +    if (bRead)
 +    {
 +        gmx_fio_do_string(fio, buf);
 +        if (strncmp(buf, "VERSION", 7))
 +        {
 +            gmx_fatal(FARGS, "Can not read file %s,\n"
 +                      "             this file is from a Gromacs version which is older than 2.0\n"
 +                      "             Make a new one with grompp or use a gro or pdb file, if possible",
 +                      gmx_fio_getname(fio));
 +        }
 +        gmx_fio_do_int(fio, precision);
 +        bDouble = (precision == sizeof(double));
 +        if ((precision != sizeof(float)) && !bDouble)
 +        {
 +            /* sizeof yields size_t; convert explicitly to match %d */
 +            gmx_fatal(FARGS, "Unknown precision in file %s: real is %d bytes "
 +                      "instead of %d or %d",
 +                      gmx_fio_getname(fio), precision,
 +                      (int)sizeof(float), (int)sizeof(double));
 +        }
 +        gmx_fio_setprecision(fio, bDouble);
 +        fprintf(stderr, "Reading file %s, %s (%s precision)\n",
 +                gmx_fio_getname(fio), buf, bDouble ? "double" : "single");
 +    }
 +    else
 +    {
 +        gmx_fio_write_string(fio, GromacsVersion());
 +        bDouble = (precision == sizeof(double));
 +        gmx_fio_setprecision(fio, bDouble);
 +        gmx_fio_do_int(fio, precision);
 +        fver = tpx_version;
 +        sprintf(file_tag, "%s", tpx_tag);
 +        fgen = tpx_generation;
 +    }
 +
 +    /* Check versions! */
 +    gmx_fio_do_int(fio, fver);
 +
 +    /* This is for backward compatibility with development versions 77-79
 +     * where the tag was, mistakenly, placed before the generation,
 +     * which would cause a segv instead of a proper error message
 +     * when reading the topology only from tpx with <77 code.
 +     */
 +    if (fver >= 77 && fver <= 79)
 +    {
 +        gmx_fio_do_string(fio, file_tag);
 +    }
 +
 +    if (fver >= 26)
 +    {
 +        gmx_fio_do_int(fio, fgen);
 +    }
 +    else
 +    {
 +        fgen = 0;
 +    }
 +
 +    if (fver >= 81)
 +    {
 +        gmx_fio_do_string(fio, file_tag);
 +    }
 +    if (bRead)
 +    {
 +        if (fver < 77)
 +        {
 +            /* Versions before 77 don't have the tag, set it to release */
 +            sprintf(file_tag, "%s", TPX_TAG_RELEASE);
 +        }
 +
 +        if (strcmp(file_tag, tpx_tag) != 0)
 +        {
 +            fprintf(stderr, "Note: file tpx tag '%s', software tpx tag '%s'\n",
 +                    file_tag, tpx_tag);
 +
 +            /* We only support reading tpx files with the same tag as the code
 +             * or tpx files with the release tag and with lower version number.
 +             * The check below rejects a file whose tag is not the release
 +             * tag and whose version is lower than that of the code.
 +             */
 +            if (strcmp(file_tag, TPX_TAG_RELEASE) != 0 && fver < tpx_version)
 +            {
 +                gmx_fatal(FARGS, "tpx tag/version mismatch: reading tpx file (%s) version %d, tag '%s' with program for tpx version %d, tag '%s'",
 +                          gmx_fio_getname(fio), fver, file_tag,
 +                          tpx_version, tpx_tag);
 +            }
 +        }
 +    }
 +
 +    if (file_version != NULL)
 +    {
 +        *file_version = fver;
 +    }
 +    if (file_generation != NULL)
 +    {
 +        *file_generation = fgen;
 +    }
 +
 +    /* Note that the last test does not depend on the file: it refuses
 +     * to run at all when the code itself has tpx_version 80.
 +     */
 +    if ((fver <= tpx_incompatible_version) ||
 +        ((fver > tpx_version) && !TopOnlyOK) ||
 +        (fgen > tpx_generation) ||
 +        tpx_version == 80) /*80 was used by both 5.0-dev and 4.6-dev*/
 +    {
 +        gmx_fatal(FARGS, "reading tpx file (%s) version %d with version %d program",
 +                  gmx_fio_getname(fio), fver, tpx_version);
 +    }
 +
 +    do_section(fio, eitemHEADER, bRead);
 +    gmx_fio_do_int(fio, tpx->natoms);
 +    if (fver >= 28)
 +    {
 +        gmx_fio_do_int(fio, tpx->ngtc);
 +    }
 +    else
 +    {
 +        tpx->ngtc = 0;
 +    }
 +    if (fver < 62)
 +    {
 +        /* Two values that are no longer used, skipped via dummies */
 +        gmx_fio_do_int(fio, idum);
 +        gmx_fio_do_real(fio, rdum);
 +    }
 +    /*a better decision will eventually (5.0 or later) need to be made
 +       on how to treat the alchemical state of the system, which can now
 +       vary through a simulation, and cannot be completely described
 +       though a single lambda variable, or even a single state
 +       index. Eventually, should probably be a vector. MRS*/
 +    if (fver >= 79)
 +    {
 +        gmx_fio_do_int(fio, tpx->fep_state);
 +    }
 +    gmx_fio_do_real(fio, tpx->lambda);
 +    gmx_fio_do_int(fio, tpx->bIr);
 +    gmx_fio_do_int(fio, tpx->bTop);
 +    gmx_fio_do_int(fio, tpx->bX);
 +    gmx_fio_do_int(fio, tpx->bV);
 +    gmx_fio_do_int(fio, tpx->bF);
 +    gmx_fio_do_int(fio, tpx->bBox);
 +
 +    if (fgen > tpx_generation)
 +    {
 +        /* This can only happen if TopOnlyOK=TRUE */
 +        tpx->bIr = FALSE;
 +    }
 +}
 +/* Reads (bRead) or writes a complete tpx file: header, box, legacy
 + * thermostat data, inputrec, topology, coordinates, velocities and forces.
 + *
 + * fio          - open tpx file
 + * bRead        - TRUE to read, FALSE to write
 + * ir           - input record, may be NULL (the topology can then be read
 + *                from files with a newer version, see do_tpxheader)
 + * state        - state holding box, coordinates and velocities
 + * f            - forces, may be NULL
 + * mtop         - topology, may be NULL
 + * bXVallocated - on reading: TRUE when state->x and state->v were set up
 + *                by the caller and must be kept; otherwise init_state is
 + *                called with the atom count from the header
 + *
 + * Returns the pbc type: transferred through the file for versions >= 53,
 + * taken from the inputrec that was read for versions 26 to 52, and -1 in
 + * all other cases.
 + */
 +static int do_tpx(t_fileio *fio, gmx_bool bRead,
 +                  t_inputrec *ir, t_state *state, rvec *f, gmx_mtop_t *mtop,
 +                  gmx_bool bXVallocated)
 +{
 +    t_tpxheader     tpx;
 +    t_inputrec      dum_ir;
 +    gmx_mtop_t      dum_top;
 +    gmx_bool        TopOnlyOK, bDum = TRUE;
 +    int             file_version, file_generation;
 +    rvec           *xptr, *vptr;
 +    int             ePBC;
 +    gmx_bool        bPeriodicMols;
 +
 +    if (!bRead)
 +    {
 +        /* When writing, the header describes what the caller passed in */
 +        tpx.natoms    = state->natoms;
 +        tpx.ngtc      = state->ngtc; /* need to add nnhpres here? */
 +        tpx.fep_state = state->fep_state;
 +        tpx.lambda    = state->lambda[efptFEP];
 +        tpx.bIr       = (ir       != NULL);
 +        tpx.bTop      = (mtop     != NULL);
 +        tpx.bX        = (state->x != NULL);
 +        tpx.bV        = (state->v != NULL);
 +        tpx.bF        = (f        != NULL);
 +        tpx.bBox      = TRUE;
 +    }
 +
 +    TopOnlyOK = (ir == NULL);
 +
 +    do_tpxheader(fio, bRead, &tpx, TopOnlyOK, &file_version, &file_generation);
 +
 +    if (bRead)
 +    {
 +        state->flags  = 0;
 +        /* state->lambda = tpx.lambda;*/ /*remove this eventually? */
 +        /* The init_state calls initialize the Nose-Hoover xi integrals to zero */
 +        if (bXVallocated)
 +        {
 +            /* Keep the caller's arrays across init_state */
 +            xptr = state->x;
 +            vptr = state->v;
 +            init_state(state, 0, tpx.ngtc, 0, 0, 0); /* nose-hoover chains */ /* eventually, need to add nnhpres here? */
 +            state->natoms = tpx.natoms;
 +            state->nalloc = tpx.natoms;
 +            state->x      = xptr;
 +            state->v      = vptr;
 +        }
 +        else
 +        {
 +            init_state(state, tpx.natoms, tpx.ngtc, 0, 0, 0); /* nose-hoover chains */
 +        }
 +    }
 +
 +#define do_test(fio, b, p) if (bRead && (p != NULL) && !b) gmx_fatal(FARGS, "No %s in %s",#p, gmx_fio_getname(fio))
 +
 +    do_test(fio, tpx.bBox, state->box);
 +    do_section(fio, eitemBOX, bRead);
 +    if (tpx.bBox)
 +    {
 +        gmx_fio_ndo_rvec(fio, state->box, DIM);
 +        if (file_version >= 51)
 +        {
 +            gmx_fio_ndo_rvec(fio, state->box_rel, DIM);
 +        }
 +        else
 +        {
 +            /* We initialize box_rel after reading the inputrec */
 +            clear_mat(state->box_rel);
 +        }
 +        if (file_version >= 28)
 +        {
 +            gmx_fio_ndo_rvec(fio, state->boxv, DIM);
 +            if (file_version < 56)
 +            {
 +                matrix mdum;
 +                gmx_fio_ndo_rvec(fio, mdum, DIM);
 +            }
 +        }
 +    }
 +
 +    if (state->ngtc > 0 && file_version >= 28)
 +    {
 +        real *dumv;
 +        /*ndo_double(state->nosehoover_xi,state->ngtc,bDum);*/
 +        /*ndo_double(state->nosehoover_vxi,state->ngtc,bDum);*/
 +        /*ndo_double(state->therm_integral,state->ngtc,bDum);*/
 +        snew(dumv, state->ngtc);
 +        if (file_version < 69)
 +        {
 +            bDum = gmx_fio_ndo_real(fio, dumv, state->ngtc);
 +        }
 +        /* These used to be the Berendsen tcoupl_lambda's */
 +        bDum = gmx_fio_ndo_real(fio, dumv, state->ngtc);
 +        sfree(dumv);
 +    }
 +
 +    /* Prior to tpx version 26, the inputrec was here.
 +     * I moved it to enable partial forward-compatibility
 +     * for analysis/viewer programs.
 +     */
 +    if (file_version < 26)
 +    {
 +        do_test(fio, tpx.bIr, ir);
 +        do_section(fio, eitemIR, bRead);
 +        if (tpx.bIr)
 +        {
 +            if (ir)
 +            {
 +                do_inputrec(fio, ir, bRead, file_version,
 +                            mtop ? &mtop->ffparams.fudgeQQ : NULL);
 +                if (bRead && debug)
 +                {
 +                    pr_inputrec(debug, 0, "inputrec", ir, FALSE);
 +                }
 +            }
 +            else
 +            {
 +                /* The caller does not want the inputrec, but it still has
 +                 * to be consumed from the file: use a throw-away copy.
 +                 */
 +                do_inputrec(fio, &dum_ir, bRead, file_version,
 +                            mtop ? &mtop->ffparams.fudgeQQ : NULL);
 +                if (bRead && debug)
 +                {
 +                    pr_inputrec(debug, 0, "inputrec", &dum_ir, FALSE);
 +                }
 +                done_inputrec(&dum_ir);
 +            }
 +
 +        }
 +    }
 +
 +    do_test(fio, tpx.bTop, mtop);
 +    do_section(fio, eitemTOP, bRead);
 +    if (tpx.bTop)
 +    {
 +        if (mtop)
 +        {
 +            do_mtop(fio, mtop, bRead, file_version);
 +        }
 +        else
 +        {
 +            /* Topology not requested: consume it into a throw-away copy */
 +            do_mtop(fio, &dum_top, bRead, file_version);
 +            done_mtop(&dum_top, TRUE);
 +        }
 +    }
 +    do_test(fio, tpx.bX, state->x);
 +    do_section(fio, eitemX, bRead);
 +    if (tpx.bX)
 +    {
 +        if (bRead)
 +        {
 +            state->flags |= (1<<estX);
 +        }
 +        gmx_fio_ndo_rvec(fio, state->x, state->natoms);
 +    }
 +
 +    do_test(fio, tpx.bV, state->v);
 +    do_section(fio, eitemV, bRead);
 +    if (tpx.bV)
 +    {
 +        if (bRead)
 +        {
 +            state->flags |= (1<<estV);
 +        }
 +        gmx_fio_ndo_rvec(fio, state->v, state->natoms);
 +    }
 +
 +    do_test(fio, tpx.bF, f);
 +    do_section(fio, eitemF, bRead);
 +    if (tpx.bF)
 +    {
 +        gmx_fio_ndo_rvec(fio, f, state->natoms);
 +    }
 +
 +    /* Starting with tpx version 26, we have the inputrec
 +     * at the end of the file, so we can ignore it
 +     * if the file is newer than the software (but still the
 +     * same generation - see comments at the top of this file).
 +     */
 +    ePBC          = -1;
 +    bPeriodicMols = FALSE;
 +    if (file_version >= 26)
 +    {
 +        do_test(fio, tpx.bIr, ir);
 +        do_section(fio, eitemIR, bRead);
 +        if (tpx.bIr)
 +        {
 +            if (file_version >= 53)
 +            {
 +                /* Removed the pbc info from do_inputrec, since we always want it */
 +                if (!bRead)
 +                {
 +                    ePBC          = ir->ePBC;
 +                    bPeriodicMols = ir->bPeriodicMols;
 +                }
 +                gmx_fio_do_int(fio, ePBC);
 +                gmx_fio_do_gmx_bool(fio, bPeriodicMols);
 +            }
 +            if (file_generation <= tpx_generation && ir)
 +            {
 +                do_inputrec(fio, ir, bRead, file_version, mtop ? &mtop->ffparams.fudgeQQ : NULL);
 +                if (bRead && debug)
 +                {
 +                    pr_inputrec(debug, 0, "inputrec", ir, FALSE);
 +                }
 +                if (file_version < 51)
 +                {
 +                    set_box_rel(ir, state);
 +                }
 +                if (file_version < 53)
 +                {
 +                    ePBC          = ir->ePBC;
 +                    bPeriodicMols = ir->bPeriodicMols;
 +                }
 +            }
 +            if (bRead && ir && file_version >= 53)
 +            {
 +                /* We need to do this after do_inputrec, since that initializes ir */
 +                ir->ePBC          = ePBC;
 +                ir->bPeriodicMols = bPeriodicMols;
 +            }
 +        }
 +    }
 +
 +    if (bRead)
 +    {
 +        if (tpx.bIr && ir)
 +        {
 +            if (state->ngtc == 0)
 +            {
 +                /* Reading old version without tcoupl state data: set it */
 +                init_gtc_state(state, ir->opts.ngtc, 0, ir->opts.nhchainlength);
 +            }
 +            if (tpx.bTop && mtop)
 +            {
 +                if (file_version < 57)
 +                {
 +                    if (mtop->moltype[0].ilist[F_DISRES].nr > 0)
 +                    {
 +                        ir->eDisre = edrSimple;
 +                    }
 +                    else
 +                    {
 +                        ir->eDisre = edrNone;
 +                    }
 +                }
 +                set_disres_npair(mtop);
 +            }
 +        }
 +
 +        if (tpx.bTop && mtop)
 +        {
 +            gmx_mtop_finalize(mtop);
 +        }
 +
 +        if (file_version >= 57)
 +        {
 +            char *env;
 +            int   ienv = 0;
 +            env = getenv("GMX_NOCHARGEGROUPS");
 +            if (env != NULL)
 +            {
 +                /* A value that does not parse as an integer counts as 0
 +                 * rather than leaving ienv undefined.
 +                 */
 +                if (sscanf(env, "%d", &ienv) != 1)
 +                {
 +                    ienv = 0;
 +                }
 +                fprintf(stderr, "\nFound env.var. GMX_NOCHARGEGROUPS = %d\n",
 +                        ienv);
 +                /* Only touch the topology when one was actually read,
 +                 * using the same test as for gmx_mtop_finalize above.
 +                 */
 +                if (ienv > 0 && tpx.bTop && mtop)
 +                {
 +                    fprintf(stderr,
 +                            "Will make single atomic charge groups in non-solvent%s\n",
 +                            ienv > 1 ? " and solvent" : "");
 +                    gmx_mtop_make_atomic_charge_groups(mtop, ienv == 1);
 +                }
 +                fprintf(stderr, "\n");
 +            }
 +        }
 +    }
 +
 +    return ePBC;
 +}
 +
 +/************************************************************
 + *
 + *  The following routines are the exported ones
 + *
 + ************************************************************/
 +
 +/* Opens the tpx file fn with the given mode through gmx_fio_open
 + * and returns the resulting file handle.
 + */
 +t_fileio *open_tpx(const char *fn, const char *mode)
 +{
 +    t_fileio *fio = gmx_fio_open(fn, mode);
 +
 +    return fio;
 +}
 +
 +/* Closes a tpx file handle through gmx_fio_close; the return value of
 + * gmx_fio_close, if any, is not used.
 + */
 +void close_tpx(t_fileio *fio)
 +{
 +    (void) gmx_fio_close(fio);
 +}
 +
 +/* Opens fn for reading, reads only the tpx header into tpx and closes
 + * the file again. TopOnlyOK, file_version and file_generation are passed
 + * on to do_tpxheader unchanged.
 + */
 +void read_tpxheader(const char *fn, t_tpxheader *tpx, gmx_bool TopOnlyOK,
 +                    int *file_version, int *file_generation)
 +{
 +    t_fileio *fio = open_tpx(fn, "r");
 +
 +    do_tpxheader(fio, TRUE, tpx, TopOnlyOK, file_version, file_generation);
 +
 +    close_tpx(fio);
 +}
 +
 +/* Writes inputrec, state and topology to the tpx file fn.
 + * No forces are written.
 + */
 +void write_tpx_state(const char *fn,
 +                     t_inputrec *ir, t_state *state, gmx_mtop_t *mtop)
 +{
 +    t_fileio *fio = open_tpx(fn, "w");
 +
 +    do_tpx(fio, FALSE, ir, state, NULL, mtop, FALSE);
 +
 +    close_tpx(fio);
 +}
 +
 +/* Reads inputrec, state, forces and topology from the tpx file fn.
 + * do_tpx is called with bXVallocated FALSE, so the state is initialized
 + * there using the atom count from the file header.
 + */
 +void read_tpx_state(const char *fn,
 +                    t_inputrec *ir, t_state *state, rvec *f, gmx_mtop_t *mtop)
 +{
 +    t_fileio *fio = open_tpx(fn, "r");
 +
 +    do_tpx(fio, TRUE, ir, state, f, mtop, FALSE);
 +
 +    close_tpx(fio);
 +}
 +
 +/* Reads a tpx file into caller-provided arrays.
 + *
 + * x and v are used as the coordinate and velocity arrays of a temporary
 + * state, so the data read ends up directly in them. The number of atoms is
 + * stored in *natoms and, when box is not NULL, the box is copied to it.
 + * Returns the pbc type obtained from do_tpx.
 + */
 +int read_tpx(const char *fn,
 +             t_inputrec *ir, matrix box, int *natoms,
 +             rvec *x, rvec *v, rvec *f, gmx_mtop_t *mtop)
 +{
 +    t_state   state;
 +    t_fileio *fio;
 +    int       ePBC;
 +
 +    /* Lend the caller's arrays to the temporary state */
 +    state.x = x;
 +    state.v = v;
 +
 +    fio  = open_tpx(fn, "r");
 +    ePBC = do_tpx(fio, TRUE, ir, &state, f, mtop, TRUE);
 +    close_tpx(fio);
 +
 +    *natoms = state.natoms;
 +    if (box != NULL)
 +    {
 +        copy_mat(state.box, box);
 +    }
 +
 +    /* Detach the caller's arrays again before the state is cleaned up */
 +    state.x = NULL;
 +    state.v = NULL;
 +    done_state(&state);
 +
 +    return ePBC;
 +}
 +
 +/* Same as read_tpx, but delivers the topology as a t_topology:
 + * the file is read into a local gmx_mtop_t which is then converted
 + * with gmx_mtop_t_to_t_topology. Returns the pbc type from read_tpx.
 + */
 +int read_tpx_top(const char *fn,
 +                 t_inputrec *ir, matrix box, int *natoms,
 +                 rvec *x, rvec *v, rvec *f, t_topology *top)
 +{
 +    gmx_mtop_t mtop;
 +    int        ePBC;
 +
 +    ePBC = read_tpx(fn, ir, box, natoms, x, v, f, &mtop);
 +    *top = gmx_mtop_t_to_t_topology(&mtop);
 +
 +    return ePBC;
 +}
 +
 +/* Returns TRUE when the file type that fn2ftp derives from the file name
 + * is one of efTPR, efTPB or efTPA, and FALSE otherwise.
 + */
 +gmx_bool fn2bTPX(const char *file)
 +{
 +    const int ftp = fn2ftp(file);
 +
 +    if (ftp == efTPR || ftp == efTPB || ftp == efTPA)
 +    {
 +        return TRUE;
 +    }
 +
 +    return FALSE;
 +}
 +
 +/* Releases the arrays owned by a gmx_groups_t: for each of the egcNR
 + * group types the name index array and the group number array (both are
 + * reset to NULL), and finally the array of group names itself.
 + */
 +static void done_gmx_groups_t(gmx_groups_t *g)
 +{
 +    int i;
 +
 +    for (i = 0; i < egcNR; i++)
 +    {
 +        if (g->grps[i].nm_ind != NULL)
 +        {
 +            sfree(g->grps[i].nm_ind);
 +            g->grps[i].nm_ind = NULL;
 +        }
 +
 +        if (g->grpnr[i] != NULL)
 +        {
 +            sfree(g->grpnr[i]);
 +            g->grpnr[i] = NULL;
 +        }
 +    }
 +
 +    /* Only the array is freed here; its contents are in the symtab */
 +    sfree(g->grpname);
 +}
 +
 +/* Reads a topology (when available) and a configuration from infile.
 + *
 + * For tpx files (see fn2bTPX) the topology is read from the file, the
 + * title is copied from the topology name and chain identifiers are
 + * generated. For all other files only the atoms are read through
 + * read_stx_conf, masses are looked up when bMass is set, and
 + * top->idef.ntypes is set to -1.
 + *
 + * *x and *v are allocated here when x respectively v is not NULL.
 + * *ePBC receives the pbc type, starting out as -1.
 + * Returns TRUE when infile is a tpx file.
 + */
 +gmx_bool read_tps_conf(const char *infile, char *title, t_topology *top, int *ePBC,
 +                       rvec **x, rvec **v, matrix box, gmx_bool bMass)
 +{
 +    t_tpxheader      header;
 +    gmx_mtop_t      *mtop;
 +    gmx_atomprop_t   aps;
 +    rvec            *xread, *vread;
 +    gmx_bool         bTop, bXNULL = FALSE;
 +    int              natoms, i, version, generation;
 +
 +    *ePBC = -1;
 +    bTop  = fn2bTPX(infile);
 +
 +    if (!bTop)
 +    {
 +        get_stx_coordnum(infile, &natoms);
 +        init_t_atoms(&top->atoms, natoms, (fn2ftp(infile) == efPDB));
 +
 +        /* read_stx_conf needs a coordinate array even when the caller
 +         * does not want one: use a temporary that is freed afterwards.
 +         */
 +        if (x == NULL)
 +        {
 +            snew(x, 1);
 +            bXNULL = TRUE;
 +        }
 +        snew(*x, natoms);
 +        if (v)
 +        {
 +            snew(*v, natoms);
 +        }
 +        vread = (v != NULL) ? *v : NULL;
 +        read_stx_conf(infile, title, &top->atoms, *x, vread, ePBC, box);
 +        if (bXNULL)
 +        {
 +            sfree(*x);
 +            sfree(x);
 +        }
 +
 +        if (bMass)
 +        {
 +            aps = gmx_atomprop_init();
 +            for (i = 0; i < natoms; i++)
 +            {
 +                /* Report atoms without a known mass in the debug file */
 +                if (!gmx_atomprop_query(aps, epropMass,
 +                                        *top->atoms.resinfo[top->atoms.atom[i].resind].name,
 +                                        *top->atoms.atomname[i],
 +                                        &(top->atoms.atom[i].m)) &&
 +                    debug)
 +                {
 +                    fprintf(debug, "Can not find mass for atom %s %d %s, setting to 1\n",
 +                            *top->atoms.resinfo[top->atoms.atom[i].resind].name,
 +                            top->atoms.resinfo[top->atoms.atom[i].resind].nr,
 +                            *top->atoms.atomname[i]);
 +                }
 +            }
 +            gmx_atomprop_destroy(aps);
 +        }
 +        top->idef.ntypes = -1;
 +
 +        return bTop;
 +    }
 +
 +    /* tpx file: the header tells how many atoms to allocate for */
 +    read_tpxheader(infile, &header, TRUE, &version, &generation);
 +    if (x)
 +    {
 +        snew(*x, header.natoms);
 +    }
 +    if (v)
 +    {
 +        snew(*v, header.natoms);
 +    }
 +    xread = (x != NULL) ? *x : NULL;
 +    vread = (v != NULL) ? *v : NULL;
 +
 +    snew(mtop, 1);
 +    *ePBC = read_tpx(infile, NULL, box, &natoms, xread, vread, NULL, mtop);
 +    *top  = gmx_mtop_t_to_t_topology(mtop);
 +    /* In this case we need to throw away the group data too */
 +    done_gmx_groups_t(&mtop->groups);
 +    sfree(mtop);
 +
 +    strcpy(title, *top->name);
 +    tpx_make_chain_identifiers(&top->atoms, &top->mols);
 +
 +    return bTop;
 +}
diff --cc src/gromacs/legacyheaders/gstat.h
index 15c25622a9,0000000000..aa991441b6
mode 100644,000000..100644
--- a/src/gromacs/legacyheaders/gstat.h
+++ b/src/gromacs/legacyheaders/gstat.h
@@@ -1,436 -1,0 +1,436 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gromacs Runs On Most of All Computer Systems
 + */
 +
 +#ifndef _gstat_h
 +#define _gstat_h
 +
 +#include "typedefs.h"
 +#include "statutil.h"
 +#include "mshift.h"
 +#include "rmpbc.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +/***********************************************
 + *
 + *     A U T O C O R R E L A T I O N
 + *
 + ***********************************************/
 +
 +real LegendreP(real x, unsigned long m);
 +
 +#define eacNormal (1<<0)
 +#define eacCos    (1<<1)
 +#define eacVector (1<<2)
 +#define eacRcross (1<<3  | eacVector)
 +#define eacP0     (1<<4  | eacVector)
 +#define eacP1     (1<<5  | eacVector)
 +#define eacP2     (1<<6  | eacVector)
 +#define eacP3     (1<<7  | eacVector)
 +#define eacP4     (1<<8  | eacVector)
 +#define eacIden   (1<<9)
 +
 +enum {
 +    effnNONE, effnEXP1, effnEXP2, effnEXP3,   effnVAC,
 +    effnEXP5, effnEXP7, effnEXP9, effnERF, effnERREST, effnNR
 +};
 +
 +/* must correspond with 'leg' g_chi.c:727 */
 +enum {
 +    edPhi = 0, edPsi, edOmega, edChi1, edChi2, edChi3, edChi4, edChi5, edChi6, edMax
 +};
 +
 +enum {
 +    edPrintST = 0, edPrintRO
 +};
 +
 +#define NHISTO 360
 +#define NONCHI 3
 +#define MAXCHI (edMax-NONCHI) /* number of chi entries (edChi1..edChi6); parenthesized so it is safe inside larger expressions */
 +#define NROT 4  /* number of rotamers: 1=g(-), 2=t, 3=g(+), 0=other */
 +
 +typedef struct {
-     int minO, minC, H, N, C, O, Cn[MAXCHI+3];
++    int minCalpha, minC, H, N, C, O, Cn[MAXCHI+3];
 +} t_dihatms; /* Cn[0]=N, Cn[1]=Ca, Cn[2]=Cb etc. */
 +
 +typedef struct {
 +    char       name[12];
 +    int        resnr;
 +    int        index;     /* Index for amino acids (histograms) */
 +    int        j0[edMax]; /* Index in dih array (phi angle is first...) */
 +    t_dihatms  atm;
 +    int        b[edMax];
 +    int        ntr[edMax];
 +    real       S2[edMax];
 +    real       rot_occ[edMax][NROT];
 +
 +} t_dlist;
 +
 +extern const int   nfp_ffn[effnNR];
 +
 +extern const char *s_ffn[effnNR+2];
 +
 +extern const char *longs_ffn[effnNR];
 +
 +int sffn2effn(const char **sffn);
 +/* Returns the ffn enum corresponding to the selected enum option in sffn */
 +
 +t_pargs *add_acf_pargs(int *npargs, t_pargs *pa);
 +/* Add options for autocorr to the current set of options.
 + * *npargs must be initialised to the number of elements in pa,
 + * it will be incremented appropriately.
 + */
 +
 +void cross_corr(int n, real f[], real g[], real corr[]);
 +/* Simple minded cross correlation algorithm */
 +
 +real fit_acf(int ncorr, int fitfn, const output_env_t oenv, gmx_bool bVerbose,
 +             real tbeginfit, real tendfit, real dt, real c1[], real *fit);
 +/* Fit an ACF to a given function */
 +
 +void do_autocorr(const char *fn, const output_env_t oenv,
 +                 const char *title,
 +                 int nframes, int nitem, real **c1,
 +                 real dt, unsigned long mode, gmx_bool bAver);
 +/* Calls low_do_autocorr (see below). After calling add_acf_pargs */
 +
 +void low_do_autocorr(const char *fn, const output_env_t oenv,
 +                     const char *title, int  nframes, int nitem,
 +                     int nout, real **c1, real dt, unsigned long mode,
 +                     int nrestart, gmx_bool bAver, gmx_bool bNormalize,
 +                     gmx_bool bVerbose, real tbeginfit, real tendfit,
 +                     int nfitparm, int nskip);
 +/*
 + * do_autocorr calculates autocorrelation functions for many things.
 + * It takes a 2 d array containing nitem arrays of length nframes
 + * for each item the ACF is calculated.
 + *
 + * A number of "modes" exist for computation of the ACF
 + *
 + * if (mode == eacNormal) {
 + *   C(t) = < X (tau) * X (tau+t) >
 + * }
 + * else if (mode == eacCos) {
 + *   C(t) = < cos (X(tau) - X(tau+t)) >
 + * }
 + * else if (mode == eacIden) { **not fully supported yet**
 + *   C(t) = < (X(tau) == X(tau+t)) >
 + * }
 + * else if (mode == eacVector) {
 + *   C(t) = < X(tau) * X(tau+t) >
 + * }
 + * else if (mode == eacP1) {
 + *   C(t) = < cos (X(tau) * X(tau+t)) >
 + * }
 + * else if (mode == eacP2) {
 + *   C(t) = 1/2 * < 3 cos (X(tau) * X(tau+t)) - 1 >
 + * }
 + * else if (mode == eacRcross) {
 + *   C(t) = < ( X(tau) * X(tau+t) )^2 >
 + * }
 + *
 + * For modes eacVector, eacP1, eacP2 and eacRcross the input should be
 + * 3 x nframes long, where each triplet is taken as a 3D vector
 + *
 + * For mode eacCos inputdata must be in radians, not degrees!
 + *
 + * Other parameters are:
 + *
 + * fn is output filename (.xvg) where the correlation function(s) are printed
 + * title is the title in the output file
 + * nframes is the number of frames in the time series
 + * nitem is the number of items
 + * c1       is an array of dimension [ 0 .. nitem-1 ] [ 0 .. nframes-1 ]
 + *          on output, this array is filled with the correlation function
 + *          to reduce storage
 + * nrestart     is the number of steps between restarts for direct ACFs
 + *              (i.e. without FFT) When set to 1 all points are used as
 + *              time origin for averaging
 + * dt       is the time between frames
 + * bAver    If set, all ndih C(t) functions are averaged into a single
 + *          C(t)
 + * (bFour       If set, will use fast fourier transform (FFT) for evaluating
 + *              the ACF: removed option, now on the command line only)
 + * bNormalize   If set, all ACFs will be normalized to start at 0
 + * nskip        Determines whether steps are skipped in the output
 + */
 +
 +typedef struct {
 +    const char *name;    /* Description of the J coupling constant */
 +    real        A, B, C; /* Karplus coefficients */
 +    real        offset;  /* Offset for dihedral angle in histogram (e.g. -M_PI/3) */
 +    real        Jc;      /* Resulting Jcoupling */
 +    real        Jcsig;   /* Standard deviation in Jc */
 +} t_karplus;
 +
 +void calc_distribution_props(int nh, int histo[],
 +                             real start, int  nkkk, t_karplus kkk[],
 +                             real *S2);
 +/* This routine takes a dihedral distribution and calculates
 + * coupling constants and dihedral order parameters of it.
 + *
 + * nh      is the number of points
 + * histo   is the array of datapoints which is assumed to span
 + *         2 M_PI radians
 + * start   is the starting angle of the histogram, this can be either 0
 + *         or -M_PI
 + * nkkk    is the number of karplus sets (multiple coupling constants may be
 + *         derived from a single angle)
 + * kkk     are the constants for calculating J coupling constants using a
 + *         Karplus equation according to
 + *
 + *                  2
 + *         J = A cos theta + B cos theta + C
 + *
 + *         where theta is phi - offset (phi is the angle in the histogram)
 + * offset  is subtracted from phi before substitution in the Karplus
 + *         equation
 + * S2      is the resulting dihedral order parameter
 + *
 + */
 +
 +
 +/***********************************************
 + *
 + *     F I T   R O U T I N E S
 + *
 + ***********************************************/
 +void do_expfit(int ndata, real c1[], real dt,
 +               real begintimefit, real endtimefit);
 +
 +void expfit(int n, real x[], real y[], real Dy[],
 +            real *a, real *sa,
 +            real *b, real *sb);
 +/* This procedure fits y=exp(a+bx) for n (x,y) pairs to determine a and b.
 + * The uncertainties in the y values must be in the vector Dy.
 + * The standard deviations of a and b, sa and sb, are also calculated.
 + *
 + * Routine from Computers in physics, 7(3) (1993), p. 280-285.
 + */
 +
 +void ana_dih_trans(const char *fn_trans, const char *fn_histo,
 +                   real **dih, int nframes, int nangles,
 +                   const char *grpname, real *time, gmx_bool bRb,
 +                   const output_env_t oenv);
 +/*
 + * Analyse dihedral transitions, by counting transitions per dihedral
 + * and per frame. The total number of transitions is printed to
 + * stderr, as well as the average time between transitions.
 + *
 + * This is a wrapper around low_ana_dih_trans, which also passes in and out
 + * the number of transitions per dihedral per residue via t_dlist (declared
 + * earlier in this header; the former pp2shift.h is no longer needed).
 +
 + * Dihedrals are supposed to be in either of three minima,
 + * (trans, gauche+, gauche-)
 + *
 + * fn_trans  output file name for #transitions per timeframe
 + * fn_histo  output file name for transition time histogram
 + * dih       the actual dihedral angles
 + * nframes   number of time frames
 + * nangles   number of angles
 + * grpname   a string for the header of plots
 + * time      array (size nframes) of times of trajectory frames
 + * bRb       determines whether the polymer convention is used
 + *           (trans = 0)
 + */
 +
 +void low_ana_dih_trans(gmx_bool bTrans, const char *fn_trans,
 +                       gmx_bool bHisto, const char *fn_histo, int maxchi,
 +                       real **dih, int nlist, t_dlist dlist[],
 +                       int nframes, int nangles, const char *grpname,
 +                       int multiplicity[], real *time, gmx_bool bRb,
 +                       real core_frac, const output_env_t oenv);
 +/* As above, but also takes dlist so that occupancies can be copied into it,
 + * and multiplicity[] (1..nangles, corresponding to dih[this][]), so that
 + * rotamers can have a multiplicity other than 3. Production of the xvg output
 + * files is conditional, and the fractional width of each rotamer can be set,
 + * i.e. for a 3-fold dihedral with core_frac = 0.5 only the central 60 degrees
 + * is assigned to each rotamer; the rest goes to rotamer zero. */
 +
 +
 +
 +void read_ang_dih(const char *trj_fn,
 +                  gmx_bool bAngles, gmx_bool bSaveAll, gmx_bool bRb, gmx_bool bPBC,
 +                  int maxangstat, int angstat[],
 +                  int *nframes, real **time,
 +                  int isize, atom_id index[],
 +                  real **trans_frac,
 +                  real **aver_angle,
 +                  real *dih[],
 +                  const output_env_t oenv);
 +/*
 + * Read a trajectory and calculate angles and dihedrals.
 + *
 + * trj_fn      file name of trajectory
 + * tpb_fn      file name of tpb file
 + * bAngles     do we have to read angles or dihedrals
 + * bSaveAll    do we have to store all in the dih array
 + * bRb         do we have Ryckaert-Bellemans dihedrals (trans = 0)
 + * bPBC        compute angles modulo 2 Pi
 + * maxangstat  number of entries in distribution array
 + * angstat     angle distribution
 + * *nframes    number of frames read
 + * time        simulation time at each time frame
 + * isize       number of entries in the index, when angles 3*number of angles
 + *             else 4*number of angles
 + * index       atom numbers that define the angles or dihedrals
 + *             (i,j,k) resp (i,j,k,l)
 + * trans_frac  number of dihedrals in trans
 + * aver_angle  average angle at each time frame
 + * dih         all angles at each time frame
 + */
 +
 +void make_histo(FILE *log,
 +                int ndata, real data[], int npoints, int histo[],
 +                real minx, real maxx);
 +/*
 + * Make a histogram from data. The min and max of the data array can
 + * be determined (if minx == 0 and maxx == 0)
 + * and the index in the histogram is computed from
 + * ind = npoints/(max(data) - min(data))
 + *
 + * log       write error output to this file
 + * ndata     number of points in data
 + * data      data points
 + * npoints   number of points in histogram
 + * histo     histogram array. This is NOT set to zero, to allow you
 + *           to add multiple histograms
 + * minx      start of the histogram
 + * maxx      end of the histogram
 + *           if both are 0, these values are computed by the routine itself
 + */
 +
 +void normalize_histo(int npoints, int histo[], real dx, real normhisto[]);
 +/*
 + * Normalize a histogram so that the integral over the histo is 1
 + *
 + * npoints    number of points in the histo array
 + * histo      input histogram
 + * dx         distance between points on the X-axis
 + * normhisto  normalized output histogram
 + */
 +
 +real fit_function(int eFitFn, real *parm, real x);
 +/* Returns the value of fit function eFitFn at x */
 +
 +/* Use Levenberg-Marquardt method to fit to a nfitparm parameter exponential */
 +/* or to a transverse current autocorrelation function */
 +/* Or: "There is no KILL like OVERKILL", Dr. Ir. D. van der Spoel */
 +real do_lmfit(int ndata, real c1[], real sig[], real dt, real *x,
 +              real begintimefit, real endtimefit, const output_env_t oenv,
 +              gmx_bool bVerbose, int eFitFn, real fitparms[], int fix);
 +/* Returns integral.
 + * If x == NULL, the timestep dt will be used to create a time axis.
 + * fix fixes fit parameter i at its starting value, when the i'th bit
 + * of fix is set.
 + */
 +
 +real evaluate_integral(int n, real x[], real y[], real dy[],
 +                       real aver_start, real *stddev);
 +/* Integrate data in y, and, if given, use dy as weighting
 + * aver_start should be set to a value where the function has
 + * converged to 0.
 + */
 +
 +real print_and_integrate(FILE *fp, int n, real dt,
 +                         real c[], real *fit, int nskip);
 +/* Integrate the data in c[] from 0 to n using trapezium rule.
 + * If fp != NULL output is written to it
 + * nskip determines whether all elements are written to the output file
 + * (written when i % nskip == 0)
 + * If fit != NULL the fit is also written.
 + */
 +
 +int get_acfnout(void);
 +/* Return the output length for the correlation function
 + * Works only AFTER do_autocorr has been called!
 + */
 +
 +int get_acffitfn(void);
 +/* Return the fit function type.
 + * Works only AFTER do_autocorr has been called!
 + */
 +
 +/* Routines from pp2shift (anadih.c etc.) */
 +
 +void do_pp2shifts(FILE *fp, int nframes,
 +                  int nlist, t_dlist dlist[], real **dih);
 +
 +gmx_bool has_dihedral(int Dih, t_dlist *dl);
 +
 +t_dlist *mk_dlist(FILE *log,
 +                  t_atoms *atoms, int *nlist,
 +                  gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bHChi,
 +                  int maxchi, int r0, gmx_residuetype_t rt);
 +
 +void pr_dlist(FILE *fp, int nl, t_dlist dl[], real dt,  int printtype,
 +              gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega, int maxchi);
 +
 +int pr_trans(FILE *fp, int nl, t_dlist dl[], real dt, int Xi);
 +
 +void mk_chi_lookup (int **lookup, int maxchi, real **dih,
 +                    int nlist, t_dlist dlist[]);
 +
 +void mk_multiplicity_lookup (int *multiplicity, int maxchi, real **dih,
 +                             int nlist, t_dlist dlist[], int nangle);
 +
 +void get_chi_product_traj (real **dih, int nframes, int nangles,
 +                           int nlist, int maxchi, t_dlist dlist[],
 +                           real time[], int **lookup, int *multiplicity,
 +                           gmx_bool bRb, gmx_bool bNormalize,
 +                           real core_frac, gmx_bool bAll, const char *fnall,
 +                           const output_env_t oenv);
 +
 +void print_one (const output_env_t oenv, const char *base,
 +                const char *name,
 +                const char *title, const char *ylabel, int nf,
 +                real time[], real data[]);
 +
 +/* Routines from g_hbond */
 +void analyse_corr(int n, real t[], real ct[], real nt[], real kt[],
 +                  real sigma_ct[], real sigma_nt[], real sigma_kt[],
 +                  real fit_start, real temp, real smooth_tail_start,
 +                  const output_env_t oenv);
 +
 +void compute_derivative(int nn, real x[], real y[], real dydx[]);
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif
diff --cc src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h
index e8e43073bf,0000000000..ed3bae4ef2
mode 100644,000000..100644
--- a/src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h
+++ b/src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h
@@@ -1,135 -1,0 +1,135 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +#ifndef NBNXN_CUDA_DATA_MGMT_H
 +#define NBNXN_CUDA_DATA_MGMT_H
 +
 +#include "types/simple.h"
 +#include "types/interaction_const.h"
 +#include "types/nbnxn_cuda_types_ext.h"
 +#include "types/hw_info.h"
 +
 +#ifdef GMX_GPU
 +#define FUNC_TERM ;
 +#define FUNC_QUALIFIER
 +#else
 +#define FUNC_TERM {}
 +#define FUNC_QUALIFIER static
 +#endif
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +/*! Initializes the data structures related to CUDA nonbonded calculations. */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_init(FILE *fplog,
 +                     nbnxn_cuda_ptr_t *p_cu_nb,
 +                     gmx_gpu_info_t *gpu_info, int my_gpu_index,
 +                     /* true if both local and non-local are done on GPU */
 +                     gmx_bool bLocalAndNonlocal) FUNC_TERM
 +
 +/*! Initializes simulation constant data. */
 +FUNC_QUALIFIER
- void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t           p_cu_nb,
-                            const interaction_const_t *ic,
-                            const nonbonded_verlet_t  *nbv) FUNC_TERM
++void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t                cu_nb,
++                           const interaction_const_t      *ic,
++                           const nonbonded_verlet_group_t *nbv_group) FUNC_TERM
 +
 +/*! Initializes pair-list data for GPU, called at every pair search step. */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_init_pairlist(nbnxn_cuda_ptr_t        cu_nb,
 +                              const nbnxn_pairlist_t *h_nblist,
 +                              int                     iloc) FUNC_TERM
 +
 +/*! Initializes atom-data on the GPU, called at every pair search step. */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_init_atomdata(nbnxn_cuda_ptr_t        cu_nb,
 +                              const nbnxn_atomdata_t *atomdata) FUNC_TERM
 +
 +/*! \brief Update parameters during PP-PME load balancing. */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t           cu_nb,
 +                                         const interaction_const_t *ic) FUNC_TERM
 +
 +/*! Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_upload_shiftvec(nbnxn_cuda_ptr_t        cu_nb,
 +                                const nbnxn_atomdata_t *nbatom) FUNC_TERM
 +
 +/*! Clears GPU outputs: nonbonded force, shift force and energy. */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_clear_outputs(nbnxn_cuda_ptr_t cu_nb,
 +                              int              flags) FUNC_TERM
 +
 +/*! Frees all GPU resources used for the nonbonded calculations. */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_free(FILE            *fplog,
 +                     nbnxn_cuda_ptr_t cu_nb) FUNC_TERM
 +
 +/*! Returns the GPU timings structure or NULL if GPU is not used or timing is off. */
 +FUNC_QUALIFIER
 +wallclock_gpu_t * nbnxn_cuda_get_timings(nbnxn_cuda_ptr_t cu_nb)
 +#ifdef GMX_GPU
 +;
 +#else
 +{
 +    return NULL;
 +}
 +#endif
 +
 +/*! Resets nonbonded GPU timings. */
 +FUNC_QUALIFIER
 +void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t cu_nb) FUNC_TERM
 +
 +/*! Calculates the minimum size of proximity lists to improve SM load balance
 +    with CUDA non-bonded kernels. */
 +FUNC_QUALIFIER
 +int nbnxn_cuda_min_ci_balanced(nbnxn_cuda_ptr_t cu_nb)
 +#ifdef GMX_GPU
 +;
 +#else
 +{
 +    return -1;
 +}
 +#endif
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#undef FUNC_TERM
 +#undef FUNC_QUALIFIER
 +
 +#endif /* NBNXN_CUDA_DATA_MGMT_H */
diff --cc src/gromacs/legacyheaders/pull_rotation.h
index 93c07ff5b9,0000000000..a24cd1528d
mode 100644,000000..100644
--- a/src/gromacs/legacyheaders/pull_rotation.h
+++ b/src/gromacs/legacyheaders/pull_rotation.h
@@@ -1,145 -1,0 +1,144 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2008, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +/*! \file pull_rotation.h
 + *
 + *  @brief Enforced rotation of protein parts or other groups of particles.
 + *
 + *  This file contains routines that are used to enforce rotational motion
 + *  upon a subgroup of particles.
 + *
 + */
 +
 +#ifndef _pull_rotation_h
 +#define _pull_rotation_h
 +
 +#include "vec.h"
 +#include "typedefs.h"
 +
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +
 +/*! \brief Initialize the enforced rotation groups.
 + *
 + * This routine does the memory allocation for various helper arrays, opens
 + * the output files etc.
 + *
 + * \param fplog             General output file, normally md.log.
 + * \param ir                Struct containing MD input parameters, among those
 + *                          also the enforced rotation parameters.
 + * \param nfile             Number of entries in the fnm structure.
 + * \param fnm               The filenames struct containing also the names
 + *                          of the rotation output files.
 + * \param cr                Pointer to MPI communication data.
 + * \param x                 The positions of all MD particles.
-  * \param box               Simulation box, needed to make group whole.
++ * \param box               The simulation box.
 + * \param mtop              Molecular topology.
 + * \param oenv              Needed to open the rotation output xvgr file.
 + * \param bVerbose          Whether to print extra status information.
 + * \param Flags             Flags passed over from main, used to determine
 + *                          whether or not we are doing a rerun.
 + */
 +extern void init_rot(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[],
 +                     t_commrec *cr, rvec *x, matrix box, gmx_mtop_t *mtop, const output_env_t oenv,
 +                     gmx_bool bVerbose, unsigned long Flags);
 +
 +
 +/*! \brief Make a selection of the home atoms for all enforced rotation groups.
 + *
 + * This routine is similar to dd_make_local_pull_groups, but works only with
 + * domain decomposition. It should be called at every domain decomposition.
 + *
 + * \param dd                Structure containing domain decomposition data.
 + * \param rot               Pointer to all the enforced rotation data.
 + */
 +extern void dd_make_local_rotation_groups(gmx_domdec_t *dd, t_rot *rot);
 +
 +
 +/*! \brief Calculation of the enforced rotation potential.
 + *
 + * This is the main enforced rotation module which is called during every time
 + * step. Here the rotation potential as well as the resulting forces are
 + * calculated.
 + *
 + * \param cr                Pointer to MPI communication data.
 + * \param ir                MD input parameters, including those for enforced rotation.
 + * \param box               Simulation box, needed to make group whole.
 + * \param x                 The positions of all the local particles.
 + * \param t                 Time.
 + * \param step              The time step.
 + * \param wcycle            During the potential calculation the wallcycles are
 + *                          counted. Later they enter the dynamic load balancing.
 + * \param bNS               After domain decomposition / neighborsearching several
 + *                          local arrays have to be updated (masses, shifts)
 + */
 +extern void do_rotation(t_commrec *cr, t_inputrec *ir, matrix box, rvec x[], real t,
 +                        gmx_large_int_t step, gmx_wallcycle_t wcycle, gmx_bool bNS);
 +
 +
 +/*! \brief Add the enforced rotation forces to the official force array.
 + *
 + * Adds the forces from enforced rotation potential to the local forces and
 + * sums up the contributions to the rotation potential from all the nodes. Since
 + * this needs communication, this routine should be called after the SR forces
 + * have been evaluated (in order not to spoil cycle counts).
 + * This routine also outputs data to the various rotation output files (e.g.
 + * the potential, the angle of the group, torques and more).
 + *
 + * \param rot               Pointer to all the enforced rotation data.
 + * \param f                 The local forces to which the rotational forces have
 + *                          to be added.
 + * \param cr                Pointer to MPI communication data.
 + * \param step              The time step, used for output.
 + * \param t                 Time, used for output.
 + */
 +extern real add_rot_forces(t_rot *rot, rvec f[], t_commrec *cr, gmx_large_int_t step, real t);
 +
 +
 +/*! \brief Close the enforced rotation output files.
 + *
-  * \param fplog             General output file, normally md.log.
 + * \param rot               Pointer to all the enforced rotation data.
 + */
- extern void finish_rot(FILE *fplog, t_rot *rot);
++extern void finish_rot(t_rot *rot);
 +
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +
 +#endif
diff --cc src/gromacs/mdlib/expanded.c
index 617647fdb5,0000000000..6140392c78
mode 100644,000000..100644
--- a/src/gromacs/mdlib/expanded.c
+++ b/src/gromacs/mdlib/expanded.c
@@@ -1,1431 -1,0 +1,1432 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#include <catamount/dclock.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include <time.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "copyrite.h"
 +#include "gmx_random.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_wallcycle.h"
 +#include "macros.h"
 +
 +#include "gromacs/utility/gmxmpi.h"
 +
 +/* Turn the log-weights ene[minfep..maxfep] into normalized probabilities.
 + *
 + * ene     log-weights, read at indices minfep..maxfep (inclusive)
 + * p_k     output; entries minfep..maxfep receive exp(ene[i]-shift)/(*pks),
 + *         entries outside that window are not written
 + * pks     output; sum over the window of exp(ene[i]-shift), where shift is
 + *         the largest ene[] value found inside the window
 + */
 +void GenerateGibbsProbabilities(real *ene, real *p_k, real *pks, int minfep, int maxfep)
 +{
 +    int  state;
 +    real shift;
 +
 +    *pks  = 0.0;
 +    shift = ene[minfep];
 +
 +    /* largest exponent in the window; it is subtracted inside every exp() */
 +    for (state = minfep + 1; state <= maxfep; state++)
 +    {
 +        shift = (ene[state] > shift) ? ene[state] : shift;
 +    }
 +
 +    /* normalization constant */
 +    state = minfep;
 +    while (state <= maxfep)
 +    {
 +        *pks += exp(ene[state]-shift);
 +        state++;
 +    }
 +
 +    /* individual probabilities */
 +    state = minfep;
 +    while (state <= maxfep)
 +    {
 +        p_k[state] = exp(ene[state]-shift) / *pks;
 +        state++;
 +    }
 +}
 +
 +/* Like GenerateGibbsProbabilities over the full range 0..nlim-1, but every
 + * log-weight is first increased by log(nvals[i]).
 + *
 + * ene     log-weights, nlim entries
 + * p_k     output, nlim entries: exp(w[i]-max(w)) / (*pks) with
 + *         w[i] = ene[i] + log(nvals[i])
 + * pks     output: sum over i of exp(w[i]-max(w))
 + * nvals   per-state counts used as multiplicative weights
 + * delta   added to a count that is exactly zero, so the logarithm stays finite
 + */
 +void GenerateWeightedGibbsProbabilities(real *ene, real *p_k, real *pks, int nlim, real *nvals, real delta)
 +{
 +    real *shifted;
 +    real  top;
 +    int   s;
 +
 +    *pks = 0.0;
 +
 +    snew(shifted, nlim);
 +
 +    /* weighted log-weights; only empty bins get the delta offset */
 +    for (s = 0; s < nlim; s++)
 +    {
 +        shifted[s] = ene[s] + log((nvals[s] == 0) ? nvals[s]+delta : nvals[s]);
 +    }
 +
 +    /* locate the largest entry */
 +    top = shifted[0];
 +    for (s = 1; s < nlim; s++)
 +    {
 +        if (shifted[s] > top)
 +        {
 +            top = shifted[s];
 +        }
 +    }
 +
 +    /* remove the maximum (avoids overflow in exp) and build the denominator */
 +    for (s = 0; s < nlim; s++)
 +    {
 +        shifted[s] -= top;
 +        *pks       += exp(shifted[s]);
 +    }
 +
 +    /* normalized probabilities */
 +    for (s = 0; s < nlim; s++)
 +    {
 +        p_k[s] = exp(shifted[s]) / *pks;
 +    }
 +
 +    sfree(shifted);
 +}
 +
 +/* Return log(\sum_{i=0}^{N-1} exp(a_n[i])).
 + *
 + * The largest argument is pulled out of the sum before exponentiating and
 + * added back after taking the logarithm.
 + */
 +real do_logsum(int N, real *a_n)
 +{
 +    real biggest = a_n[0];
 +    real acc     = 0.0;
 +    real result;
 +    int  k;
 +
 +    /* largest argument that will be handed to exp(.) */
 +    k = 1;
 +    while (k < N)
 +    {
 +        biggest = max(biggest, a_n[k]);
 +        k++;
 +    }
 +
 +    /* sum of exp(a_n - biggest), accumulated in index order */
 +    k = 0;
 +    while (k < N)
 +    {
 +        acc = acc + exp(a_n[k] - biggest);
 +        k++;
 +    }
 +
 +    /* undo the shift in log space */
 +    result = log(acc) + biggest;
 +    return result;
 +}
 +
 +/* Return the index of the smallest of the N entries of min_metric.
 + * On ties the lowest index wins, because only a strictly smaller value
 + * replaces the current candidate.
 + */
 +int FindMinimum(real *min_metric, int N)
 +{
 +    int  best     = 0;
 +    real smallest = min_metric[0];
 +    int  k;
 +
 +    for (k = 1; k < N; k++)
 +    {
 +        if (min_metric[k] < smallest)
 +        {
 +            best     = k;
 +            smallest = min_metric[k];
 +        }
 +    }
 +
 +    return best;
 +}
 +
 +/* Flatness test for a histogram.
 + *
 + * Returns TRUE when every bin, divided by the mean bin value, lies strictly
 + * between ratio and 1/ratio. Returns FALSE as soon as one bin falls outside
 + * that range, and also when the bins sum to zero (no samples).
 + */
 +static gmx_bool CheckHistogramRatios(int nhisto, real *histo, real ratio)
 +{
 +    real     mean = 0;
 +    gmx_bool bInRange;
 +    int      bin;
 +
 +    bin = 0;
 +    while (bin < nhisto)
 +    {
 +        mean += histo[bin];
 +        bin++;
 +    }
 +
 +    if (mean == 0)
 +    {
 +        /* no samples! is bad!*/
 +        return FALSE;
 +    }
 +    mean /= (real)nhisto;
 +
 +    for (bin = 0; bin < nhisto; bin++)
 +    {
 +        /* every point has to satisfy ratio < x < 1/ratio */
 +        bInRange = (histo[bin]/mean < 1.0/ratio) && (histo[bin]/mean > ratio);
 +        if (!bInRange)
 +        {
 +            return FALSE;
 +        }
 +    }
 +
 +    return TRUE;
 +}
 +
 +/* Decide whether the expanded-ensemble weights count as equilibrated.
 + *
 + * The criterion is selected by expand->elmceq:
 + *   elmceqNO        returns FALSE immediately
 + *   elmceqYES       returns TRUE immediately
 + *   elmceqSTEPS     not done while step < expand->equil_steps
 + *   elmceqSAMPLES   not done while the sum of dfhist->n_at_lam[] is below
 + *                   expand->equil_samples
 + *   elmceqNUMATLAM  not done while any n_at_lam[i] < expand->equil_n_at_lam
 + *   elmceqWLDELTA   (Wang-Landau statistics only) not done while
 + *                   dfhist->wl_delta > expand->equil_wl_delta
 + *   elmceqRATIO     (not for Wang-Landau or minvar statistics) not done while
 + *                   the histogram of n_at_lam[] - lmc_forced_nstart fails
 + *                   CheckHistogramRatios() with expand->equil_ratio
 + *   anything else   treated as done
 + *
 + * For every criterion except elmceqNO/elmceqYES, which return before this
 + * check, the result is additionally forced to FALSE while
 + * expand->lmc_forced_nstart > 0 and some state has fewer than
 + * lmc_forced_nstart samples.
 + *
 + * nlim    number of lambda states (length of dfhist->n_at_lam)
 + * step    current step, only used by elmceqSTEPS
 + */
 +static gmx_bool CheckIfDoneEquilibrating(int nlim, t_expanded *expand, df_history_t *dfhist, gmx_large_int_t step)
 +{
 +
 +    int      i, totalsamples;
 +    gmx_bool bDoneEquilibrating = TRUE;
 +    gmx_bool bIfFlat;
 +
 +    /* assume we have equilibrated the weights, then check to see if any of the conditions are not met */
 +
 +    /* calculate the total number of samples */
 +    switch (expand->elmceq)
 +    {
 +        case elmceqNO:
 +            /* We have not equilibrated, and won't, ever. */
 +            return FALSE;
 +        case elmceqYES:
 +            /* we have equilibrated -- we're done */
 +            return TRUE;
 +        case elmceqSTEPS:
 +            /* first, check if we are equilibrating by steps, if we're still under */
 +            if (step < expand->equil_steps)
 +            {
 +                bDoneEquilibrating = FALSE;
 +            }
 +            break;
 +        case elmceqSAMPLES:
 +            totalsamples = 0;
 +            for (i = 0; i < nlim; i++)
 +            {
 +                totalsamples += dfhist->n_at_lam[i];
 +            }
 +            if (totalsamples < expand->equil_samples)
 +            {
 +                bDoneEquilibrating = FALSE;
 +            }
 +            break;
 +        case elmceqNUMATLAM:
 +            for (i = 0; i < nlim; i++)
 +            {
 +                if (dfhist->n_at_lam[i] < expand->equil_n_at_lam) /* we are still doing the initial sweep, so we're definitely not
 +                                                                     done equilibrating*/
 +                {
 +                    bDoneEquilibrating  = FALSE;
 +                    break;
 +                }
 +            }
 +            break;
 +        case elmceqWLDELTA:
 +            if (EWL(expand->elamstats)) /* This check is in readir as well, but
 +                                           just to be sure */
 +            {
 +                if (dfhist->wl_delta > expand->equil_wl_delta)
 +                {
 +                    bDoneEquilibrating = FALSE;
 +                }
 +            }
 +            break;
 +        case elmceqRATIO:
 +            /* we can use the flatness as a judge of good weights, as long as
 +               we're not doing minvar, or Wang-Landau.
 +               But turn off for now until we figure out exactly how we do this.
 +             */
 +
 +            if (!(EWL(expand->elamstats) || expand->elamstats == elamstatsMINVAR))
 +            {
 +                /* we want to use flatness -avoiding- the forced-through samples.  Plus, we need to convert to
 +                   floats for this histogram function. */
 +
 +                real *modhisto;
 +                snew(modhisto, nlim);
 +                for (i = 0; i < nlim; i++)
 +                {
 +                    modhisto[i] = 1.0*(dfhist->n_at_lam[i]-expand->lmc_forced_nstart);
 +                }
 +                bIfFlat = CheckHistogramRatios(nlim, modhisto, expand->equil_ratio);
 +                sfree(modhisto);
 +                if (!bIfFlat)
 +                {
 +                    bDoneEquilibrating = FALSE;
 +                }
 +            }
 +            /* Must not fall through: the default branch would overwrite a
 +               failed flatness test with TRUE. */
 +            break;
 +        default:
 +            bDoneEquilibrating = TRUE;
 +            break;
 +    }
 +    /* one last case to go though, if we are doing slow growth to get initial values, we haven't finished equilibrating */
 +
 +    if (expand->lmc_forced_nstart > 0)
 +    {
 +        for (i = 0; i < nlim; i++)
 +        {
 +            if (dfhist->n_at_lam[i] < expand->lmc_forced_nstart) /* we are still doing the initial sweep, so we're definitely not
 +                                                                    done equilibrating*/
 +            {
 +                bDoneEquilibrating = FALSE;
 +                break;
 +            }
 +        }
 +    }
 +    return bDoneEquilibrating;
 +}
 +
 +/* Update the expanded-ensemble weights stored in dfhist for one visit to
 + * fep_state.
 + *
 + * - If dfhist->bEquil is already set, nothing is changed and FALSE is returned.
 + * - If CheckIfDoneEquilibrating() reports completion, bEquil is set, all
 + *   n_at_lam[] counters are zeroed and TRUE is returned; this is the only
 + *   path that returns TRUE.
 + * - Otherwise the weights are updated according to expand->elamstats and
 + *   FALSE is returned:
 + *     Wang-Landau variants (EWL): sum_weights[] and wl_histo[] are
 + *       incremented, then sum_weights[] is shifted so that entry 0 is zero.
 + *     Barker / Metropolis / minvar: the accum_m/accum_p running sums of
 + *       fep_state are extended for each of the 2*c_range+1 trial offsets,
 + *       the free energy differences and variances to the two neighbouring
 + *       states are re-estimated, and sum_dg[], sum_variance[], sum_weights[]
 + *       (and for minvar sum_minvar[]) are rebuilt cumulatively from state 0.
 + *
 + * nlim            number of lambda states
 + * fep_state       index of the current lambda state
 + * scaled_lamee    per-state reduced energies used for the neighbour
 + *                 acceptance estimates
 + * weighted_lamee  per-state weighted energies used for weighted Wang-Landau
 + * step            current step, forwarded to CheckIfDoneEquilibrating()
 + */
 +static gmx_bool UpdateWeights(int nlim, t_expanded *expand, df_history_t *dfhist,
 +                              int fep_state, real *scaled_lamee, real *weighted_lamee, gmx_large_int_t step)
 +{
 +    gmx_bool bSufficientSamples;
 +    int      i;
 +    int      n0, np1, nm1, nval, min_nvalm, min_nvalp, maxc;
 +    real     omega_m1_0, omega_p1_m1, omega_m1_p1, omega_p1_0, clam_osum;
 +    real     de, de_function, pks = 0;
 +    real     cnval, zero_sum_weights;
 +    real    *omegam_array, *weightsm_array, *omegap_array, *weightsp_array, *varm_array, *varp_array, *dwp_array, *dwm_array;
 +    real     clam_varm, clam_varp, clam_weightsm, clam_weightsp, clam_minvar;
 +    real    *lam_variance, *lam_dg, *p_k;
 +    real     chi_m1_0, chi_p1_0, chi_m2_0, chi_p2_0, chi_p1_m1, chi_p2_m1, chi_m1_p1, chi_m2_p1;
 +
 +    /* if we have equilibrated the weights, exit now */
 +    if (dfhist->bEquil)
 +    {
 +        return FALSE;
 +    }
 +
 +    if (CheckIfDoneEquilibrating(nlim, expand, dfhist, step))
 +    {
 +        dfhist->bEquil = TRUE;
 +        /* zero out the visited states so we know how many equilibrated states we have
 +           from here on out.*/
 +        for (i = 0; i < nlim; i++)
 +        {
 +            dfhist->n_at_lam[i] = 0;
 +        }
 +        return TRUE;
 +    }
 +
 +    /* If we reached this far, we have not equilibrated yet, keep on
 +       going resetting the weights */
 +
 +    if (EWL(expand->elamstats))
 +    {
 +        if (expand->elamstats == elamstatsWL)  /* Standard Wang-Landau */
 +        {
 +            dfhist->sum_weights[fep_state] -= dfhist->wl_delta;
 +            dfhist->wl_histo[fep_state]    += 1.0;
 +        }
 +        else if (expand->elamstats == elamstatsWWL) /* Weighted Wang-Landau */
 +        {
 +            snew(p_k, nlim);
 +
 +            /* first increment count */
 +            GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, 0, nlim-1);
 +            for (i = 0; i < nlim; i++)
 +            {
 +                dfhist->wl_histo[i] += p_k[i];
 +            }
 +
 +            /* then increment weights (uses count) */
 +            pks = 0.0;
 +            GenerateWeightedGibbsProbabilities(weighted_lamee, p_k, &pks, nlim, dfhist->wl_histo, dfhist->wl_delta);
 +
 +            for (i = 0; i < nlim; i++)
 +            {
 +                dfhist->sum_weights[i] -= dfhist->wl_delta*p_k[i];
 +            }
 +            /* Alternate definition, using logarithms. Shouldn't make very much difference! */
 +            /*
 +               real di;
 +               for (i=0;i<nlim;i++)
 +               {
 +                di = 1+dfhist->wl_delta*p_k[i];
 +                dfhist->sum_weights[i] -= log(di);
 +               }
 +             */
 +            sfree(p_k);
 +        }
 +
 +        /* shift all weights so that state 0 carries weight zero */
 +        zero_sum_weights =  dfhist->sum_weights[0];
 +        for (i = 0; i < nlim; i++)
 +        {
 +            dfhist->sum_weights[i] -= zero_sum_weights;
 +        }
 +    }
 +
 +    if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMETROPOLIS || expand->elamstats == elamstatsMINVAR)
 +    {
 +
 +        de_function = 0;  /* to get rid of warnings, but this value will not be used because of the logic */
 +        maxc        = 2*expand->c_range+1;
 +
 +        snew(lam_dg, nlim);
 +        snew(lam_variance, nlim);
 +
 +        snew(omegap_array, maxc);
 +        snew(weightsp_array, maxc);
 +        snew(varp_array, maxc);
 +        snew(dwp_array, maxc);
 +
 +        snew(omegam_array, maxc);
 +        snew(weightsm_array, maxc);
 +        snew(varm_array, maxc);
 +        snew(dwm_array, maxc);
 +
 +        /* unpack the current lambdas -- we will only update 2 of these */
 +
 +        for (i = 0; i < nlim-1; i++)
 +        {   /* only through the second to last */
 +            lam_dg[i]       = dfhist->sum_dg[i+1] - dfhist->sum_dg[i];
 +            lam_variance[i] = pow(dfhist->sum_variance[i+1], 2) - pow(dfhist->sum_variance[i], 2);
 +        }
 +
 +        /* accumulate running averages */
 +        for (nval = 0; nval < maxc; nval++)
 +        {
 +            /* constants for later use */
 +            cnval = (real)(nval-expand->c_range);
 +            /* actually, should be able to rewrite it w/o exponential, for better numerical stability */
 +            if (fep_state > 0)
 +            {
 +                de = exp(cnval - (scaled_lamee[fep_state]-scaled_lamee[fep_state-1]));
 +                if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMINVAR)
 +                {
 +                    de_function = 1.0/(1.0+de);
 +                }
 +                else if (expand->elamstats == elamstatsMETROPOLIS)
 +                {
 +                    if (de < 1.0)
 +                    {
 +                        de_function = 1.0;
 +                    }
 +                    else
 +                    {
 +                        de_function = 1.0/de;
 +                    }
 +                }
 +                dfhist->accum_m[fep_state][nval]  += de_function;
 +                dfhist->accum_m2[fep_state][nval] += de_function*de_function;
 +            }
 +
 +            if (fep_state < nlim-1)
 +            {
 +                de = exp(-cnval + (scaled_lamee[fep_state+1]-scaled_lamee[fep_state]));
 +                if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMINVAR)
 +                {
 +                    de_function = 1.0/(1.0+de);
 +                }
 +                else if (expand->elamstats == elamstatsMETROPOLIS)
 +                {
 +                    if (de < 1.0)
 +                    {
 +                        de_function = 1.0;
 +                    }
 +                    else
 +                    {
 +                        de_function = 1.0/de;
 +                    }
 +                }
 +                dfhist->accum_p[fep_state][nval]  += de_function;
 +                dfhist->accum_p2[fep_state][nval] += de_function*de_function;
 +            }
 +
 +            /* Metropolis transition and Barker transition (unoptimized Bennett) acceptance weight determination */
 +
 +            n0  = dfhist->n_at_lam[fep_state];
 +            if (fep_state > 0)
 +            {
 +                nm1 = dfhist->n_at_lam[fep_state-1];
 +            }
 +            else
 +            {
 +                nm1 = 0;
 +            }
 +            if (fep_state < nlim-1)
 +            {
 +                np1 = dfhist->n_at_lam[fep_state+1];
 +            }
 +            else
 +            {
 +                np1 = 0;
 +            }
 +
 +            /* logic SHOULD keep these all set correctly whatever the logic, but apparently it can't figure it out. */
 +            chi_m1_0 = chi_p1_0 = chi_m2_0 = chi_p2_0 = chi_p1_m1 = chi_p2_m1 = chi_m1_p1 = chi_m2_p1 = 0;
 +
 +            if (n0 > 0)
 +            {
 +                chi_m1_0 = dfhist->accum_m[fep_state][nval]/n0;
 +                chi_p1_0 = dfhist->accum_p[fep_state][nval]/n0;
 +                chi_m2_0 = dfhist->accum_m2[fep_state][nval]/n0;
 +                chi_p2_0 = dfhist->accum_p2[fep_state][nval]/n0;
 +            }
 +
 +            if ((fep_state > 0 ) && (nm1 > 0))
 +            {
 +                chi_p1_m1 = dfhist->accum_p[fep_state-1][nval]/nm1;
 +                chi_p2_m1 = dfhist->accum_p2[fep_state-1][nval]/nm1;
 +            }
 +
 +            if ((fep_state < nlim-1) && (np1 > 0))
 +            {
 +                chi_m1_p1 = dfhist->accum_m[fep_state+1][nval]/np1;
 +                chi_m2_p1 = dfhist->accum_m2[fep_state+1][nval]/np1;
 +            }
 +
 +            omega_m1_0    = 0;
 +            omega_p1_0    = 0;
 +            clam_weightsm = 0;
 +            clam_weightsp = 0;
 +            clam_varm     = 0;
 +            clam_varp     = 0;
 +
 +            if (fep_state > 0)
 +            {
 +                if (n0 > 0)
 +                {
 +                    omega_m1_0 = chi_m2_0/(chi_m1_0*chi_m1_0) - 1.0;
 +                }
 +                if (nm1 > 0)
 +                {
 +                    omega_p1_m1 = chi_p2_m1/(chi_p1_m1*chi_p1_m1) - 1.0;
 +                }
 +                if ((n0 > 0) && (nm1 > 0))
 +                {
 +                    clam_weightsm = (log(chi_m1_0) - log(chi_p1_m1)) + cnval;
 +                    clam_varm     = (1.0/n0)*(omega_m1_0) + (1.0/nm1)*(omega_p1_m1);
 +                }
 +            }
 +
 +            if (fep_state < nlim-1)
 +            {
 +                if (n0 > 0)
 +                {
 +                    omega_p1_0 = chi_p2_0/(chi_p1_0*chi_p1_0) - 1.0;
 +                }
 +                if (np1 > 0)
 +                {
 +                    omega_m1_p1 = chi_m2_p1/(chi_m1_p1*chi_m1_p1) - 1.0;
 +                }
 +                if ((n0 > 0) && (np1 > 0))
 +                {
 +                    clam_weightsp = (log(chi_m1_p1) - log(chi_p1_0)) + cnval;
 +                    clam_varp     = (1.0/np1)*(omega_m1_p1) + (1.0/n0)*(omega_p1_0);
 +                }
 +            }
 +
 +            if (n0 > 0)
 +            {
 +                omegam_array[nval]             = omega_m1_0;
 +            }
 +            else
 +            {
 +                omegam_array[nval]             = 0;
 +            }
 +            weightsm_array[nval]           = clam_weightsm;
 +            varm_array[nval]               = clam_varm;
 +            if (nm1 > 0)
 +            {
 +                /* nm1 > 0 implies fep_state > 0, so the index is valid */
 +                dwm_array[nval]  = fabs( (cnval + log((1.0*n0)/nm1)) - lam_dg[fep_state-1] );
 +            }
 +            else if (fep_state > 0)
 +            {
 +                dwm_array[nval]  = fabs( cnval - lam_dg[fep_state-1] );
 +            }
 +            else
 +            {
 +                /* fep_state == 0 has no lower neighbour; reading
 +                   lam_dg[fep_state-1] would be out of bounds. The "minus"
 +                   arrays are all zero in this case, so the value stored
 +                   here does not influence the result. */
 +                dwm_array[nval]  = fabs( cnval );
 +            }
 +
 +            if (n0 > 0)
 +            {
 +                omegap_array[nval]             = omega_p1_0;
 +            }
 +            else
 +            {
 +                omegap_array[nval]             = 0;
 +            }
 +            weightsp_array[nval]           = clam_weightsp;
 +            varp_array[nval]               = clam_varp;
 +            if ((np1 > 0) && (n0 > 0))
 +            {
 +                dwp_array[nval]  = fabs( (cnval + log((1.0*np1)/n0)) - lam_dg[fep_state] );
 +            }
 +            else
 +            {
 +                dwp_array[nval]  = fabs( cnval - lam_dg[fep_state] );
 +            }
 +
 +        }
 +
 +        /* find the C's closest to the old weights value */
 +
 +        min_nvalm     = FindMinimum(dwm_array, maxc);
 +        omega_m1_0    = omegam_array[min_nvalm];
 +        clam_weightsm = weightsm_array[min_nvalm];
 +        clam_varm     = varm_array[min_nvalm];
 +
 +        min_nvalp     = FindMinimum(dwp_array, maxc);
 +        omega_p1_0    = omegap_array[min_nvalp];
 +        clam_weightsp = weightsp_array[min_nvalp];
 +        clam_varp     = varp_array[min_nvalp];
 +
 +        clam_osum   = omega_m1_0 + omega_p1_0;
 +        clam_minvar = 0;
 +        if (clam_osum > 0)
 +        {
 +            clam_minvar = 0.5*log(clam_osum);
 +        }
 +
 +        if (fep_state > 0)
 +        {
 +            lam_dg[fep_state-1]       = clam_weightsm;
 +            lam_variance[fep_state-1] = clam_varm;
 +        }
 +
 +        if (fep_state < nlim-1)
 +        {
 +            lam_dg[fep_state]       = clam_weightsp;
 +            lam_variance[fep_state] = clam_varp;
 +        }
 +
 +        if (expand->elamstats == elamstatsMINVAR)
 +        {
 +            bSufficientSamples = TRUE;
 +            /* make sure they are all past a threshold */
 +            for (i = 0; i < nlim; i++)
 +            {
 +                if (dfhist->n_at_lam[i] < expand->minvarmin)
 +                {
 +                    bSufficientSamples = FALSE;
 +                }
 +            }
 +            if (bSufficientSamples)
 +            {
 +                dfhist->sum_minvar[fep_state] = clam_minvar;
 +                if (fep_state == 0)
 +                {
 +                    for (i = 0; i < nlim; i++)
 +                    {
 +                        dfhist->sum_minvar[i] += (expand->minvar_const-clam_minvar);
 +                    }
 +                    expand->minvar_const          = clam_minvar;
 +                    dfhist->sum_minvar[fep_state] = 0.0;
 +                }
 +                else
 +                {
 +                    dfhist->sum_minvar[fep_state] -= expand->minvar_const;
 +                }
 +            }
 +        }
 +
 +        /* we need to rezero minvar now, since it could change at fep_state = 0 */
 +        dfhist->sum_dg[0]       = 0.0;
 +        dfhist->sum_variance[0] = 0.0;
 +        dfhist->sum_weights[0]  = dfhist->sum_dg[0] + dfhist->sum_minvar[0]; /* should be zero */
 +
 +        /* rebuild the cumulative quantities from the per-interval values */
 +        for (i = 1; i < nlim; i++)
 +        {
 +            dfhist->sum_dg[i]       = lam_dg[i-1] + dfhist->sum_dg[i-1];
 +            dfhist->sum_variance[i] = sqrt(lam_variance[i-1] + pow(dfhist->sum_variance[i-1], 2));
 +            dfhist->sum_weights[i]  = dfhist->sum_dg[i] + dfhist->sum_minvar[i];
 +        }
 +
 +        sfree(lam_dg);
 +        sfree(lam_variance);
 +
 +        sfree(omegam_array);
 +        sfree(weightsm_array);
 +        sfree(varm_array);
 +        sfree(dwm_array);
 +
 +        sfree(omegap_array);
 +        sfree(weightsp_array);
 +        sfree(varp_array);
 +        sfree(dwp_array);
 +    }
 +    return FALSE;
 +}
 +
 +static int ChooseNewLambda(FILE *log, int nlim, t_expanded *expand, df_history_t *dfhist, int fep_state, real *weighted_lamee, real *p_k, gmx_rng_t rng)
 +{
 +    /* Choose new lambda value, and update transition matrix */
 +
 +    int      i, ifep, jfep, minfep, maxfep, lamnew, lamtrial, starting_fep_state;
 +    real     r1, r2, pks, de_old, de_new, de, trialprob, tprob = 0;
 +    real   **Tij;
 +    real    *propose, *accept, *remainder;
 +    real     sum, pnorm;
 +    gmx_bool bRestricted;
 +
 +    starting_fep_state = fep_state;
 +    lamnew             = fep_state; /* so that there is a default setting -- stays the same */
 +
 +    if (!EWL(expand->elamstats))    /* ignore equilibrating the weights if using WL */
 +    {
 +        if ((expand->lmc_forced_nstart > 0) && (dfhist->n_at_lam[nlim-1] <= expand->lmc_forced_nstart))
 +        {
 +            /* Use a marching method to run through the lambdas and get preliminary free energy data,
 +               before starting 'free' sampling.  We start free sampling when we have enough at each lambda */
 +
 +            /* if we have enough at this lambda, move on to the next one */
 +
 +            if (dfhist->n_at_lam[fep_state] == expand->lmc_forced_nstart)
 +            {
 +                lamnew = fep_state+1;
 +                if (lamnew == nlim)  /* whoops, stepped too far! */
 +                {
 +                    lamnew -= 1;
 +                }
 +            }
 +            else
 +            {
 +                lamnew = fep_state;
 +            }
 +            return lamnew;
 +        }
 +    }
 +
 +    snew(propose, nlim);
 +    snew(accept, nlim);
 +    snew(remainder, nlim);
 +
 +    for (i = 0; i < expand->lmc_repeats; i++)
 +    {
 +
 +        for (ifep = 0; ifep < nlim; ifep++)
 +        {
 +            propose[ifep] = 0;
 +            accept[ifep]  = 0;
 +        }
 +
 +        if ((expand->elmcmove == elmcmoveGIBBS) || (expand->elmcmove == elmcmoveMETGIBBS))
 +        {
 +            bRestricted = TRUE;
 +            /* use the Gibbs sampler, with restricted range */
 +            if (expand->gibbsdeltalam < 0)
 +            {
 +                minfep      = 0;
 +                maxfep      = nlim-1;
 +                bRestricted = FALSE;
 +            }
 +            else
 +            {
 +                minfep = fep_state - expand->gibbsdeltalam;
 +                maxfep = fep_state + expand->gibbsdeltalam;
 +                if (minfep < 0)
 +                {
 +                    minfep = 0;
 +                }
 +                if (maxfep > nlim-1)
 +                {
 +                    maxfep = nlim-1;
 +                }
 +            }
 +
 +            GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, minfep, maxfep);
 +
 +            if (expand->elmcmove == elmcmoveGIBBS)
 +            {
 +                for (ifep = minfep; ifep <= maxfep; ifep++)
 +                {
 +                    propose[ifep] = p_k[ifep];
 +                    accept[ifep]  = 1.0;
 +                }
 +                /* Gibbs sampling */
 +                r1 = gmx_rng_uniform_real(rng);
 +                for (lamnew = minfep; lamnew <= maxfep; lamnew++)
 +                {
 +                    if (r1 <= p_k[lamnew])
 +                    {
 +                        break;
 +                    }
 +                    r1 -= p_k[lamnew];
 +                }
 +            }
 +            else if (expand->elmcmove == elmcmoveMETGIBBS)
 +            {
 +
 +                /* Metropolized Gibbs sampling */
 +                for (ifep = minfep; ifep <= maxfep; ifep++)
 +                {
 +                    remainder[ifep] = 1 - p_k[ifep];
 +                }
 +
 +                /* find the proposal probabilities */
 +
 +                if (remainder[fep_state] == 0)
 +                {
 +                    /* only the current state has any probability */
 +                    /* we have to stay at the current state */
 +                    lamnew = fep_state;
 +                }
 +                else
 +                {
 +                    for (ifep = minfep; ifep <= maxfep; ifep++)
 +                    {
 +                        if (ifep != fep_state)
 +                        {
 +                            propose[ifep] = p_k[ifep]/remainder[fep_state];
 +                        }
 +                        else
 +                        {
 +                            propose[ifep] = 0;
 +                        }
 +                    }
 +
 +                    r1 = gmx_rng_uniform_real(rng);
 +                    for (lamtrial = minfep; lamtrial <= maxfep; lamtrial++)
 +                    {
 +                        pnorm = p_k[lamtrial]/remainder[fep_state];
 +                        if (lamtrial != fep_state)
 +                        {
 +                            if (r1 <= pnorm)
 +                            {
 +                                break;
 +                            }
 +                            r1 -= pnorm;
 +                        }
 +                    }
 +
 +                    /* we have now selected lamtrial according to p(lamtrial)/1-p(fep_state) */
 +                    tprob = 1.0;
 +                    /* trial probability is min{1,\frac{1 - p(old)}{1-p(new)} MRS 1/8/2008 */
 +                    trialprob = (remainder[fep_state])/(remainder[lamtrial]);
 +                    if (trialprob < tprob)
 +                    {
 +                        tprob = trialprob;
 +                    }
 +                    r2 = gmx_rng_uniform_real(rng);
 +                    if (r2 < tprob)
 +                    {
 +                        lamnew = lamtrial;
 +                    }
 +                    else
 +                    {
 +                        lamnew = fep_state;
 +                    }
 +                }
 +
 +                /* now figure out the acceptance probability for each */
 +                for (ifep = minfep; ifep <= maxfep; ifep++)
 +                {
 +                    tprob = 1.0;
 +                    if (remainder[ifep] != 0)
 +                    {
 +                        trialprob = (remainder[fep_state])/(remainder[ifep]);
 +                    }
 +                    else
 +                    {
 +                        trialprob = 1.0; /* this state is the only choice! */
 +                    }
 +                    if (trialprob < tprob)
 +                    {
 +                        tprob = trialprob;
 +                    }
 +                    /* probability for fep_state=0, but that's fine, it's never proposed! */
 +                    accept[ifep] = tprob;
 +                }
 +            }
 +
 +            if (lamnew > maxfep)
 +            {
 +                /* it's possible some rounding is failing */
 +                if (remainder[fep_state] < 2.0e-15)
 +                {
 +                    /* probably numerical rounding error -- no state other than the original has weight */
 +                    lamnew = fep_state;
 +                }
 +                else
 +                {
 +                    /* probably not a numerical issue */
 +                    int   loc    = 0;
 +                    int   nerror = 200+(maxfep-minfep+1)*60;
 +                    char *errorstr;
 +                    snew(errorstr, nerror);
 +                    /* if its greater than maxfep, then something went wrong -- probably underflow in the calculation
 +                       of sum weights. Generated detailed info for failure */
 +                    loc += sprintf(errorstr, "Something wrong in choosing new lambda state with a Gibbs move -- probably underflow in weight determination.\nDenominator is: %3d%17.10e\n  i                dE        numerator          weights\n", 0, pks);
 +                    for (ifep = minfep; ifep <= maxfep; ifep++)
 +                    {
 +                        loc += sprintf(&errorstr[loc], "%3d %17.10e%17.10e%17.10e\n", ifep, weighted_lamee[ifep], p_k[ifep], dfhist->sum_weights[ifep]);
 +                    }
 +                    gmx_fatal(FARGS, errorstr);
 +                }
 +            }
 +        }
 +        else if ((expand->elmcmove == elmcmoveMETROPOLIS) || (expand->elmcmove == elmcmoveBARKER))
 +        {
 +            /* use the metropolis sampler with trial +/- 1 */
 +            r1 = gmx_rng_uniform_real(rng);
 +            if (r1 < 0.5)
 +            {
 +                if (fep_state == 0)
 +                {
 +                    lamtrial = fep_state;
 +                }
 +                else
 +                {
 +                    lamtrial = fep_state-1;
 +                }
 +            }
 +            else
 +            {
 +                if (fep_state == nlim-1)
 +                {
 +                    lamtrial = fep_state;
 +                }
 +                else
 +                {
 +                    lamtrial = fep_state+1;
 +                }
 +            }
 +
 +            de = weighted_lamee[lamtrial] - weighted_lamee[fep_state];
 +            if (expand->elmcmove == elmcmoveMETROPOLIS)
 +            {
 +                tprob     = 1.0;
 +                trialprob = exp(de);
 +                if (trialprob < tprob)
 +                {
 +                    tprob = trialprob;
 +                }
 +                propose[fep_state] = 0;
 +                propose[lamtrial]  = 1.0; /* note that this overwrites the above line if fep_state = ntrial, which only occurs at the ends */
 +                accept[fep_state]  = 1.0; /* doesn't actually matter, never proposed unless fep_state = ntrial, in which case it's 1.0 anyway */
 +                accept[lamtrial]   = tprob;
 +
 +            }
 +            else if (expand->elmcmove == elmcmoveBARKER)
 +            {
 +                tprob = 1.0/(1.0+exp(-de));
 +
 +                propose[fep_state] = (1-tprob);
 +                propose[lamtrial] += tprob; /* we add, to account for the fact that at the end, they might be the same point */
 +                accept[fep_state]  = 1.0;
 +                accept[lamtrial]   = 1.0;
 +            }
 +
 +            r2 = gmx_rng_uniform_real(rng);
 +            if (r2 < tprob)
 +            {
 +                lamnew = lamtrial;
 +            }
 +            else
 +            {
 +                lamnew = fep_state;
 +            }
 +        }
 +
 +        for (ifep = 0; ifep < nlim; ifep++)
 +        {
 +            dfhist->Tij[fep_state][ifep]      += propose[ifep]*accept[ifep];
 +            dfhist->Tij[fep_state][fep_state] += propose[ifep]*(1.0-accept[ifep]);
 +        }
 +        fep_state = lamnew;
 +    }
 +
 +    dfhist->Tij_empirical[starting_fep_state][lamnew] += 1.0;
 +
 +    sfree(propose);
 +    sfree(accept);
 +    sfree(remainder);
 +
 +    return lamnew;
 +}
 +
 +/* Print the expanded-ensemble ("MC-lambda") report to outfile when frequency is non-zero and step is a multiple of it: one row per lambda state (the row of state nlam is flagged with "<<"), followed by the Tij and Tij_empirical transition matrices when expand->nstTij > 0 and step is a positive multiple of it.  simtemp may be NULL, in which case no temperature column is printed. */
 +extern void PrintFreeEnergyInfoToFile(FILE *outfile, t_lambda *fep, t_expanded *expand, t_simtemp *simtemp, df_history_t *dfhist,
 +                                      int nlam, int frequency, gmx_large_int_t step)
 +{
 +    int         nlim, i, ifep, jfep;
 +    real        dw, dg, dv, dm, Tprint; /* NOTE(review): dm is computed below but never printed */
 +    real       *temps;                  /* NOTE(review): not used in this function */
 +    const char *print_names[efptNR] = {" FEPL", "MassL", "CoulL", " VdwL", "BondL", "RestT", "Temp.(K)"};
 +    gmx_bool    bSimTemp            = FALSE;
 +
 +    nlim = fep->n_lambda; /* number of rows in the table and order of the matrices */
 +    if (simtemp != NULL)
 +    {
 +        bSimTemp = TRUE;
 +    }
 +
 +    if ((frequency != 0) && (mod(step, frequency) == 0)) /* test frequency first: mod() presumably reduces to %, which must not see a zero divisor */
 +    {
 +        fprintf(outfile, "             MC-lambda information\n");
 +        if (EWL(expand->elamstats) && (!(dfhist->bEquil)))
 +        {
 +            fprintf(outfile, "  Wang-Landau incrementor is: %11.5g\n", dfhist->wl_delta);
 +        }
 +        fprintf(outfile, "  N");
 +        for (i = 0; i < efptNR; i++) /* column headers: only components flagged in separate_dvdl, plus temperature when simtemp is given */
 +        {
 +            if (fep->separate_dvdl[i])
 +            {
 +                fprintf(outfile, "%7s", print_names[i]);
 +            }
 +            else if ((i == efptTEMPERATURE) && bSimTemp)
 +            {
 +                fprintf(outfile, "%10s", print_names[i]); /* more space for temperature formats */
 +            }
 +        }
 +        fprintf(outfile, "    Count   ");
 +        if (expand->elamstats == elamstatsMINVAR)
 +        {
 +            fprintf(outfile, "W(in kT)   G(in kT)  dG(in kT)  dV(in kT)\n");
 +        }
 +        else
 +        {
 +            fprintf(outfile, "G(in kT)  dG(in kT)\n");
 +        }
 +        for (ifep = 0; ifep < nlim; ifep++)
 +        {
 +            if (ifep == nlim-1) /* the last state has no successor, so all differences are reported as zero */
 +            {
 +                dw = 0.0;
 +                dg = 0.0;
 +                dv = 0.0;
 +                dm = 0.0;
 +            }
 +            else
 +            {
 +                dw = dfhist->sum_weights[ifep+1] - dfhist->sum_weights[ifep];
 +                dg = dfhist->sum_dg[ifep+1] - dfhist->sum_dg[ifep];
 +                dv = sqrt(pow(dfhist->sum_variance[ifep+1], 2) - pow(dfhist->sum_variance[ifep], 2)); /* NOTE(review): NaN if |sum_variance| decreases from ifep to ifep+1 */
 +                dm = dfhist->sum_minvar[ifep+1] - dfhist->sum_minvar[ifep];
 +
 +            }
 +            fprintf(outfile, "%3d", (ifep+1)); /* states are printed 1-based */
 +            for (i = 0; i < efptNR; i++)
 +            {
 +                if (fep->separate_dvdl[i])
 +                {
 +                    fprintf(outfile, "%7.3f", fep->all_lambda[i][ifep]);
 +                }
 +                else if (i == efptTEMPERATURE && bSimTemp)
 +                {
 +                    fprintf(outfile, "%9.3f", simtemp->temperatures[ifep]);
 +                }
 +            }
 +            if (EWL(expand->elamstats) && (!(dfhist->bEquil)))  /* if performing WL and still haven't equilibrated */
 +            {
 +                if (expand->elamstats == elamstatsWL)
 +                {
 +                    fprintf(outfile, " %8d", (int)dfhist->wl_histo[ifep]);
 +                }
 +                else
 +                {
 +                    fprintf(outfile, " %8.3f", dfhist->wl_histo[ifep]);
 +                }
 +            }
 +            else   /* we have equilibrated weights */
 +            {
 +                fprintf(outfile, " %8d", dfhist->n_at_lam[ifep]);
 +            }
 +            if (expand->elamstats == elamstatsMINVAR)
 +            {
 +                fprintf(outfile, " %10.5f %10.5f %10.5f %10.5f", dfhist->sum_weights[ifep], dfhist->sum_dg[ifep], dg, dv);
 +            }
 +            else
 +            {
 +                fprintf(outfile, " %10.5f %10.5f", dfhist->sum_weights[ifep], dw);
 +            }
 +            if (ifep == nlam) /* flag the row that belongs to state nlam */
 +            {
 +                fprintf(outfile, " <<\n");
 +            }
 +            else
 +            {
 +                fprintf(outfile, "   \n");
 +            }
 +        }
 +        fprintf(outfile, "\n");
 +
 +        if ((expand->nstTij > 0) && (step > 0) && (mod(step, expand->nstTij) == 0)) /* range checks come first so mod() never sees a zero nstTij */
 +        {
 +            fprintf(outfile, "                     Transition Matrix\n");
 +            for (ifep = 0; ifep < nlim; ifep++)
 +            {
 +                fprintf(outfile, "%12d", (ifep+1));
 +            }
 +            fprintf(outfile, "\n");
 +            for (ifep = 0; ifep < nlim; ifep++)
 +            {
 +                for (jfep = 0; jfep < nlim; jfep++)
 +                {
 +                    if (dfhist->n_at_lam[ifep] > 0) /* rows of states with a zero count print as zeros, which also avoids dividing by that count */
 +                    {
 +                        if (expand->bSymmetrizedTMatrix)
 +                        {
 +                            Tprint = (dfhist->Tij[ifep][jfep]+dfhist->Tij[jfep][ifep])/(dfhist->n_at_lam[ifep]+dfhist->n_at_lam[jfep]);
 +                        }
 +                        else
 +                        {
 +                            Tprint = (dfhist->Tij[ifep][jfep])/(dfhist->n_at_lam[ifep]);
 +                        }
 +                    }
 +                    else
 +                    {
 +                        Tprint = 0.0;
 +                    }
 +                    fprintf(outfile, "%12.8f", Tprint);
 +                }
 +                fprintf(outfile, "%3d\n", (ifep+1));
 +            }
 +
 +            fprintf(outfile, "                  Empirical Transition Matrix\n");
 +            for (ifep = 0; ifep < nlim; ifep++)
 +            {
 +                fprintf(outfile, "%12d", (ifep+1));
 +            }
 +            fprintf(outfile, "\n");
 +            for (ifep = 0; ifep < nlim; ifep++)
 +            {
 +                for (jfep = 0; jfep < nlim; jfep++)
 +                {
 +                    if (dfhist->n_at_lam[ifep] > 0) /* same zero-count handling as for the matrix printed before this one */
 +                    {
 +                        if (expand->bSymmetrizedTMatrix)
 +                        {
 +                            Tprint = (dfhist->Tij_empirical[ifep][jfep]+dfhist->Tij_empirical[jfep][ifep])/(dfhist->n_at_lam[ifep]+dfhist->n_at_lam[jfep]);
 +                        }
 +                        else
 +                        {
 +                            Tprint = dfhist->Tij_empirical[ifep][jfep]/(dfhist->n_at_lam[ifep]);
 +                        }
 +                    }
 +                    else
 +                    {
 +                        Tprint = 0.0;
 +                    }
 +                    fprintf(outfile, "%12.8f", Tprint);
 +                }
 +                fprintf(outfile, "%3d\n", (ifep+1));
 +            }
 +        }
 +    }
 +}
 +
 +extern void get_mc_state(gmx_rng_t rng, t_state *state) /* hands state->mc_rng and state->mc_rngi to gmx_rng_get_state() for rng */
 +{
 +    gmx_rng_get_state(rng, state->mc_rng, state->mc_rngi); /* presumably fills both buffers from rng -- confirm against the gmx_rng_get_state() prototype */
 +}
 +
 +extern void set_mc_state(gmx_rng_t rng, t_state *state) /* hands state->mc_rng and the first element of state->mc_rngi to gmx_rng_set_state() for rng */
 +{
 +    gmx_rng_set_state(rng, state->mc_rng, state->mc_rngi[0]); /* mc_rngi[0] is passed by value here, whereas get_mc_state() passes mc_rngi itself */
 +}
 +
 +extern int ExpandedEnsembleDynamics(FILE *log, t_inputrec *ir, gmx_enerdata_t *enerd,
 +                                    t_state *state, t_extmass *MassQ, df_history_t *dfhist,
 +                                    gmx_large_int_t step, gmx_rng_t mcrng,
 +                                    rvec *v, t_mdatoms *mdatoms)
 +{
 +    real       *pfep_lamee, *p_k, *scaled_lamee, *weighted_lamee;
 +    int         i, nlam, nlim, lamnew, totalsamples;
 +    real        oneovert, maxscaled = 0, maxweighted = 0;
 +    t_expanded *expand;
 +    t_simtemp  *simtemp;
 +    double     *temperature_lambdas; /* NOTE(review): not referenced anywhere in this function */
 +    gmx_bool    bIfReset, bSwitchtoOneOverT, bDoneEquilibrating = FALSE;
 +    /* Summary: counts a visit to the current state (state->fep_state), builds the scaled energy differences for all nlim states, calls UpdateWeights() and ChooseNewLambda(), rescales velocities and thermostat variables when simulated tempering changes the state, applies the Wang-Landau delta bookkeeping and returns the state index chosen by ChooseNewLambda(). log may be NULL. */
 +    expand  = ir->expandedvals;
 +    simtemp = ir->simtempvals;
 +    nlim    = ir->fepvals->n_lambda;
 +    nlam    = state->fep_state;
 +
 +    snew(scaled_lamee, nlim); /* per-state work arrays; all four are released with sfree() before returning */
 +    snew(weighted_lamee, nlim);
 +    snew(pfep_lamee, nlim);
 +    snew(p_k, nlim);
 +
 +    if (expand->bInit_weights)                    /* if initialized weights, we need to fill them in */
 +    {
 +        dfhist->wl_delta = expand->init_wl_delta; /* MRS -- this would fit better somewhere else? */
 +        for (i = 0; i < nlim; i++)
 +        {
 +            dfhist->sum_weights[i] = expand->init_lambda_weights[i];
 +            dfhist->sum_dg[i]      = expand->init_lambda_weights[i];
 +        }
 +        expand->bInit_weights = FALSE; /* the flag is cleared so the copy happens only once */
 +    }
 +
 +    /* update the count at the current lambda*/
 +    dfhist->n_at_lam[nlam]++;
 +
 +    /* need to calculate the PV term somewhere, but not needed here? Not until there's a lambda state that's
 +       pressure controlled.*/
 +    /*
 +       pVTerm = 0;
 +       where does this PV term go?
 +       for (i=0;i<nlim;i++)
 +       {
 +       fep_lamee[i] += pVTerm;
 +       }
 +     */
 +
 +    /* the scaled energies are shifted by their maximum further down to avoid overflow.  Probably a better way to do this */
 +    /* we don't need to include the pressure term, since the volume is the same between the two.
 +       is there some term we are neglecting, however? */
 +
 +    if (ir->efep != efepNO)
 +    {
 +        for (i = 0; i < nlim; i++)
 +        {
 +            if (ir->bSimTemp)
 +            {
 +                /* Note -- this assumes no mass changes, since kinetic energy is not added  . . . */
 +                scaled_lamee[i] = (enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0])/(simtemp->temperatures[i]*BOLTZ)
 +                    + enerd->term[F_EPOT]*(1.0/(simtemp->temperatures[i])- 1.0/(simtemp->temperatures[nlam]))/BOLTZ;
 +            }
 +            else
 +            {
 +                scaled_lamee[i] = (enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0])/(expand->mc_temp*BOLTZ);
 +                /* mc_temp is currently set to the system reft unless otherwise defined */
 +            }
 +
 +            /* save these energies for printing, so they don't get overwritten by the next step */
 +            /* they aren't overwritten in the non-free energy case, but we always print with these
 +               for simplicity */
 +        }
 +    }
 +    else
 +    {
 +        if (ir->bSimTemp) /* without free-energy perturbation only the temperature term contributes; otherwise scaled_lamee keeps what snew() left (presumably zeros) */
 +        {
 +            for (i = 0; i < nlim; i++)
 +            {
 +                scaled_lamee[i] = enerd->term[F_EPOT]*(1.0/simtemp->temperatures[i] - 1.0/simtemp->temperatures[nlam])/BOLTZ;
 +            }
 +        }
 +    }
 +
 +    for (i = 0; i < nlim; i++) /* copy the energies, form the weighted values and track the maximum of each array */
 +    {
 +        pfep_lamee[i] = scaled_lamee[i]; /* NOTE(review): pfep_lamee is only written and freed in this function, never read */
 +
 +        weighted_lamee[i] = dfhist->sum_weights[i] - scaled_lamee[i];
 +        if (i == 0)
 +        {
 +            maxscaled   = scaled_lamee[i];
 +            maxweighted = weighted_lamee[i];
 +        }
 +        else
 +        {
 +            if (scaled_lamee[i] > maxscaled)
 +            {
 +                maxscaled = scaled_lamee[i];
 +            }
 +            if (weighted_lamee[i] > maxweighted)
 +            {
 +                maxweighted = weighted_lamee[i];
 +            }
 +        }
 +    }
 +    /* shift both arrays so that their largest entry becomes zero */
 +    for (i = 0; i < nlim; i++)
 +    {
 +        scaled_lamee[i]   -= maxscaled;
 +        weighted_lamee[i] -= maxweighted;
 +    }
 +
 +    /* update weights - we decide whether or not to actually do this inside */
 +
 +    bDoneEquilibrating = UpdateWeights(nlim, expand, dfhist, nlam, scaled_lamee, weighted_lamee, step);
 +    if (bDoneEquilibrating)
 +    {
 +        if (log)
 +        {
 +            fprintf(log, "\nStep %d: Weights have equilibrated, using criteria: %s\n", (int)step, elmceq_names[expand->elmceq]); /* NOTE(review): step is a gmx_large_int_t; the (int) cast may truncate large step counts */
 +        }
 +    }
 +
 +    lamnew = ChooseNewLambda(log, nlim, expand, dfhist, nlam, weighted_lamee, p_k, mcrng);
 +    /* if using simulated tempering, we need to adjust the temperatures */
 +    if (ir->bSimTemp && (lamnew != nlam)) /* only need to change the temperatures if we change the state */
 +    {
 +        int   i, j, n, d; /* note: this i shadows the function-level i inside this block */
 +        real *buf_ngtc;
 +        real  told;
 +        int   nstart, nend, gt;
 +
 +        snew(buf_ngtc, ir->opts.ngtc);
 +
 +        for (i = 0; i < ir->opts.ngtc; i++)
 +        {
 +            if (ir->opts.ref_t[i] > 0) /* NOTE(review): groups with ref_t <= 0 keep whatever snew() left in buf_ngtc[i] (presumably 0), and that factor is still applied to the velocities below -- confirm this is intended */
 +            {
 +                told              = ir->opts.ref_t[i];
 +                ir->opts.ref_t[i] =  simtemp->temperatures[lamnew];
 +                buf_ngtc[i]       = sqrt(ir->opts.ref_t[i]/told); /* using the buffer as temperature scaling */
 +            }
 +        }
 +
 +        /* we don't need to manipulate the ekind information, as it isn't due to be reset until the next step anyway */
 +
 +        nstart = mdatoms->start;
 +        nend   = nstart + mdatoms->homenr;
 +        for (n = nstart; n < nend; n++) /* scale the velocity of every atom in [nstart, nend) by the factor of its group */
 +        {
 +            gt = 0;
 +            if (mdatoms->cTC) /* without a cTC array every atom uses the factor of group 0 */
 +            {
 +                gt = mdatoms->cTC[n];
 +            }
 +            for (d = 0; d < DIM; d++)
 +            {
 +                v[n][d] *= buf_ngtc[gt];
 +            }
 +        }
 +
 +        if (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir))
 +        {
 +            /* we need to recalculate the masses if the temperature has changed */
 +            init_npt_masses(ir, state, MassQ, FALSE);
 +            for (i = 0; i < state->nnhpres; i++)
 +            {
 +                for (j = 0; j < ir->opts.nhchainlength; j++)
 +                {
 +                    state->nhpres_vxi[i+j] *= buf_ngtc[i]; /* NOTE(review): index i+j overlaps for neighbouring i when nhchainlength > 1, and buf_ngtc has ngtc (not nnhpres) entries -- confirm */
 +                }
 +            }
 +            for (i = 0; i < ir->opts.ngtc; i++)
 +            {
 +                for (j = 0; j < ir->opts.nhchainlength; j++)
 +                {
 +                    state->nosehoover_vxi[i+j] *= buf_ngtc[i]; /* NOTE(review): same i+j indexing as above; a per-group chain layout would need i*nhchainlength+j -- confirm */
 +                }
 +            }
 +        }
 +        sfree(buf_ngtc);
 +    }
 +
 +    /* now check on the Wang-Landau updating criteria */
 +
 +    if (EWL(expand->elamstats))
 +    {
 +        bSwitchtoOneOverT = FALSE;
 +        if (expand->bWLoneovert)
 +        {
 +            totalsamples = 0;
 +            for (i = 0; i < nlim; i++)
 +            {
 +                totalsamples += dfhist->n_at_lam[i];
 +            }
 +            oneovert = (1.0*nlim)/totalsamples; /* presumably totalsamples >= 1 here, since n_at_lam[nlam] was incremented above */
 +            /* oneovert has decreased by a bit since last time, so we actually make sure it's within one of this number */
 +            /* switch to 1/t incrementing when wl_delta has decreased at least once, and wl_delta is now less than 1/t */
 +            if ((dfhist->wl_delta <= ((totalsamples)/(totalsamples-1.00001))*oneovert) &&
 +                (dfhist->wl_delta < expand->init_wl_delta))
 +            {
 +                bSwitchtoOneOverT = TRUE;
 +            }
 +        }
 +        if (bSwitchtoOneOverT)
 +        {
 +            dfhist->wl_delta = oneovert; /* now we reduce by this each time, instead of only at flatness */
 +        }
 +        else
 +        {
 +            bIfReset = CheckHistogramRatios(nlim, dfhist->wl_histo, expand->wl_ratio);
 +            if (bIfReset) /* clear the histogram and scale wl_delta by wl_scale */
 +            {
 +                for (i = 0; i < nlim; i++)
 +                {
 +                    dfhist->wl_histo[i] = 0;
 +                }
 +                dfhist->wl_delta *= expand->wl_scale;
 +                if (log)
 +                {
 +                    fprintf(log, "\nStep %d: weights are now:", (int)step); /* NOTE(review): same possible truncation of step by the (int) cast as in the message above */
 +                    for (i = 0; i < nlim; i++)
 +                    {
 +                        fprintf(log, " %.5f", dfhist->sum_weights[i]);
 +                    }
 +                    fprintf(log, "\n");
 +                }
 +            }
 +        }
 +    }
++    sfree(pfep_lamee);
 +    sfree(scaled_lamee);
 +    sfree(weighted_lamee);
 +    sfree(p_k);
 +
 +    return lamnew;
 +}
diff --cc src/gromacs/mdlib/force.c
index 278c8de8f3,0000000000..7f0ae3ba2f
mode 100644,000000..100644
--- a/src/gromacs/mdlib/force.c
+++ b/src/gromacs/mdlib/force.c
@@@ -1,956 -1,0 +1,963 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "nrnb.h"
 +#include "bondf.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "pme.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "gmx_omp_nthreads.h"
 +
 +
 +/* Neighbour-search driver.
 + *
 + * Calls init_neighbor_list() if fr->ns.nblist_initialized is not set, resets
 + * fr->nlr to 0 when fr->bTwinRange is set, runs search_neighbours() (its return
 + * value is only reported to the debug file) and, when fr->ns.dump_nl > 0,
 + * passes the lists to dump_nblist(). The opts parameter is not used here.
 + */
 +void ns(FILE              *fp,
 +        t_forcerec        *fr,
 +        rvec               x[],
 +        matrix             box,
 +        gmx_groups_t      *groups,
 +        t_grpopts         *opts,
 +        gmx_localtop_t    *top,
 +        t_mdatoms         *md,
 +        t_commrec         *cr,
 +        t_nrnb            *nrnb,
 +        real              *lambda,
 +        real              *dvdlambda,
 +        gmx_grppairener_t *grppener,
 +        gmx_bool           bFillGrid,
 +        gmx_bool           bDoLongRangeNS)
 +{
 +    int nsearch;
 +
 +    if (!fr->ns.nblist_initialized)
 +    {
 +        init_neighbor_list(fp, fr, md->homenr);
 +    }
 +
 +    if (fr->bTwinRange)
 +    {
 +        fr->nlr = 0;
 +    }
 +
 +    nsearch = search_neighbours(fp, fr, x, box, top, groups, cr, nrnb, md,
 +                                lambda, dvdlambda, grppener,
 +                                bFillGrid, bDoLongRangeNS, TRUE);
 +    if (debug)
 +    {
 +        fprintf(debug, "nsearch = %d\n", nsearch);
 +    }
 +
 +    if (fr->ns.dump_nl > 0)
 +    {
 +        dump_nblist(fp, cr, fr, fr->ns.dump_nl);
 +    }
 +}
 +
 +static void reduce_thread_forces(int n, rvec *f,
 +                                 tensor vir,
 +                                 real *Vcorr,
 +                                 int efpt_ind, real *dvdl,
 +                                 int nthreads, f_thread_t *f_t)
 +{
 +    int atom, th; /* th starts at 1 in both loops: the entry f_t[0] is never read here */
 +    /* Add the force buffers f_t[1..nthreads-1].f onto f for the first n entries; this loop may use any number of OpenMP threads */
 +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(th) schedule(static)
 +    for (atom = 0; atom < n; atom++)
 +    {
 +        for (th = 1; th < nthreads; th++)
 +        {
 +            rvec_inc(f[atom], f_t[th].f[atom]);
 +        }
 +    }
 +    for (th = 1; th < nthreads; th++) /* the scalar and virial terms are accumulated serially, in thread order */
 +    {
 +        f_thread_t *src = &f_t[th];
 +        *Vcorr += src->Vcorr;
 +        *dvdl  += src->dvdl[efpt_ind];
 +        m_add(vir, src->vir, vir);
 +    }
 +}
 +
 +void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
 +                       t_forcerec *fr,      t_inputrec *ir,
 +                       t_idef     *idef,    t_commrec  *cr,
 +                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
 +                       t_mdatoms  *md,
 +                       t_grpopts  *opts,
 +                       rvec       x[],      history_t  *hist,
 +                       rvec       f[],
 +                       rvec       f_longrange[],
 +                       gmx_enerdata_t *enerd,
 +                       t_fcdata   *fcd,
 +                       gmx_mtop_t     *mtop,
 +                       gmx_localtop_t *top,
 +                       gmx_genborn_t *born,
 +                       t_atomtypes *atype,
 +                       gmx_bool       bBornRadii,
 +                       matrix     box,
 +                       t_lambda   *fepvals,
 +                       real       *lambda,
 +                       t_graph    *graph,
 +                       t_blocka   *excl,
 +                       rvec       mu_tot[],
 +                       int        flags,
 +                       float      *cycles_pme)
 +{
 +    int         i, j, status;
 +    int         donb_flags;
 +    gmx_bool    bDoEpot, bSepDVDL, bSB;
 +    int         pme_flags;
 +    matrix      boxs;
 +    rvec        box_size;
 +    real        Vsr, Vlr, Vcorr = 0;
 +    t_pbc       pbc;
 +    real        dvdgb;
 +    char        buf[22];
 +    double      clam_i, vlam_i;
 +    real        dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
 +    real        dvdlsum;
 +
 +#ifdef GMX_MPI
 +    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
 +#endif
 +
 +#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); }
 +
 +
 +    set_pbc(&pbc, fr->ePBC, box);
 +
 +    /* reset free energy components */
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        dvdl_nb[i]  = 0;
 +        dvdl_dum[i] = 0;
 +    }
 +
 +    /* Reset box */
 +    for (i = 0; (i < DIM); i++)
 +    {
 +        box_size[i] = box[i][i];
 +    }
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog));
 +    debug_gmx();
 +
 +    /* do QMMM first if requested */
 +    if (fr->bQMMM)
 +    {
 +        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md);
 +    }
 +
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n",
 +                gmx_step_str(step, buf), cr->nodeid);
 +    }
 +
 +    /* Call the short range functions all in one go. */
 +
 +#ifdef GMX_MPI
 +    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
 +#define TAKETIME FALSE
 +    if (TAKETIME)
 +    {
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t0 = MPI_Wtime();
 +    }
 +#endif
 +
 +    if (ir->nwall)
 +    {
 +        /* foreign lambda component for walls */
 +        dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
 +                        enerd->grpp.ener[egLJSR], nrnb);
 +        PRINT_SEPDVDL("Walls", 0.0, dvdl);
 +        enerd->dvdl_lin[efptVDW] += dvdl;
 +    }
 +
 +    /* If doing GB, reset dvda and calculate the Born radii */
 +    if (ir->implicit_solvent)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +
 +        for (i = 0; i < born->nr; i++)
 +        {
 +            fr->dvda[i] = 0;
 +        }
 +
 +        if (bBornRadii)
 +        {
 +            calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb);
 +        }
 +
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +
 +    where();
 +    /* We only do non-bonded calculation with group scheme here, the verlet
 +     * calls are done from do_force_cutsVERLET(). */
 +    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
 +    {
 +        donb_flags = 0;
 +        /* Add short-range interactions */
 +        donb_flags |= GMX_NONBONDED_DO_SR;
 +
 +        if (flags & GMX_FORCE_FORCES)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_FORCE;
 +        }
 +        if (flags & GMX_FORCE_ENERGY)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
 +        }
 +        if (flags & GMX_FORCE_DO_LR)
 +        {
 +            donb_flags |= GMX_NONBONDED_DO_LR;
 +        }
 +
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +        do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
 +                     &enerd->grpp, box_size, nrnb,
 +                     lambda, dvdl_nb, -1, -1, donb_flags);
 +
 +        /* If we do foreign lambda and we have soft-core interactions
 +         * we have to recalculate the (non-linear) energies contributions.
 +         */
 +        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
 +        {
 +            for (i = 0; i < enerd->n_lambda; i++)
 +            {
 +                for (j = 0; j < efptNR; j++)
 +                {
 +                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 +                }
 +                reset_foreign_enerdata(enerd);
 +                do_nonbonded(cr, fr, x, f, f_longrange, md, excl,
 +                             &(enerd->foreign_grpp), box_size, nrnb,
 +                             lam_i, dvdl_dum, -1, -1,
 +                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
 +                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
 +                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
 +            }
 +        }
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +        where();
 +    }
 +
 +    /* If we are doing GB, calculate bonded forces and apply corrections
 +     * to the solvation forces */
 +    /* MRS: Eventually, many need to include free energy contribution here! */
 +    if (ir->implicit_solvent)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsBONDED);
 +        calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef,
 +                       ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd);
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t1          = MPI_Wtime();
 +        fr->t_fnbf += t1-t0;
 +    }
 +#endif
 +
 +    if (fepvals->sc_alpha != 0)
 +    {
 +        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
 +    }
 +
 +    if (fepvals->sc_alpha != 0)
 +
 +    /* even though coulomb part is linear, we already added it, beacuse we
 +       need to go through the vdw calculation anyway */
 +    {
 +        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +    else
 +    {
 +        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
 +    }
 +
 +    Vsr = 0;
 +    if (bSepDVDL)
 +    {
 +        for (i = 0; i < enerd->grpp.nener; i++)
 +        {
 +            Vsr +=
 +                (fr->bBHAM ?
 +                 enerd->grpp.ener[egBHAMSR][i] :
 +                 enerd->grpp.ener[egLJSR][i])
 +                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
 +        }
 +        dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
 +        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum);
 +    }
 +    debug_gmx();
 +
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
 +    }
 +
 +    /* Shift the coordinates. Must be done before bonded forces and PPPM,
 +     * but is also necessary for SHAKE and update, therefore it can NOT
 +     * go when no bonded forces have to be evaluated.
 +     */
 +
 +    /* Here sometimes we would not need to shift with NBFonly,
 +     * but we do so anyhow for consistency of the returned coordinates.
 +     */
 +    if (graph)
 +    {
 +        shift_self(graph, box, x);
 +        if (TRICLINIC(box))
 +        {
 +            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
 +        }
 +        else
 +        {
 +            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
 +        }
 +    }
 +    /* Check whether we need to do bondeds or correct for exclusions */
 +    if (fr->bMolPBC &&
 +        ((flags & GMX_FORCE_BONDED)
 +         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
 +    {
 +        /* Since all atoms are in the rectangular or triclinic unit-cell,
 +         * only single box vector shifts (2 in x) are required.
 +         */
 +        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
 +    }
 +    debug_gmx();
 +
 +    if (flags & GMX_FORCE_BONDED)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsBONDED);
 +        calc_bonds(fplog, cr->ms,
 +                   idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
 +                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
 +                   flags,
 +                   fr->bSepDVDL && do_per_step(step, ir->nstlog), step);
 +
 +        /* Check if we have to determine energy differences
 +         * at foreign lambda's.
 +         */
 +        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
 +            idef->ilsort != ilsortNO_FE)
 +        {
 +            if (idef->ilsort != ilsortFE_SORTED)
 +            {
 +                gmx_incons("The bonded interactions are not sorted for free energy");
 +            }
 +            for (i = 0; i < enerd->n_lambda; i++)
 +            {
 +                reset_foreign_enerdata(enerd);
 +                for (j = 0; j < efptNR; j++)
 +                {
 +                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 +                }
 +                calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
 +                                  fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term);
 +                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
 +            }
 +        }
 +        debug_gmx();
 +
 +        wallcycle_sub_stop(wcycle, ewcsBONDED);
 +    }
 +
 +    where();
 +
 +    *cycles_pme = 0;
 +    if (EEL_FULL(fr->eeltype))
 +    {
 +        bSB = (ir->nwall == 2);
 +        if (bSB)
 +        {
 +            copy_mat(box, boxs);
 +            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
 +            box_size[ZZ] *= ir->wall_ewald_zfac;
 +        }
 +
 +        clear_mat(fr->vir_el_recip);
 +
 +        if (fr->bEwald)
 +        {
 +            Vcorr = 0;
 +            dvdl  = 0;
 +
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            /* The TPI molecule does not have exclusions with the rest
 +             * of the system and no intra-molecular PME grid contributions
 +             * will be calculated in gmx_pme_calc_energy.
 +             */
 +            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
 +                ir->ewald_geometry != eewg3D ||
 +                ir->epsilon_surface != 0)
 +            {
 +                int nthreads, t;
 +
 +                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
 +
 +                if (fr->n_tpi > 0)
 +                {
 +                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
 +                }
 +
 +                nthreads = gmx_omp_nthreads_get(emntBonded);
 +#pragma omp parallel for num_threads(nthreads) schedule(static)
 +                for (t = 0; t < nthreads; t++)
 +                {
 +                    int     s, e, i;
 +                    rvec   *fnv;
 +                    tensor *vir;
 +                    real   *Vcorrt, *dvdlt;
 +                    if (t == 0)
 +                    {
 +                        fnv    = fr->f_novirsum;
 +                        vir    = &fr->vir_el_recip;
 +                        Vcorrt = &Vcorr;
 +                        dvdlt  = &dvdl;
 +                    }
 +                    else
 +                    {
 +                        fnv    = fr->f_t[t].f;
 +                        vir    = &fr->f_t[t].vir;
 +                        Vcorrt = &fr->f_t[t].Vcorr;
 +                        dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
 +                        for (i = 0; i < fr->natoms_force; i++)
 +                        {
 +                            clear_rvec(fnv[i]);
 +                        }
 +                        clear_mat(*vir);
 +                    }
 +                    *dvdlt  = 0;
 +                    *Vcorrt =
 +                        ewald_LRcorrection(fplog,
 +                                           fr->excl_load[t], fr->excl_load[t+1],
 +                                           cr, t, fr,
 +                                           md->chargeA,
 +                                           md->nChargePerturbed ? md->chargeB : NULL,
 +                                           ir->cutoff_scheme != ecutsVERLET,
 +                                           excl, x, bSB ? boxs : box, mu_tot,
 +                                           ir->ewald_geometry,
 +                                           ir->epsilon_surface,
 +                                           fnv, *vir,
 +                                           lambda[efptCOUL], dvdlt);
 +                }
 +                if (nthreads > 1)
 +                {
 +                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
 +                                         fr->vir_el_recip,
 +                                         &Vcorr, efptCOUL, &dvdl,
 +                                         nthreads, fr->f_t);
 +                }
 +
 +                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
 +            }
 +
 +            if (fr->n_tpi == 0)
 +            {
 +                Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
 +                                                 &dvdl, fr->vir_el_recip);
 +            }
 +
 +            PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl);
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +        }
 +
 +        status = 0;
 +        Vlr    = 0;
 +        dvdl   = 0;
 +        switch (fr->eeltype)
 +        {
 +            case eelPME:
 +            case eelPMESWITCH:
 +            case eelPMEUSER:
 +            case eelPMEUSERSWITCH:
 +            case eelP3M_AD:
 +                if (cr->duty & DUTY_PME)
 +                {
 +                    assert(fr->n_tpi >= 0);
 +                    if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
 +                    {
 +                        pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
 +                        if (flags & GMX_FORCE_FORCES)
 +                        {
 +                            pme_flags |= GMX_PME_CALC_F;
 +                        }
 +                        if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
 +                        {
 +                            pme_flags |= GMX_PME_CALC_ENER_VIR;
 +                        }
 +                        if (fr->n_tpi > 0)
 +                        {
 +                            /* We don't calculate f, but we do want the potential */
 +                            pme_flags |= GMX_PME_CALC_POT;
 +                        }
 +                        wallcycle_start(wcycle, ewcPMEMESH);
 +                        status = gmx_pme_do(fr->pmedata,
 +                                            md->start, md->homenr - fr->n_tpi,
 +                                            x, fr->f_novirsum,
 +                                            md->chargeA, md->chargeB,
 +                                            bSB ? boxs : box, cr,
 +                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
 +                                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
 +                                            nrnb, wcycle,
 +                                            fr->vir_el_recip, fr->ewaldcoeff,
 +                                            &Vlr, lambda[efptCOUL], &dvdl,
 +                                            pme_flags);
 +                        *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
 +
 +                        /* We should try to do as little computation after
 +                         * this as possible, because parallel PME synchronizes
 +                         * the nodes, so we want all load imbalance of the rest
 +                         * of the force calculation to be before the PME call.
 +                         * DD load balancing is done on the whole time of
 +                         * the force call (without PME).
 +                         */
 +                    }
 +                    if (fr->n_tpi > 0)
 +                    {
 +                        /* Determine the PME grid energy of the test molecule
 +                         * with the PME grid potential of the other charges.
 +                         */
 +                        gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
 +                                            x + md->homenr - fr->n_tpi,
 +                                            md->chargeA + md->homenr - fr->n_tpi,
 +                                            &Vlr);
 +                    }
 +                    PRINT_SEPDVDL("PME mesh", Vlr, dvdl);
 +                }
 +                break;
 +            case eelEWALD:
 +                Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum,
 +                               md->chargeA, md->chargeB,
 +                               box_size, cr, md->homenr,
 +                               fr->vir_el_recip, fr->ewaldcoeff,
 +                               lambda[efptCOUL], &dvdl, fr->ewald_table);
 +                PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl);
 +                break;
 +            default:
 +                gmx_fatal(FARGS, "No such electrostatics method implemented %s",
 +                          eel_names[fr->eeltype]);
 +        }
 +        if (status != 0)
 +        {
 +            gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s",
 +                      status, EELTYPE(fr->eeltype));
 +        }
 +        /* Note that with separate PME nodes we get the real energies later */
 +        enerd->dvdl_lin[efptCOUL] += dvdl;
 +        enerd->term[F_COUL_RECIP]  = Vlr + Vcorr;
 +        if (debug)
 +        {
 +            fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
 +                    Vlr, Vcorr, enerd->term[F_COUL_RECIP]);
 +            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
 +            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
 +        }
 +    }
 +    else
 +    {
 +        if (EEL_RF(fr->eeltype))
 +        {
 +            /* With the Verlet scheme exclusion forces are calculated
 +             * in the non-bonded kernel.
 +             */
 +            if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
 +            {
 +                dvdl                   = 0;
 +                enerd->term[F_RF_EXCL] =
 +                    RF_excl_correction(fplog, fr, graph, md, excl, x, f,
 +                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl);
 +            }
 +
 +            enerd->dvdl_lin[efptCOUL] += dvdl;
 +            PRINT_SEPDVDL("RF exclusion correction",
 +                          enerd->term[F_RF_EXCL], dvdl);
 +        }
 +    }
 +    where();
 +    debug_gmx();
 +
 +    if (debug)
 +    {
 +        print_nrnb(debug, nrnb);
 +    }
 +    debug_gmx();
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t2 = MPI_Wtime();
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t3          = MPI_Wtime();
 +        fr->t_wait += t3-t2;
 +        if (fr->timesteps == 11)
 +        {
 +            fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
 +                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
 +                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
 +                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
 +        }
 +        fr->timesteps++;
 +    }
 +#endif
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
 +    }
 +
 +}
 +
 +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
 +{
 +    int i, n2;
 +    /* Initialize enerd: zero all terms and allocate the group-pair and per-lambda arrays */
 +    for (i = 0; i < F_NRE; i++)
 +    {
 +        enerd->term[i]         = 0;
 +        enerd->foreign_term[i] = 0;
 +    }
 +
 +    /* Zero the linear and non-linear dV/dlambda accumulators of every lambda component */
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        enerd->dvdl_lin[i]     = 0;
 +        enerd->dvdl_nonlin[i]  = 0;
 +    }
 +    /* Each group-pair energy array gets ngener*ngener entries */
 +    n2 = ngener*ngener;
 +    if (debug)
 +    {
 +        fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
 +    }
 +    enerd->grpp.nener         = n2;
 +    enerd->foreign_grpp.nener = n2;
 +    for (i = 0; (i < egNR); i++)
 +    {
 +        snew(enerd->grpp.ener[i], n2);
 +        snew(enerd->foreign_grpp.ener[i], n2);
 +    }
 +    /* enerpart_lambda gets 1 + n_lambda entries; it is not allocated when n_lambda is 0 */
 +    if (n_lambda)
 +    {
 +        enerd->n_lambda = 1 + n_lambda;
 +        snew(enerd->enerpart_lambda, enerd->n_lambda);
 +    }
 +    else
 +    {
 +        enerd->n_lambda = 0;
 +    }
 +}
 +
 +void destroy_enerdata(gmx_enerdata_t *enerd)
 +{
 +    int i;
 +    /* Free the arrays that init_enerdata allocates; enerd itself is not freed here */
 +    for (i = 0; (i < egNR); i++)
 +    {
 +        sfree(enerd->grpp.ener[i]);
 +    }
 +
 +    for (i = 0; (i < egNR); i++)
 +    {
 +        sfree(enerd->foreign_grpp.ener[i]);
 +    }
 +    /* init_enerdata only allocates enerpart_lambda when n_lambda is non-zero */
 +    if (enerd->n_lambda)
 +    {
 +        sfree(enerd->enerpart_lambda);
 +    }
 +}
 +
 +static real sum_v(int n, real v[])
 +{
 +    real t;
 +    int  i;
 +    /* Return v[0] + ... + v[n-1], added in index order; the result is 0 when n <= 0 */
 +    t = 0.0;
 +    for (i = 0; (i < n); i++)
 +    {
 +        t = t + v[i];
 +    }
 +
 +    return t;
 +}
 +
 +void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot)
 +{
 +    int i;
 +    /* Fill epot with the group-pair sums from grpp, then total into epot[F_EPOT]; opts is not used */
 +    /* Sum each group-pair energy array into its corresponding energy term */
 +    epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
 +    epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
 +    epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
 +    epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
 +    epot[F_COUL_LR]  = sum_v(grpp->nener, grpp->ener[egCOULLR]);
 +    epot[F_LJ_LR]    = sum_v(grpp->nener, grpp->ener[egLJLR]);
 +    /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
 +    epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
 +
 +    /* The lattice part of LR doesn't belong to any group
 +     * and has been added earlier.
 +     */
 +    epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
 +    epot[F_BHAM_LR]  = sum_v(grpp->nener, grpp->ener[egBHAMLR]);
 +    /* F_EPOT is the sum of all terms with a lower index, except F_DISRESVIOL and F_ORIRESDEV */
 +    epot[F_EPOT] = 0;
 +    for (i = 0; (i < F_EPOT); i++)
 +    {
 +        if (i != F_DISRESVIOL && i != F_ORIRESDEV)
 +        {
 +            epot[F_EPOT] += epot[i];
 +        }
 +    }
 +}
 +
 +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
 +{
 +    int    i, j, index;
 +    double dlam;
 +    /* Fold dvdl_lin/dvdl_nonlin into the F_DVDL* terms and add the linear part to enerpart_lambda */
 +    enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
 +    enerd->term[F_DVDL]       = 0.0;
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        if (fepvals->separate_dvdl[i])
 +        {
 +            /* Map the lambda component to the energy term that stores it separately */
 +            switch (i)
 +            {
 +                case (efptMASS):
 +                    index = F_DKDL;
 +                    break;
 +                case (efptCOUL):
 +                    index = F_DVDL_COUL;
 +                    break;
 +                case (efptVDW):
 +                    index = F_DVDL_VDW;
 +                    break;
 +                case (efptBONDED):
 +                    index = F_DVDL_BONDED;
 +                    break;
 +                case (efptRESTRAINT):
 +                    index = F_DVDL_RESTRAINT;
 +                    break;
 +                default:
 +                    index = F_DVDL;
 +                    break;
 +            }
 +            enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
 +            }
 +        }
 +        else
 +        {
 +            enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 +            if (debug)
 +            {
 +                fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
 +                        efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
 +            }
 +        }
 +    }
 +
 +    /* Notes on the foreign lambda free energy difference evaluation:
 +     * Adding the potential and ekin terms that depend linearly on lambda
 +     * as delta lam * dvdl to the energy differences is exact.
 +     * For the constraints this is not exact, but we have no other option
 +     * without literally changing the lengths and reevaluating the energies at each step.
 +     * (try to remedy this post 4.6 - MRS)
 +     * For the non-bonded LR term we assume that the soft-core (if present)
 +     * no longer affects the energy beyond the short-range cut-off,
 +     * which is a very good approximation (except for exotic settings).
 +     * (investigate how to overcome this post 4.6 - MRS)
 +     */
-     enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
++    if (fepvals->separate_dvdl[efptBONDED])
++    {
++        enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
++    }
++    else
++    {
++        enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
++    }
 +    enerd->term[F_DVDL_CONSTR] = 0;
 +
 +    for (i = 0; i < fepvals->n_lambda; i++)
 +    {                                         /* note we are iterating over fepvals here!
 +                                                 For the current lam, dlam = 0 automatically,
 +                                                 so we don't need to add anything to the
 +                                                 enerd->enerpart_lambda[0] */
 +
 +        /* we don't need to worry about dvdl_lin contributions to dE at
 +           current lambda, because the contributions to the current
 +           lambda are automatically zeroed */
 +
 +        for (j = 0; j < efptNR; j++)
 +        {
 +            /* Note that this loop is over all dhdl components, not just the separated ones */
 +            dlam = (fepvals->all_lambda[j][i]-lambda[j]);
 +            enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
 +            if (debug)
 +            {
 +                fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
 +                        fepvals->all_lambda[j][i], efpt_names[j],
 +                        (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
 +                        dlam, enerd->dvdl_lin[j]);
 +            }
 +        }
 +    }
 +}
 +
 +
 +void reset_foreign_enerdata(gmx_enerdata_t *enerd)
 +{
 +    int  i, j;
 +    /* Zero the foreign-lambda energy data; enerd->term and enerd->grpp are not modified */
 +    /* First reset all foreign energy components.  Foreign energies always called on
 +       neighbor search steps.  NOTE(review): the bound is grpp.nener, not foreign_grpp.nener */
 +    for (i = 0; (i < egNR); i++)
 +    {
 +        for (j = 0; (j < enerd->grpp.nener); j++)
 +        {
 +            enerd->foreign_grpp.ener[i][j] = 0.0;
 +        }
 +    }
 +
 +    /* potential energy components, up to and including F_EPOT */
 +    for (i = 0; (i <= F_EPOT); i++)
 +    {
 +        enerd->foreign_term[i] = 0.0;
 +    }
 +}
 +
 +void reset_enerdata(t_grpopts *opts,
 +                    t_forcerec *fr, gmx_bool bNS,
 +                    gmx_enerdata_t *enerd,
 +                    gmx_bool bMaster)
 +{
 +    gmx_bool bKeepLR;
 +    int      i, j;
 +    /* Zero the energy accumulators in enerd; opts is not used here */
 +    /* First reset all group-pair energies, except for the long range terms
 +     * (egCOULLR, egLJLR) on the master at non neighbor search steps with
 +     * twin-range, since those were already summed at the last search step.
 +     */
 +    bKeepLR = (fr->bTwinRange && !bNS);
 +    for (i = 0; (i < egNR); i++)
 +    {
 +        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR)))
 +        {
 +            for (j = 0; (j < enerd->grpp.nener); j++)
 +            {
 +                enerd->grpp.ener[i][j] = 0.0;
 +            }
 +        }
 +    }
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        enerd->dvdl_lin[i]    = 0.0;
 +        enerd->dvdl_nonlin[i] = 0.0;
 +    }
 +
 +    /* Normal potential energy components */
 +    for (i = 0; (i <= F_EPOT); i++)
 +    {
 +        enerd->term[i] = 0.0;
 +    }
 +    /* Reset the total dV/dlambda term and the per-component terms that
 +     * sum_dhdl fills in (Coulomb, VdW, bonded, restraint and mass). */
 +    enerd->term[F_DVDL]            = 0.0;
 +    enerd->term[F_DVDL_COUL]       = 0.0;
 +    enerd->term[F_DVDL_VDW]        = 0.0;
 +    enerd->term[F_DVDL_BONDED]     = 0.0;
 +    enerd->term[F_DVDL_RESTRAINT]  = 0.0;
 +    enerd->term[F_DKDL]            = 0.0;
 +    if (enerd->n_lambda > 0)
 +    {
 +        for (i = 0; i < enerd->n_lambda; i++)
 +        {
 +            enerd->enerpart_lambda[i] = 0.0;
 +        }
 +    }
 +    /* reset foreign energy data - separate function since we also call it elsewhere */
 +    reset_foreign_enerdata(enerd);
 +}
diff --cc src/gromacs/mdlib/forcerec.c
index 794841cac9,0000000000..0751b57db4
mode 100644,000000..100644
--- a/src/gromacs/mdlib/forcerec.c
+++ b/src/gromacs/mdlib/forcerec.c
@@@ -1,2958 -1,0 +1,2977 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "vec.h"
 +#include "maths.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "gmx_fatal.h"
 +#include "gmx_fatal_collective.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "tables.h"
 +#include "nonbonded.h"
 +#include "invblock.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "md_support.h"
 +#include "md_logging.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +#include "copyrite.h"
 +#include "mtop_util.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_consts.h"
 +#include "statutil.h"
 +#include "gmx_omp_nthreads.h"
 +#include "gmx_detect_hardware.h"
 +
 +#ifdef _MSC_VER
 +/* MSVC definition for __cpuid() */
 +#include <intrin.h>
 +#endif
 +
 +#include "types/nbnxn_cuda_types_ext.h"
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "pmalloc_cuda.h"
 +
 +t_forcerec *mk_forcerec(void)
 +{
 +    t_forcerec *fr;
 +    /* Allocate a single t_forcerec with snew and return it to the caller */
 +    snew(fr, 1);
 +
 +    return fr;
 +}
 +
 +#ifdef DEBUG
 +static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
 +{
 +    int i, j;
 +    /* Print one entry per (i, j) type pair, dividing out the 6.0/12.0 prefactors stored in nbfp */
 +    for (i = 0; (i < atnr); i++)
 +    {
 +        for (j = 0; (j < atnr); j++)
 +        {
 +            fprintf(fp, "%2d - %2d", i, j);
 +            if (bBHAM)
 +            {
 +                fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 +                        BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 +            }
 +            else
 +            {
 +                fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 +                        C12(nbfp, atnr, i, j)/12.0);
 +            }
 +        }
 +    }
 +}
 +#endif
 +
 +static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 +{
 +    real *nbfp;
 +    int   i, j, k, atnr;
 +    /* Build the atnr x atnr non-bonded parameter table; iparams[k] is read with k = i*atnr + j */
 +    atnr = idef->atnr;
 +    if (bBHAM)
 +    {
 +        snew(nbfp, 3*atnr*atnr);
 +        for (i = k = 0; (i < atnr); i++)
 +        {
 +            for (j = 0; (j < atnr); j++, k++)
 +            {
 +                BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 +                BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 +                /* nbfp now includes the 6.0 derivative prefactor */
 +                BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        snew(nbfp, 2*atnr*atnr);
 +        for (i = k = 0; (i < atnr); i++)
 +        {
 +            for (j = 0; (j < atnr); j++, k++)
 +            {
 +                /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 +                C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 +            }
 +        }
 +    }
 +
 +    return nbfp;
 +}
 +
 +/* This routine sets fr->solvent_opt to the most common solvent in the
 + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 + * the fr->solvent_type array with the correct type (or esolNO).
 + *
 + * Charge groups that fulfill the conditions but are not identical to the
 + * most common one will be marked as esolNO in the solvent_type array.
 + *
 + * TIP3p is identical to SPC for these purposes, so we call it
 + * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 + *
 + * NOTE: A QM particle should not
 + * become an optimized solvent, not even if there is only one charge
 + * group in the QM group.
 + */
 +
 +typedef struct
 +{
 +    int    model;      /* esolSPC or esolTIP4P */
 +    int    count;      /* accumulated nmol of the charge groups matching this entry */
 +    int    vdwtype[4]; /* atom type per atom; only the first 3 (SPC) or 4 (TIP4P) are set */
 +    real   charge[4];  /* charge per atom; same number of valid entries as vdwtype */
 +} solvent_parameters_t;
 +
 +static void
 +check_solvent_cg(const gmx_moltype_t    *molt,
 +                 int                     cg0,
 +                 int                     nmol,
 +                 const unsigned char    *qm_grpnr,
 +                 const t_grps           *qm_grps,
 +                 t_forcerec   *          fr,
 +                 int                    *n_solvent_parameters,
 +                 solvent_parameters_t  **solvent_parameters_p,
 +                 int                     cginfo,
 +                 int                    *cg_sp)
 +{
 +    const t_blocka     *  excl;
 +    t_atom               *atom;
 +    int                   j, k;
 +    int                   j0, j1, nj;
 +    gmx_bool              perturbed;
 +    gmx_bool              has_vdw[4];
 +    gmx_bool              match;
 +    real                  tmp_charge[4];
 +    int                   tmp_vdwtype[4];
 +    int                   tjA;
 +    gmx_bool              qm;
 +    solvent_parameters_t *solvent_parameters;
 +
 +    /* We use a list with parameters for each solvent type.
 +     * Every time we discover a new molecule that fulfills the basic
 +     * conditions for a solvent we compare with the previous entries
 +     * in these lists. If the parameters are the same we just increment
 +     * the counter for that type, and otherwise we create a new type
 +     * based on the current molecule.
 +     *
 +     * Once we've finished going through all molecules we check which
 +     * solvent is most common, and mark all those molecules while we
 +     * clear the flag on all others.
 +     */
 +
 +    solvent_parameters = *solvent_parameters_p;
 +
 +    /* Mark the cg first as non optimized */
 +    *cg_sp = -1;
 +
 +    /* Check if this cg has no exclusions with atoms in other charge groups
 +     * and all atoms inside the charge group excluded.
 +     * We only have 3 or 4 atom solvent loops.
 +     */
 +    if (GET_CGINFO_EXCL_INTER(cginfo) ||
 +        !GET_CGINFO_EXCL_INTRA(cginfo))
 +    {
 +        return;
 +    }
 +
 +    /* Get the indices of the first atom in this charge group */
 +    j0     = molt->cgs.index[cg0];
 +    j1     = molt->cgs.index[cg0+1];
 +
 +    /* Number of atoms in our molecule */
 +    nj     = j1 - j0;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Moltype '%s': there are %d atoms in this charge group\n",
 +                *molt->name, nj);
 +    }
 +
 +    /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 +     * otherwise skip it.
 +     */
 +    if (nj < 3 || nj > 4)
 +    {
 +        return;
 +    }
 +
 +    /* Check if we are doing QM on this group */
 +    qm = FALSE;
 +    if (qm_grpnr != NULL)
 +    {
 +        for (j = j0; j < j1 && !qm; j++)
 +        {
 +            qm = (qm_grpnr[j] < qm_grps->nr - 1);
 +        }
 +    }
 +    /* Cannot use solvent optimization with QM */
 +    if (qm)
 +    {
 +        return;
 +    }
 +
 +    atom = molt->atoms.atom;
 +
 +    /* Still looks like a solvent, time to check parameters */
 +
 +    /* If it is perturbed (free energy) we can't use the solvent loops,
 +     * so then we just skip to the next molecule.
 +     */
 +    perturbed = FALSE;
 +
 +    for (j = j0; j < j1 && !perturbed; j++)
 +    {
 +        perturbed = PERTURBED(atom[j]);
 +    }
 +
 +    if (perturbed)
 +    {
 +        return;
 +    }
 +
 +    /* Now it's only a question if the VdW and charge parameters
 +     * are OK. Before doing the check we compare and see if they are
 +     * identical to a possible previous solvent type.
 +     * First we assign the current types and charges.
 +     */
 +    for (j = 0; j < nj; j++)
 +    {
 +        tmp_vdwtype[j] = atom[j0+j].type;
 +        tmp_charge[j]  = atom[j0+j].q;
 +    }
 +
 +    /* Does it match any previous solvent type? */
 +    for (k = 0; k < *n_solvent_parameters; k++)
 +    {
 +        match = TRUE;
 +
 +
 +        /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 +        if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 +             (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 +        {
 +            match = FALSE;
 +        }
 +
 +        /* Check that types & charges match for all atoms in molecule */
 +        for (j = 0; j < nj && match == TRUE; j++)
 +        {
 +            if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 +            {
 +                match = FALSE;
 +            }
 +            if (tmp_charge[j] != solvent_parameters[k].charge[j])
 +            {
 +                match = FALSE;
 +            }
 +        }
 +        if (match == TRUE)
 +        {
 +            /* Congratulations! We have a matched solvent.
 +             * Flag it with this type for later processing.
 +             */
 +            *cg_sp = k;
 +            solvent_parameters[k].count += nmol;
 +
 +            /* We are done with this charge group */
 +            return;
 +        }
 +    }
 +
 +    /* If we get here, we have a tentative new solvent type.
 +     * Before we add it we must check that it fulfills the requirements
 +     * of the solvent optimized loops. First determine which atoms have
 +     * VdW interactions.
 +     */
 +    for (j = 0; j < nj; j++)
 +    {
 +        has_vdw[j] = FALSE;
 +        tjA        = tmp_vdwtype[j];
 +
 +        /* Go through all other tpes and see if any have non-zero
 +         * VdW parameters when combined with this one.
 +         */
 +        for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 +        {
 +            /* We already checked that the atoms weren't perturbed,
 +             * so we only need to check state A now.
 +             */
 +            if (fr->bBHAM)
 +            {
 +                has_vdw[j] = (has_vdw[j] ||
 +                              (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 +                              (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 +                              (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 +            }
 +            else
 +            {
 +                /* Standard LJ */
 +                has_vdw[j] = (has_vdw[j] ||
 +                              (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 +                              (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 +            }
 +        }
 +    }
 +
 +    /* Now we know all we need to make the final check and assignment. */
 +    if (nj == 3)
 +    {
 +        /* So, is it an SPC?
 +         * For this we require thatn all atoms have charge,
 +         * the charges on atom 2 & 3 should be the same, and only
 +         * atom 1 might have VdW.
 +         */
 +        if (has_vdw[1] == FALSE &&
 +            has_vdw[2] == FALSE &&
 +            tmp_charge[0]  != 0 &&
 +            tmp_charge[1]  != 0 &&
 +            tmp_charge[2]  == tmp_charge[1])
 +        {
 +            srenew(solvent_parameters, *n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolSPC;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for (k = 0; k < 3; k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +    else if (nj == 4)
 +    {
 +        /* Or could it be a TIP4P?
 +         * For this we require that atoms 2,3,4 have charge, but not atom 1.
 +         * Only atom 1 might have VdW.
 +         */
 +        if (has_vdw[1] == FALSE &&
 +            has_vdw[2] == FALSE &&
 +            has_vdw[3] == FALSE &&
 +            tmp_charge[0]  == 0 &&
 +            tmp_charge[1]  != 0 &&
 +            tmp_charge[2]  == tmp_charge[1] &&
 +            tmp_charge[3]  != 0)
 +        {
 +            srenew(solvent_parameters, *n_solvent_parameters+1);
 +            solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 +            solvent_parameters[*n_solvent_parameters].count = nmol;
 +            for (k = 0; k < 4; k++)
 +            {
 +                solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 +                solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 +            }
 +
 +            *cg_sp = *n_solvent_parameters;
 +            (*n_solvent_parameters)++;
 +        }
 +    }
 +
 +    *solvent_parameters_p = solvent_parameters;
 +}
 +
 +/* Find the most common solvent model among all charge groups and flag the
 + * charge groups that use it for the solvent-optimized loops.
 + *
 + * fp         Log file for the summary line; may be NULL.
 + * mtop       Global topology (molecule blocks, molecule types, groups).
 + * fr         Force record; fr->nWatMol and fr->solvent_opt are set here.
 + * cginfo_mb  Per-molblock charge-group info; the solvent-optimization
 + *            field of every entry is written (model of the winning
 + *            solvent type, or esolNO).
 + */
 +static void
 +check_solvent(FILE  *                fp,
 +              const gmx_mtop_t  *    mtop,
 +              t_forcerec  *          fr,
 +              cginfo_mb_t           *cginfo_mb)
 +{
 +    const t_block     *   cgs;
 +    const gmx_moltype_t  *molt;
 +    int                   mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
 +    int                   n_solvent_parameters;
 +    solvent_parameters_t *solvent_parameters;
 +    int                 **cg_sp;
 +    int                   bestsp, bestsol;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Going to determine what solvent types we have.\n");
 +    }
 +
 +    n_solvent_parameters = 0;
 +    solvent_parameters   = NULL;
 +    /* Allocate temporary array for solvent type */
 +    snew(cg_sp, mtop->nmolblock);
 +
 +    /* at_offset is the global index of the first atom of the current
 +     * molecule block; it is used to address the per-atom QMMM group array.
 +     */
 +    at_offset = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molt = &mtop->moltype[mtop->molblock[mb].type];
 +        cgs  = &molt->cgs;
 +        /* Here we have to loop over all individual molecules
 +         * because we need to check for QMMM particles.
 +         */
 +        snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 +        /* nmol_ch: number of molecules that have their own cginfo entries;
 +         * nmol:    number of real molecules each of those entries stands for.
 +         */
 +        nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 +        nmol    = mtop->molblock[mb].nmol/nmol_ch;
 +        for (mol = 0; mol < nmol_ch; mol++)
 +        {
 +            cgm = mol*cgs->nr;
 +            am  = mol*cgs->index[cgs->nr];
 +            for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 +            {
 +                check_solvent_cg(molt, cg_mol, nmol,
 +                                 mtop->groups.grpnr[egcQMMM] ?
 +                                 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 +                                 &mtop->groups.grps[egcQMMM],
 +                                 fr,
 +                                 &n_solvent_parameters, &solvent_parameters,
 +                                 cginfo_mb[mb].cginfo[cgm+cg_mol],
 +                                 &cg_sp[mb][cgm+cg_mol]);
 +            }
 +        }
 +        /* Advance past ALL molecules of this block, not just one, so that
 +         * the next block starts at its true global atom index (this matches
 +         * how init_cginfo_mb advances its atom offset).
 +         */
 +        at_offset += mtop->molblock[mb].nmol*cgs->index[cgs->nr];
 +    }
 +
 +    /* Puh! We finished going through all charge groups.
 +     * Now find the most common solvent model.
 +     */
 +
 +    /* Most common solvent this far; -2 means "none found yet" */
 +    bestsp = -2;
 +    for (i = 0; i < n_solvent_parameters; i++)
 +    {
 +        if (bestsp == -2 ||
 +            solvent_parameters[i].count > solvent_parameters[bestsp].count)
 +        {
 +            bestsp = i;
 +        }
 +    }
 +
 +    if (bestsp >= 0)
 +    {
 +        bestsol = solvent_parameters[bestsp].model;
 +    }
 +    else
 +    {
 +        bestsol = esolNO;
 +    }
 +
 +#ifdef DISABLE_WATER_NLIST
 +    bestsol = esolNO;
 +#endif
 +
 +    /* Tag every charge group: the winning solvent type gets bestsol,
 +     * everything else gets esolNO. Count the molecules that were tagged.
 +     */
 +    fr->nWatMol = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 +        nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 +        for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 +        {
 +            if (cg_sp[mb][i] == bestsp)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 +                fr->nWatMol += nmol;
 +            }
 +            else
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 +            }
 +        }
 +        sfree(cg_sp[mb]);
 +    }
 +    sfree(cg_sp);
 +
 +    /* bestsol != esolNO implies bestsp >= 0, so the index below is valid */
 +    if (bestsol != esolNO && fp != NULL)
 +    {
 +        fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 +                esol_names[bestsol],
 +                solvent_parameters[bestsp].count);
 +    }
 +
 +    sfree(solvent_parameters);
 +    fr->solvent_opt = bestsol;
 +}
 +
 +/* Per-atom constraint classification: not constrained, part of a regular
 + * constraint interaction, or part of a SETTLE interaction.
 + */
 +enum {
 +    acNONE       = 0,
 +    acCONSTRAINT = 1,
 +    acSETTLE     = 2
 +};
 +
 +/* Build the per-molecule-block charge-group info array.
 + *
 + * For every molecule block a cginfo array is allocated that covers either
 + * a single molecule (when the energy-group and QMMM group numbers are the
 + * same for all molecules in the block) or all molecules of the block.
 + * Each entry encodes: the energy group id, constraint/SETTLE flags,
 + * intra/inter charge-group exclusion flags, presence of VdW and charge,
 + * and the charge-group size. Afterwards check_solvent() is called and the
 + * solvent optimization is switched off again if requested.
 + *
 + * fplog       Log file, may be NULL.
 + * mtop        Global topology.
 + * fr          Force record; fr->ntype, fr->nbfp and fr->bBHAM are read,
 + *             fr->solvent_opt is written.
 + * bNoSolvOpt  When TRUE, solvent optimization is disabled.
 + * bExcl_IntraCGAll_InterCGNone
 + *             Output: TRUE only if every charge group excludes all of its
 + *             intra-group pairs and has no exclusions to other groups.
 + *
 + * Returns the newly allocated array with mtop->nmolblock entries.
 + */
 +static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 +                                   t_forcerec *fr, gmx_bool bNoSolvOpt,
 +                                   gmx_bool *bExcl_IntraCGAll_InterCGNone)
 +{
 +    const t_block        *cgs;
 +    const t_blocka       *excl;
 +    const gmx_moltype_t  *molt;
 +    const gmx_molblock_t *molb;
 +    cginfo_mb_t          *cginfo_mb;
 +    gmx_bool             *type_VDW;
 +    int                  *cginfo;
 +    int                   cg_offset, a_offset, cgm, am;
 +    int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 +    int                  *a_con;
 +    int                   ftype;
 +    int                   ia;
 +    gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
 +
 +    /* NOTE(review): ncg_tot is assigned here but not read again below. */
 +    ncg_tot = ncg_mtop(mtop);
 +    snew(cginfo_mb, mtop->nmolblock);
 +
 +    /* type_VDW[ai] is TRUE when atom type ai has a non-zero C6 or C12 with
 +     * at least one type; with Buckingham (fr->bBHAM) every type is flagged.
 +     */
 +    snew(type_VDW, fr->ntype);
 +    for (ai = 0; ai < fr->ntype; ai++)
 +    {
 +        type_VDW[ai] = FALSE;
 +        for (j = 0; j < fr->ntype; j++)
 +        {
 +            type_VDW[ai] = type_VDW[ai] ||
 +                fr->bBHAM ||
 +                C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 +                C12(fr->nbfp, fr->ntype, ai, j) != 0;
 +        }
 +    }
 +
 +    *bExcl_IntraCGAll_InterCGNone = TRUE;
 +
 +    /* bExcl is a scratch array, grown on demand to the largest charge group */
 +    excl_nalloc = 10;
 +    snew(bExcl, excl_nalloc);
 +    /* Running global charge-group and atom offsets of the current block */
 +    cg_offset = 0;
 +    a_offset  = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        molt = &mtop->moltype[molb->type];
 +        cgs  = &molt->cgs;
 +        excl = &molt->excls;
 +
 +        /* Check if the cginfo is identical for all molecules in this block.
 +         * If so, we only need an array of the size of one molecule.
 +         * Otherwise we make an array of #mol times #cgs per molecule.
 +         */
 +        bId = TRUE;
 +        am  = 0;
 +        for (m = 0; m < molb->nmol; m++)
 +        {
 +            am = m*cgs->index[cgs->nr];
 +            for (cg = 0; cg < cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                /* Compare molecule m against the first molecule of the block */
 +                if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 +                    ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 +                {
 +                    bId = FALSE;
 +                }
 +                if (mtop->groups.grpnr[egcQMMM] != NULL)
 +                {
 +                    for (ai = a0; ai < a1; ai++)
 +                    {
 +                        if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 +                            mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 +                        {
 +                            bId = FALSE;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +
 +        /* cg_mod is the number of stored entries: lookups wrap around with
 +         * this modulus (see cginfo_expand).
 +         */
 +        cginfo_mb[mb].cg_start = cg_offset;
 +        cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 +        cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 +        snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 +        cginfo = cginfo_mb[mb].cginfo;
 +
 +        /* Set constraints flags for constrained atoms */
 +        snew(a_con, molt->atoms.nr);
 +        for (ftype = 0; ftype < F_NRE; ftype++)
 +        {
 +            if (interaction_function[ftype].flags & IF_CONSTRAINT)
 +            {
 +                int nral;
 +
 +                nral = NRAL(ftype);
 +                /* Each ilist entry is one parameter index followed by
 +                 * nral atom indices.
 +                 */
 +                for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 +                {
 +                    int a;
 +
 +                    for (a = 0; a < nral; a++)
 +                    {
 +                        a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 +                            (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 +                    }
 +                }
 +            }
 +        }
 +
 +        /* Fill the entries: one molecule when all are identical, else all */
 +        for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 +        {
 +            cgm = m*cgs->nr;
 +            am  = m*cgs->index[cgs->nr];
 +            for (cg = 0; cg < cgs->nr; cg++)
 +            {
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +
 +                /* Store the energy group in cginfo */
 +                gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 +                SET_CGINFO_GID(cginfo[cgm+cg], gid);
 +
 +                /* Check the intra/inter charge group exclusions */
 +                if (a1-a0 > excl_nalloc)
 +                {
 +                    excl_nalloc = a1 - a0;
 +                    srenew(bExcl, excl_nalloc);
 +                }
 +                /* bExclIntraAll: all intra cg interactions excluded
 +                 * bExclInter:    any inter cg interactions excluded
 +                 */
 +                bExclIntraAll = TRUE;
 +                bExclInter    = FALSE;
 +                bHaveVDW      = FALSE;
 +                bHaveQ        = FALSE;
 +                for (ai = a0; ai < a1; ai++)
 +                {
 +                    /* Check VDW and electrostatic interactions */
 +                    bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 +                                            type_VDW[molt->atoms.atom[ai].typeB]);
 +                    bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 +                                            molt->atoms.atom[ai].qB != 0);
 +
 +                    /* Clear the exclusion list for atom ai */
 +                    for (aj = a0; aj < a1; aj++)
 +                    {
 +                        bExcl[aj-a0] = FALSE;
 +                    }
 +                    /* Loop over all the exclusions of atom ai */
 +                    for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 +                    {
 +                        aj = excl->a[j];
 +                        if (aj < a0 || aj >= a1)
 +                        {
 +                            bExclInter = TRUE;
 +                        }
 +                        else
 +                        {
 +                            bExcl[aj-a0] = TRUE;
 +                        }
 +                    }
 +                    /* Check if ai excludes a0 to a1 */
 +                    for (aj = a0; aj < a1; aj++)
 +                    {
 +                        if (!bExcl[aj-a0])
 +                        {
 +                            bExclIntraAll = FALSE;
 +                        }
 +                    }
 +
 +                    /* Propagate the atom's constraint class to its group */
 +                    switch (a_con[ai])
 +                    {
 +                        case acCONSTRAINT:
 +                            SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 +                            break;
 +                        case acSETTLE:
 +                            SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 +                            break;
 +                        default:
 +                            break;
 +                    }
 +                }
 +                if (bExclIntraAll)
 +                {
 +                    SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 +                }
 +                if (bExclInter)
 +                {
 +                    SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 +                }
 +                if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 +                {
 +                    /* The size in cginfo is currently only read with DD */
 +                    gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 +                }
 +                if (bHaveVDW)
 +                {
 +                    SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 +                }
 +                if (bHaveQ)
 +                {
 +                    SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 +                }
 +                /* Store the charge group size */
 +                SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 +
 +                /* One group that deviates is enough to clear the global flag */
 +                if (!bExclIntraAll || bExclInter)
 +                {
 +                    *bExcl_IntraCGAll_InterCGNone = FALSE;
 +                }
 +            }
 +        }
 +
 +        sfree(a_con);
 +
 +        cg_offset += molb->nmol*cgs->nr;
 +        a_offset  += molb->nmol*cgs->index[cgs->nr];
 +    }
 +    sfree(bExcl);
 +
 +    /* the solvent optimizer is called after the QM is initialized,
 +     * because we don't want to have the QM subsystem to become an
 +     * optimized solvent
 +     */
 +
 +    check_solvent(fplog, mtop, fr, cginfo_mb);
 +
 +    if (getenv("GMX_NO_SOLV_OPT"))
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 +                    "Disabling all solvent optimization\n");
 +        }
 +        fr->solvent_opt = esolNO;
 +    }
 +    if (bNoSolvOpt)
 +    {
 +        fr->solvent_opt = esolNO;
 +    }
 +    /* With the optimization off, wipe the per-group solvent tags that
 +     * check_solvent() may have set.
 +     */
 +    if (!fr->solvent_opt)
 +    {
 +        for (mb = 0; mb < mtop->nmolblock; mb++)
 +        {
 +            for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 +            {
 +                SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 +            }
 +        }
 +    }
 +
 +    return cginfo_mb;
 +}
 +
 +/* Expand the compact per-molblock charge-group info into one flat array
 + * holding an entry for every charge group up to the end of the last block.
 + *
 + * nmb     Number of entries in cgi_mb.
 + * cgi_mb  Per-molblock info; the stored entries of a block repeat with
 + *         period cg_mod starting at cg_start.
 + *
 + * Returns the newly allocated array of cgi_mb[nmb-1].cg_end entries.
 + */
 +static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 +{
 +    const cginfo_mb_t *block = cgi_mb;
 +    const int          ncg   = cgi_mb[nmb-1].cg_end;
 +    int               *cginfo;
 +    int                icg;
 +
 +    snew(cginfo, ncg);
 +    for (icg = 0; icg < ncg; icg++)
 +    {
 +        /* Move on to the block that contains charge group icg */
 +        while (icg >= block->cg_end)
 +        {
 +            block++;
 +        }
 +        cginfo[icg] =
 +            block->cginfo[(icg - block->cg_start) % block->cg_mod];
 +    }
 +
 +    return cginfo;
 +}
 +
 +/* Sum the charges and the squared charges over all atoms of the topology
 + * for one topology state.
 + *
 + * mtop     Global topology.
 + * bStateB  Zero: use the A-state charge (q); non-zero: the B-state (qB).
 + * qsum     Output: sum of nmol*q over all atoms of all molecule types.
 + * q2sum    Output: sum of nmol*q*q over the same atoms.
 + */
 +static void chargesum_for_state(const gmx_mtop_t *mtop, int bStateB,
 +                                double *qsum, double *q2sum)
 +{
 +    const t_atoms *atoms;
 +    double         q, qs, q2s;
 +    int            mb, nmol, i;
 +
 +    qs  = 0;
 +    q2s = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        nmol  = mtop->molblock[mb].nmol;
 +        atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +        for (i = 0; i < atoms->nr; i++)
 +        {
 +            q    = (bStateB ? atoms->atom[i].qB : atoms->atom[i].q);
 +            qs  += nmol*q;
 +            q2s += nmol*q*q;
 +        }
 +    }
 +    *qsum  = qs;
 +    *q2sum = q2s;
 +}
 +
 +/* Store the total charge and total squared charge of the system in
 + * fr->qsum[] and fr->q2sum[]: index 0 is topology state A, index 1 state B.
 + * Without free-energy perturbation (fr->efep == efepNO) state B is a copy
 + * of state A. The total charge is reported to log when log is not NULL.
 + */
 +static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 +{
 +    double qsum, q2sum;
 +
 +    chargesum_for_state(mtop, 0, &qsum, &q2sum);
 +    fr->qsum[0]  = qsum;
 +    fr->q2sum[0] = q2sum;
 +    if (fr->efep != efepNO)
 +    {
 +        chargesum_for_state(mtop, 1, &qsum, &q2sum);
 +        /* Stored once, after the summation, so the values are also
 +         * well-defined (zero) for a topology without molecule blocks.
 +         */
 +        fr->qsum[1]  = qsum;
 +        fr->q2sum[1] = q2sum;
 +    }
 +    else
 +    {
 +        fr->qsum[1]  = fr->qsum[0];
 +        fr->q2sum[1] = fr->q2sum[0];
 +    }
 +    if (log)
 +    {
 +        if (fr->efep == efepNO)
 +        {
 +            fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 +        }
 +        else
 +        {
 +            fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 +                    fr->qsum[0], fr->qsum[1]);
 +        }
 +    }
 +}
 +
 +/* Recompute fr->kappa, fr->k_rf and fr->c_rf through calc_rffac() for the
 + * given box. This is only done when fr->eeltype is eelGRF; for any other
 + * electrostatics type the call does nothing. The log argument is not used.
 + */
 +void update_forcerec(FILE *log, t_forcerec *fr, matrix box)
 +{
 +    if (fr->eeltype != eelGRF)
 +    {
 +        return;
 +    }
 +
 +    calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 +               fr->rcoulomb, fr->temp, fr->zsquare, box,
 +               &fr->kappa, &fr->k_rf, &fr->c_rf);
 +}
 +
 +/* Compute the average dispersion (C6, or Buckingham C) and repulsion (C12)
 + * parameters over all non-excluded atom pairs and store them in
 + * fr->avcsix[] and fr->avctwelve[].
 + *
 + * Index 0 is topology state A; index 1 (state B) is only computed when
 + * free-energy perturbation is active (fr->efep != efepNO).
 + * With fr->n_tpi set, only pairs between the test-particle atoms (taken
 + * from the last molecule block) and the rest of the system are counted.
 + * When there are no pairs to average over, both averages are set to zero.
 + *
 + * fplog  Log file for the warning and the summary; may be NULL.
 + * fr     Force record (ntype, bBHAM, nbfp, n_tpi, efep, eDispCorr are read).
 + * mtop   Global topology.
 + */
 +void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 +{
 +    const t_atoms  *atoms, *atoms_tpi;
 +    const t_blocka *excl;
 +    int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
 +#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 +    long long int   npair, npair_ij, tmpi, tmpj;
 +#else
 +    double          npair, npair_ij, tmpi, tmpj;
 +#endif
 +    double          csix, ctwelve;
 +    int             ntp, *typecount;
 +    gmx_bool        bBHAM;
 +    real           *nbfp;
 +
 +    ntp   = fr->ntype;
 +    bBHAM = fr->bBHAM;
 +    nbfp  = fr->nbfp;
 +
 +    /* q selects the topology state: 0 = A, 1 = B */
 +    for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 +    {
 +        csix    = 0;
 +        ctwelve = 0;
 +        npair   = 0;
 +        nexcl   = 0;
 +        if (!fr->n_tpi)
 +        {
 +            /* Count the types so we avoid natoms^2 operations */
 +            snew(typecount, ntp);
 +            for (mb = 0; mb < mtop->nmolblock; mb++)
 +            {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for (i = 0; i < atoms->nr; i++)
 +                {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    typecount[tpi] += nmol;
 +                }
 +            }
 +            /* Accumulate over unordered type pairs, weighted by pair count */
 +            for (tpi = 0; tpi < ntp; tpi++)
 +            {
 +                for (tpj = tpi; tpj < ntp; tpj++)
 +                {
 +                    tmpi = typecount[tpi];
 +                    tmpj = typecount[tpj];
 +                    if (tpi != tpj)
 +                    {
 +                        npair_ij = tmpi*tmpj;
 +                    }
 +                    else
 +                    {
 +                        npair_ij = tmpi*(tmpi - 1)/2;
 +                    }
 +                    if (bBHAM)
 +                    {
 +                        /* nbfp now includes the 6.0 derivative prefactor */
 +                        csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 +                    }
 +                    else
 +                    {
 +                        /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                        csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
 +                        ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
 +                    }
 +                    npair += npair_ij;
 +                }
 +            }
 +            sfree(typecount);
 +            /* Subtract the excluded pairs.
 +             * The main reason for subtracting exclusions is that in some cases
 +             * some combinations might never occur and the parameters could have
 +             * any value. These unused values should not influence the dispersion
 +             * correction.
 +             */
 +            for (mb = 0; mb < mtop->nmolblock; mb++)
 +            {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 +                for (i = 0; (i < atoms->nr); i++)
 +                {
 +                    if (q == 0)
 +                    {
 +                        tpi = atoms->atom[i].type;
 +                    }
 +                    else
 +                    {
 +                        tpi = atoms->atom[i].typeB;
 +                    }
 +                    j1  = excl->index[i];
 +                    j2  = excl->index[i+1];
 +                    for (j = j1; j < j2; j++)
 +                    {
 +                        k = excl->a[j];
 +                        /* k > i: count each excluded pair only once */
 +                        if (k > i)
 +                        {
 +                            if (q == 0)
 +                            {
 +                                tpj = atoms->atom[k].type;
 +                            }
 +                            else
 +                            {
 +                                tpj = atoms->atom[k].typeB;
 +                            }
 +                            if (bBHAM)
 +                            {
 +                                /* nbfp now includes the 6.0 derivative prefactor */
 +                                csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 +                            }
 +                            else
 +                            {
 +                                /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                                csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
 +                                ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
 +                            }
 +                            nexcl += nmol;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Only correct for the interaction of the test particle
 +             * with the rest of the system.
 +             */
 +            atoms_tpi =
 +                &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
 +
 +            npair = 0;
 +            for (mb = 0; mb < mtop->nmolblock; mb++)
 +            {
 +                nmol  = mtop->molblock[mb].nmol;
 +                atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +                for (j = 0; j < atoms->nr; j++)
 +                {
 +                    nmolc = nmol;
 +                    /* Remove the interaction of the test charge group
 +                     * with itself.
 +                     */
 +                    if (mb == mtop->nmolblock-1)
 +                    {
 +                        nmolc--;
 +
 +                        if (mb == 0 && nmol == 1)
 +                        {
 +                            gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
 +                        }
 +                    }
 +                    if (q == 0)
 +                    {
 +                        tpj = atoms->atom[j].type;
 +                    }
 +                    else
 +                    {
 +                        tpj = atoms->atom[j].typeB;
 +                    }
 +                    for (i = 0; i < fr->n_tpi; i++)
 +                    {
 +                        if (q == 0)
 +                        {
 +                            tpi = atoms_tpi->atom[i].type;
 +                        }
 +                        else
 +                        {
 +                            tpi = atoms_tpi->atom[i].typeB;
 +                        }
 +                        if (bBHAM)
 +                        {
 +                            /* nbfp now includes the 6.0 derivative prefactor */
 +                            csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 +                        }
 +                        else
 +                        {
 +                            /* nbfp now includes the 6.0/12.0 derivative prefactors */
 +                            csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
 +                            ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
 +                        }
 +                        npair += nmolc;
 +                    }
 +                }
 +            }
 +        }
 +        if (npair - nexcl <= 0)
 +        {
 +            /* No pairs to average over: never divide, whether or not a
 +             * log file is available for the warning.
 +             */
 +            if (fplog)
 +            {
 +                fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
 +            }
 +            csix     = 0;
 +            ctwelve  = 0;
 +        }
 +        else
 +        {
 +            csix    /= npair - nexcl;
 +            ctwelve /= npair - nexcl;
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug, "Counted %d exclusions\n", nexcl);
 +            fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
 +            fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
 +        }
 +        fr->avcsix[q]    = csix;
 +        fr->avctwelve[q] = ctwelve;
 +    }
 +    /* Only the state A averages are reported */
 +    if (fplog != NULL)
 +    {
 +        if (fr->eDispCorr == edispcAllEner ||
 +            fr->eDispCorr == edispcAllEnerPres)
 +        {
 +            fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                    fr->avcsix[0], fr->avctwelve[0]);
 +        }
 +        else
 +        {
 +            fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
 +        }
 +    }
 +}
 +
 +
 +/* Scan all pairs of atoms taken from the molecule types of the topology
 + * and record the largest Buckingham b parameter in fr->bham_b_max.
 + * The smallest b found is only reported; both values are written to fplog
 + * when it is not NULL. An atom type index that is not below fr->ntype is
 + * a fatal error.
 + */
 +static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
 +                           const gmx_mtop_t *mtop)
 +{
 +    const t_atoms *atoms_i, *atoms_j;
 +    int            mti, mtj, ai, aj, type_i, type_j, ntypes;
 +    real          *nbfp;
 +    real           b, bmin;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
 +    }
 +
 +    ntypes = fr->ntype;
 +    nbfp   = fr->nbfp;
 +
 +    /* -1 marks "no minimum recorded yet" */
 +    bmin           = -1;
 +    fr->bham_b_max = 0;
 +
 +    for (mti = 0; mti < mtop->nmoltype; mti++)
 +    {
 +        atoms_i = &mtop->moltype[mti].atoms;
 +        for (ai = 0; ai < atoms_i->nr; ai++)
 +        {
 +            type_i = atoms_i->atom[ai].type;
 +            if (type_i >= ntypes)
 +            {
 +                gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", ai, type_i, ntypes);
 +            }
 +
 +            /* Molecule types before mti were already paired with this one */
 +            for (mtj = mti; mtj < mtop->nmoltype; mtj++)
 +            {
 +                atoms_j = &mtop->moltype[mtj].atoms;
 +                for (aj = 0; aj < atoms_j->nr; aj++)
 +                {
 +                    type_j = atoms_j->atom[aj].type;
 +                    if (type_j >= ntypes)
 +                    {
 +                        gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", aj, type_j, ntypes);
 +                    }
 +
 +                    b = BHAMB(nbfp, ntypes, type_i, type_j);
 +                    if (fr->bham_b_max < b)
 +                    {
 +                        fr->bham_b_max = b;
 +                    }
 +                    if (bmin == -1 || b < bmin)
 +                    {
 +                        bmin = b;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
 +                bmin, fr->bham_b_max);
 +    }
 +}
 +
 +/* Read the non-bonded table file into nbl->table_elec_vdw and split it
 + * into a separate electrostatics table (nbl->table_elec) and a separate
 + * VdW table (nbl->table_vdw).
 + *
 + * tabfn     Base table file name; when NULL nothing is done.
 + * eg1, eg2  Energy group names; when both are given, "_eg1_eg2" is
 + *           inserted before the file extension of tabfn.
 + * rtab      Passed on to make_tables().
 + */
 +static void make_nbf_tables(FILE *fp, const output_env_t oenv,
 +                            t_forcerec *fr, real rtab,
 +                            const t_commrec *cr,
 +                            const char *tabfn, char *eg1, char *eg2,
 +                            t_nblists *nbl)
 +{
 +    char buf[STRLEN];
 +    int  ipoint, k;
 +
 +    if (tabfn == NULL)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
 +        }
 +        return;
 +    }
 +
 +    sprintf(buf, "%s", tabfn);
 +    if (eg1 && eg2)
 +    {
 +        const char *ext = ftp2ext(efXVG);
 +
 +        /* Overwrite ".<ext>" at the end of the name with the two energy
 +         * group names followed by the extension again.
 +         */
 +        sprintf(buf + strlen(tabfn) - strlen(ext) - 1, "_%s_%s.%s",
 +                eg1, eg2, ext);
 +    }
 +    nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
 +
 +    /* The combined table is copied into separate Coulomb and LJ tables to
 +     * improve cache performance. The data arrays are requested with an
 +     * alignment argument of 32; the pointers could be freed but currently
 +     * aren't.
 +     */
 +
 +    /* Electrostatics only: one interaction per table point */
 +    nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
 +    nbl->table_elec.ninteractions = 1;
 +    nbl->table_elec.format        = nbl->table_elec_vdw.format;
 +    nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
 +    nbl->table_elec.r             = nbl->table_elec_vdw.r;
 +    nbl->table_elec.n             = nbl->table_elec_vdw.n;
 +    nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
 +    nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
 +    snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
 +
 +    /* VdW: repulsion and dispersion, two interactions per table point */
 +    nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
 +    nbl->table_vdw.ninteractions = 2;
 +    nbl->table_vdw.format        = nbl->table_elec_vdw.format;
 +    nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
 +    nbl->table_vdw.r             = nbl->table_elec_vdw.r;
 +    nbl->table_vdw.n             = nbl->table_elec_vdw.n;
 +    nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
 +    nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
 +    nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
 +    snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
 +
 +    /* Each combined point holds 12 values: the first 4 go to the
 +     * electrostatics table, the remaining 8 to the VdW table.
 +     */
 +    for (ipoint = 0; ipoint <= nbl->table_elec_vdw.n; ipoint++)
 +    {
 +        for (k = 0; k < 4; k++)
 +        {
 +            nbl->table_elec.data[4*ipoint+k] = nbl->table_elec_vdw.data[12*ipoint+k];
 +        }
 +        for (k = 0; k < 8; k++)
 +        {
 +            nbl->table_vdw.data[8*ipoint+k] = nbl->table_elec_vdw.data[12*ipoint+4+k];
 +        }
 +    }
 +}
 +
 +/* Count how often each bonded table number is referenced by interactions
 + * of type ftype1 or ftype2 in any molecule type of the topology.
 + *
 + * ncount  In/out: number of entries in *count; grows to the highest table
 + *         number seen plus one.
 + * count   In/out: (*count)[t] is incremented for every interaction that
 + *         uses table t; the array is enlarged (new entries zeroed) on
 + *         demand.
 + *
 + * A negative table number is a fatal error.
 + */
 +static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
 +                         int *ncount, int **count)
 +{
 +    int mt, ftype;
 +
 +    for (mt = 0; mt < mtop->nmoltype; mt++)
 +    {
 +        const gmx_moltype_t *moltype = &mtop->moltype[mt];
 +
 +        for (ftype = 0; ftype < F_NRE; ftype++)
 +        {
 +            const t_ilist *ilist;
 +            int            step, ia;
 +
 +            if (ftype != ftype1 && ftype != ftype2)
 +            {
 +                continue;
 +            }
 +
 +            ilist = &moltype->ilist[ftype];
 +            /* One parameter index followed by the atom indices */
 +            step  = 1 + NRAL(ftype);
 +            for (ia = 0; ia < ilist->nr; ia += step)
 +            {
 +                int table, k;
 +
 +                table = mtop->ffparams.iparams[ilist->iatoms[ia]].tab.table;
 +                if (table < 0)
 +                {
 +                    gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", table);
 +                }
 +                if (table >= *ncount)
 +                {
 +                    srenew(*count, table+1);
 +                    for (k = *ncount; k <= table; k++)
 +                    {
 +                        (*count)[k] = 0;
 +                    }
 +                    *ncount = table+1;
 +                }
 +                (*count)[table]++;
 +            }
 +        }
 +    }
 +}
 +
 +/* Create the bonded tables that are referenced by interactions of type
 + * ftype1 or ftype2.
 + *
 + * The file name of table i is built from basefn by replacing its
 + * extension part with "_<tabext><i>.<ext>". Only table numbers that are
 + * actually used are read; the other entries of the returned array stay as
 + * allocated by snew().
 + *
 + * Returns an array indexed by table number, or NULL when no interaction
 + * of these types refers to a table.
 + */
 +static bondedtable_t *make_bonded_tables(FILE *fplog,
 +                                         int ftype1, int ftype2,
 +                                         const gmx_mtop_t *mtop,
 +                                         const char *basefn, const char *tabext)
 +{
 +    bondedtable_t *tab    = NULL;
 +    int           *count  = NULL;
 +    int            ncount = 0;
 +    int            tabnr;
 +    char           fname[STRLEN];
 +
 +    count_tables(ftype1, ftype2, mtop, &ncount, &count);
 +    if (ncount <= 0)
 +    {
 +        return tab;
 +    }
 +
 +    snew(tab, ncount);
 +    for (tabnr = 0; tabnr < ncount; tabnr++)
 +    {
 +        if (count[tabnr] <= 0)
 +        {
 +            /* Table number not referenced by any interaction */
 +            continue;
 +        }
 +        sprintf(fname, "%s", basefn);
 +        sprintf(fname + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
 +                tabext, tabnr, ftp2ext(efXVG));
 +        tab[tabnr] = make_bonded_table(fplog, fname, NRAL(ftype1)-2);
 +    }
 +    sfree(count);
 +
 +    return tab;
 +}
 +
 +/* Store the current charge-group and atom counts in fr and make sure the
 + * force buffers that depend on them are large enough.
 + *
 + * fr->cg0 is reset to 0 and fr->hcg to ncg_home. The twin-range force
 + * buffer (only with fr->bTwinRange) and the no-virial-summation buffer
 + * (only with fr->bF_NoVirSum) are reallocated, with over-allocation,
 + * when the requested size exceeds the allocated size.
 + */
 +void forcerec_set_ranges(t_forcerec *fr,
 +                         int ncg_home, int ncg_force,
 +                         int natoms_force,
 +                         int natoms_force_constr, int natoms_f_novirsum)
 +{
 +    fr->cg0 = 0;
 +    fr->hcg = ncg_home;
 +
 +    /* The standard code never reads fr->ncg_force; it is kept because
 +     * modified code that works with charge groups may want it.
 +     */
 +    fr->ncg_force           = ncg_force;
 +    fr->natoms_force        = natoms_force;
 +    fr->natoms_force_constr = natoms_force_constr;
 +
 +    if (fr->nalloc_force < fr->natoms_force_constr)
 +    {
 +        fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
 +
 +        if (fr->bTwinRange)
 +        {
 +            srenew(fr->f_twin, fr->nalloc_force);
 +        }
 +    }
 +
 +    if (!fr->bF_NoVirSum)
 +    {
 +        fr->f_novirsum_n = 0;
 +        return;
 +    }
 +
 +    fr->f_novirsum_n = natoms_f_novirsum;
 +    if (fr->f_novirsum_nalloc < fr->f_novirsum_n)
 +    {
 +        fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
 +        srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
 +    }
 +}
 +
 +/* Map a cut-off of exactly 0 to GMX_CUTOFF_INF; any other value is
 + * returned unchanged.
 + */
 +static real cutoff_inf(real cutoff)
 +{
 +    if (cutoff != 0)
 +    {
 +        return cutoff;
 +    }
 +
 +    cutoff = GMX_CUTOFF_INF;
 +
 +    return cutoff;
 +}
 +
 +/* Load one AdResS thermoforce table per thermoforce group into fr->atf_tabs.
 + *
 + * For group i the file name is built from tabfn by replacing its trailing
 + * ".<xvg extension>" with "tf_<energy group name>.<xvg extension>", where
 + * the energy group is the one selected by ir->adress->tf_table_index[i].
 + * Each table is read with make_atf_table(). When fp is not NULL the file
 + * name used for each group is written to it.
 + *
 + * It is a fatal error when tabfn is NULL, when tabfn is too short to hold
 + * the extension, or when a generated file name would not fit in STRLEN
 + * characters.
 + */
 +static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
 +                                  t_forcerec *fr, const t_inputrec *ir,
 +                                  const char *tabfn, const gmx_mtop_t *mtop,
 +                                  matrix     box)
 +{
 +    char        buf[STRLEN];
 +    const char *grpname;
 +    size_t      base_len, suffix_len, stem_len;
 +    int         i, j;
 +
 +    if (tabfn == NULL)
 +    {
 +        gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
 +        return;
 +    }
 +
 +    /* Length of "." plus the extension that gets replaced */
 +    base_len   = strlen(tabfn);
 +    suffix_len = strlen(ftp2ext(efXVG)) + 1;
 +    if (base_len < suffix_len || base_len >= STRLEN)
 +    {
 +        /* Too short: the offset below would wrap around.
 +         * Too long: copying tabfn into buf would overflow it.
 +         */
 +        gmx_fatal(FARGS, "Can not derive thermoforce table file names from '%s'", tabfn);
 +    }
 +    stem_len = base_len - suffix_len;
 +
 +    snew(fr->atf_tabs, ir->adress->n_tf_grps);
 +
 +    sprintf(buf, "%s", tabfn);
 +    for (i = 0; i < ir->adress->n_tf_grps; i++)
 +    {
 +        j       = ir->adress->tf_table_index[i]; /* get energy group index */
 +        grpname = *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]);
 +
 +        /* stem + "tf_" + group name + '.' + extension + terminating zero */
 +        if (stem_len + 3 + strlen(grpname) + suffix_len + 1 > STRLEN)
 +        {
 +            gmx_fatal(FARGS, "The thermoforce table file name for energy group '%s' is too long", grpname);
 +        }
 +        sprintf(buf + stem_len, "tf_%s.%s", grpname, ftp2ext(efXVG));
 +        if (fp)
 +        {
 +            fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
 +        }
 +        fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
 +    }
 +
 +}
 +
 +/* Decide whether the input settings qualify for the all-vs-all kernels.
 + *
 + * All of the following must hold: rlist, rcoulomb and rvdw are 0, there is
 + * no PBC, both Van der Waals and Coulomb are plain cut-off, free-energy
 + * perturbation is off, implicit solvent is either off or GBSA with the
 + * Still, HCT or OBC algorithm, and the environment variable
 + * GMX_NO_ALLVSALL is not set. In addition there may be at most one energy
 + * monitor group; when that is the only obstacle and bPrintNote is set, a
 + * note is printed to stderr (master only) and to fp (when not NULL).
 + *
 + * cr is only inspected on paths that print, so it may be NULL when
 + * bPrintNote is FALSE and fp is NULL.
 + */
 +gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
 +                          gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
 +{
 +    gmx_bool bNoCutoffNoPbc, bPlainCutoffs, bSolventOK, bAllvsAll;
 +
 +    bNoCutoffNoPbc = (ir->rlist == 0 &&
 +                      ir->rcoulomb == 0 &&
 +                      ir->rvdw == 0 &&
 +                      ir->ePBC == epbcNONE);
 +
 +    bPlainCutoffs = (ir->vdwtype == evdwCUT &&
 +                     ir->coulombtype == eelCUT &&
 +                     ir->efep == efepNO);
 +
 +    bSolventOK = (ir->implicit_solvent == eisNO ||
 +                  (ir->implicit_solvent == eisGBSA &&
 +                   (ir->gb_algorithm == egbSTILL ||
 +                    ir->gb_algorithm == egbHCT   ||
 +                    ir->gb_algorithm == egbOBC)));
 +
 +    bAllvsAll = (bNoCutoffNoPbc && bPlainCutoffs && bSolventOK &&
 +                 getenv("GMX_NO_ALLVSALL") == NULL);
 +
 +    if (!bAllvsAll)
 +    {
 +        return bAllvsAll;
 +    }
 +
 +    if (ir->opts.ngener > 1)
 +    {
 +        const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
 +
 +        if (bPrintNote)
 +        {
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr, "\n%s\n", note);
 +            }
 +            if (fp != NULL)
 +            {
 +                fprintf(fp, "\n%s\n", note);
 +            }
 +        }
 +        return FALSE;
 +    }
 +
 +    if (fp && MASTER(cr))
 +    {
 +        fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
 +    }
 +
 +    return bAllvsAll;
 +}
 +
 +
 +/* Set up the per-thread force and energy buffers used by the bondeds.
 + *
 + * fr->nthreads is taken from the bonded OpenMP thread count. With more
 + * than one thread, fr->f_t gets one entry per thread; entries 1 and up
 + * receive a shift-force array of SHIFTS elements and egNR energy arrays of
 + * nenergrp*nenergrp elements each. Entry 0 is left as allocated because
 + * thread 0 works on the global force and energy arrays.
 + */
 +static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
 +{
 +    int ithread, iener;
 +
 +    fr->nthreads = gmx_omp_nthreads_get(emntBonded);
 +
 +    if (fr->nthreads <= 1)
 +    {
 +        /* A single thread needs no thread-local buffers */
 +        return;
 +    }
 +
 +    snew(fr->f_t, fr->nthreads);
 +    for (ithread = 1; ithread < fr->nthreads; ithread++)
 +    {
 +        /* The force array itself starts out empty */
 +        fr->f_t[ithread].f          = NULL;
 +        fr->f_t[ithread].f_nalloc   = 0;
 +        fr->f_t[ithread].grpp.nener = nenergrp*nenergrp;
 +
 +        snew(fr->f_t[ithread].fshift, SHIFTS);
 +        for (iener = 0; iener < egNR; iener++)
 +        {
 +            snew(fr->f_t[ithread].grpp.ener[iener], fr->f_t[ithread].grpp.nener);
 +        }
 +    }
 +}
 +
 +
 +/* Choose the CPU non-bonded kernel type and the Ewald exclusion treatment.
 + *
 + * The defaults are the plain-C 4x4 kernel with tabulated Ewald exclusion
 + * correction. When GMX_NBNXN_SIMD is defined at compile time, a SIMD kernel
 + * flavour is selected from the compiled-in options; the environment
 + * variables GMX_NBNXN_SIMD_4XN and GMX_NBNXN_SIMD_2XNN override that choice
 + * (fatal error when the requested flavour was not compiled in), and
 + * GMX_NBNXN_EWALD_TABLE / GMX_NBNXN_EWALD_ANALYTICAL override the Ewald
 + * exclusion treatment. Later assignments win, so when both variables of a
 + * pair are set the second one checked takes effect.
 + *
 + * fp, cr and cpuid_info are not used in this function body; ir is only
 + * read in the AVX-256 4xN branch.
 + */
 +static void pick_nbnxn_kernel_cpu(FILE             *fp,
 +                                  const t_commrec  *cr,
 +                                  const gmx_cpuid_t cpuid_info,
 +                                  const t_inputrec *ir,
 +                                  int              *kernel_type,
 +                                  int              *ewald_excl)
 +{
 +    *kernel_type = nbnxnk4x4_PlainC;
 +    *ewald_excl  = ewaldexclTable;
 +
 +#ifdef GMX_NBNXN_SIMD
 +    {
 +#ifdef GMX_NBNXN_SIMD_4XN
 +        *kernel_type = nbnxnk4xN_SIMD_4xN;
 +#endif
 +#ifdef GMX_NBNXN_SIMD_2XNN
 +        /* We expect the 2xNN kernels to be faster in most cases */
 +        *kernel_type = nbnxnk4xN_SIMD_2xNN;
 +#endif
 +
 +#if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
 +        if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
 +        {
 +            /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
 +             * 10% with HT, 50% without HT, but extra zeros interactions
 +             * can compensate. As we currently don't detect the actual use
 +             * of HT, switch to 4x8 to avoid a potential performance hit.
 +             */
 +            *kernel_type = nbnxnk4xN_SIMD_4xN;
 +        }
 +#endif
 +        /* Explicit user request for the 4xN flavour */
 +        if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
 +        {
 +#ifdef GMX_NBNXN_SIMD_4XN
 +            *kernel_type = nbnxnk4xN_SIMD_4xN;
 +#else
 +            gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
 +#endif
 +        }
 +        /* Explicit user request for the 2x(N+N) flavour */
 +        if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
 +        {
 +#ifdef GMX_NBNXN_SIMD_2XNN
 +            *kernel_type = nbnxnk4xN_SIMD_2xNN;
 +#else
 +            gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
 +#endif
 +        }
 +
 +        /* Analytical Ewald exclusion correction is only an option in the
 +         * x86 SIMD kernel. This is faster in single precision
 +         * on Bulldozer and slightly faster on Sandy Bridge.
 +         */
 +#if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE
 +        *ewald_excl = ewaldexclAnalytical;
 +#endif
 +        if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
 +        {
 +            *ewald_excl = ewaldexclTable;
 +        }
 +        if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
 +        {
 +            *ewald_excl = ewaldexclAnalytical;
 +        }
 +
 +    }
 +#endif /* GMX_NBNXN_SIMD */
 +}
 +
 +
 +/* Return a short human-readable name for a non-bonded kernel type.
 + *
 + * The strings for the two SIMD kernel types depend on the compile-time
 + * configuration: "not available" without GMX_NBNXN_SIMD, otherwise the
 + * x86 instruction-set name ("SSE2", "SSE4.1", "AVX-128" or "AVX-256") or
 + * plain "SIMD" on non-x86 builds. Any value without a name, including
 + * nbnxnkNR, is a fatal error.
 + *
 + * The returned pointer refers to a string literal and must not be freed.
 + */
 +const char *lookup_nbnxn_kernel_name(int kernel_type)
 +{
 +    const char *returnvalue = NULL;
 +    switch (kernel_type)
 +    {
 +        case nbnxnkNotSet: returnvalue     = "not set"; break;
 +        case nbnxnk4x4_PlainC: returnvalue = "plain C"; break;
 +#ifndef GMX_NBNXN_SIMD
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "not available"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "not available"; break;
 +#else
 +#ifdef GMX_X86_SSE2
 +#if GMX_NBNXN_SIMD_BITWIDTH == 128
 +            /* x86 SIMD intrinsics can be converted to either SSE or AVX depending
 +             * on compiler flags. As we use nearly identical intrinsics, using an AVX
 +             * compiler flag without an AVX macro effectively results in AVX kernels.
 +             * For gcc we check for __AVX__
 +             * At least a check for icc should be added (if there is a macro)
 +             */
 +#if !(defined GMX_X86_AVX_128_FMA || defined __AVX__)
 +#ifndef GMX_X86_SSE4_1
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE2"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE2"; break;
 +#else
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE4.1"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE4.1"; break;
 +#endif
 +#else
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-128"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-128"; break;
 +#endif
 +#endif
 +#if GMX_NBNXN_SIMD_BITWIDTH == 256
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-256"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-256"; break;
 +#endif
 +#else   /* not GMX_X86_SSE2 */
 +        case nbnxnk4xN_SIMD_4xN: returnvalue  = "SIMD"; break;
 +        case nbnxnk4xN_SIMD_2xNN: returnvalue = "SIMD"; break;
 +#endif
 +#endif
 +        case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
 +        case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
 +
 +        case nbnxnkNR:
 +        default:
 +            gmx_fatal(FARGS, "Illegal kernel type selected");
 +            returnvalue = NULL;
 +            break;
 +    }
 +    return returnvalue;
 +}
 +
 +/* Select the non-bonded kernel type and Ewald exclusion treatment.
 + *
 + * Precedence: GPU emulation (plain-C 8x8x8 kernel), then a real GPU (CUDA
 + * kernel), then a CPU kernel. For the CPU the choice is delegated to
 + * pick_nbnxn_kernel_cpu() when use_cpu_acceleration is set and is the
 + * plain-C 4x4 kernel otherwise. *ewald_excl is the tabulated treatment
 + * unless pick_nbnxn_kernel_cpu() changes it.
 + *
 + * When bDoNonbonded is set, the emulation warning is issued and, if fp is
 + * not NULL, the selected kernel and its cluster sizes are written to fp.
 + */
 +static void pick_nbnxn_kernel(FILE                *fp,
 +                              const t_commrec     *cr,
 +                              const gmx_hw_info_t *hwinfo,
 +                              gmx_bool             use_cpu_acceleration,
 +                              gmx_bool             bUseGPU,
 +                              gmx_bool             bEmulateGPU,
 +                              const t_inputrec    *ir,
 +                              int                 *kernel_type,
 +                              int                 *ewald_excl,
 +                              gmx_bool             bDoNonbonded)
 +{
 +    assert(kernel_type);
 +
 +    *ewald_excl = ewaldexclTable;
 +
 +    if (bEmulateGPU)
 +    {
 +        *kernel_type = nbnxnk8x8x8_PlainC;
 +
 +        if (bDoNonbonded)
 +        {
 +            md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
 +        }
 +    }
 +    else if (bUseGPU)
 +    {
 +        *kernel_type = nbnxnk8x8x8_CUDA;
 +    }
 +    else if (use_cpu_acceleration)
 +    {
 +        /* Neither GPU path applies: let the CPU picker decide */
 +        pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir,
 +                              kernel_type, ewald_excl);
 +    }
 +    else
 +    {
 +        *kernel_type = nbnxnk4x4_PlainC;
 +    }
 +
 +    if (!bDoNonbonded || fp == NULL)
 +    {
 +        return;
 +    }
 +
 +    fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
 +            lookup_nbnxn_kernel_name(*kernel_type),
 +            nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
 +            nbnxn_kernel_to_cj_size(*kernel_type));
 +}
 +
 +/* Decide whether the non-bondeds run on a GPU, in GPU emulation, or neither.
 + *
 + * *bEmulateGPU is set when the environment variable GMX_EMULATE_GPU is
 + * defined, or when non-bonded calculations are turned off (bDoNonbonded is
 + * FALSE) while a GPU could be used. *bUseGPU is set only when a GPU can be
 + * used, emulation is not selected, and the GPU for this rank was
 + * initialized successfully; a failed initialization is a fatal error.
 + *
 + * fp is not used in this function body.
 + */
 +static void pick_nbnxn_resources(FILE                *fp,
 +                                 const t_commrec     *cr,
 +                                 const gmx_hw_info_t *hwinfo,
 +                                 gmx_bool             bDoNonbonded,
 +                                 gmx_bool            *bUseGPU,
 +                                 gmx_bool            *bEmulateGPU)
 +{
 +    char gpu_err_str[STRLEN];
 +
 +    *bUseGPU = FALSE;
 +
 +    /* GPUs (currently) only handle the non-bonded calculations, so when
 +     * those are switched off we fall back to emulation: that is the
 +     * simplest way to skip GPU initialization, data movement and cleanup.
 +     *
 +     * Emulation is also useful to estimate what adding GPU(s) to a machine
 +     * would gain, and this condition permits it even when mdrun was built
 +     * without GPU support. Note that you should freeze the system in that
 +     * case, as otherwise it will explode.
 +     */
 +    *bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL ||
 +                    (!bDoNonbonded && hwinfo->bCanUseGPU));
 +
 +    /* GPU mode needs a usable GPU and no emulation request */
 +    if (!hwinfo->bCanUseGPU || *bEmulateGPU)
 +    {
 +        return;
 +    }
 +
 +    /* Each PP node uses the device whose position in the list of
 +     * detected/selected GPUs equals its intra-node rank.
 +     */
 +    if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info))
 +    {
 +        /* Availability of the required GPUs was verified earlier, so a
 +         * failure here is unexpected and we bail out.
 +         */
 +        gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
 +                  cr->nodeid,
 +                  get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode),
 +                  gpu_err_str);
 +    }
 +
 +    /* From here on hardware GPU acceleration is active */
 +    *bUseGPU = TRUE;
 +}
 +
 +/* Tell whether the non-bonded setup uses the simple pair-list kernels.
 + *
 + * With the group cut-off scheme the answer is always TRUE. With the Verlet
 + * scheme the kernel type of one non-bonded group is inspected: group 0 when
 + * the group argument is negative, the last group otherwise. nbv and
 + * nbv->grp must not be NULL in that case. Any other cut-off scheme is
 + * reported as an inconsistency.
 + */
 +gmx_bool uses_simple_tables(int                 cutoff_scheme,
 +                            nonbonded_verlet_t *nbv,
 +                            int                 group)
 +{
 +    int grp_index;
 +
 +    if (cutoff_scheme == ecutsGROUP)
 +    {
 +        return TRUE;
 +    }
 +
 +    if (cutoff_scheme == ecutsVERLET)
 +    {
 +        assert(NULL != nbv && NULL != nbv->grp);
 +        grp_index = (group >= 0) ? (nbv->ngrp - 1) : 0;
 +
 +        return nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
 +    }
 +
 +    gmx_incons("unimplemented");
 +
 +    return TRUE;
 +}
 +
 +/* (Re)build the Ewald correction tables stored in ic.
 + *
 + * The table scale and size are chosen first: for simple tables the scale
 + * comes from ewald_spline3_table_scale() and the size covers the larger of
 + * rtab and ic->rcoulomb; otherwise the fixed GPU table size is used and the
 + * scale is derived from it. The existing F, V and FDV0 tables are then
 + * freed, fresh aligned ones are allocated and filled.
 + *
 + * The three table pointers in ic are passed to sfree_aligned() on entry.
 + */
 +static void init_ewald_f_table(interaction_const_t *ic,
 +                               gmx_bool             bUsesSimpleTables,
 +                               real                 rtab)
 +{
 +    real rmax;
 +
 +    if (!bUsesSimpleTables)
 +    {
 +        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
 +        /* Use size-2 rather than size-1 so that rounding can not lead
 +         * to an access beyond the end of the table.
 +         */
 +        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
 +    }
 +    else
 +    {
 +        /* With a spacing of 0.0005 we are at the force summation accuracy
 +         * for the SSE kernels for "normal" atomistic simulations.
 +         */
 +        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
 +                                                   ic->rcoulomb);
 +
 +        /* The table has to reach the larger of the two distances */
 +        rmax = ic->rcoulomb;
 +        if (rtab > ic->rcoulomb)
 +        {
 +            rmax = rtab;
 +        }
 +        ic->tabq_size = (int)(rmax*ic->tabq_scale) + 2;
 +    }
 +
 +    /* Drop whatever tables were there before */
 +    sfree_aligned(ic->tabq_coul_FDV0);
 +    sfree_aligned(ic->tabq_coul_F);
 +    sfree_aligned(ic->tabq_coul_V);
 +
 +    /* FDV0 holds four values per table point, F and V one each */
 +    snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
 +    snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
 +    snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
 +
 +    table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
 +                                ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff);
 +}
 +
 +/* Initialize the tables held in the interaction constants.
 + *
 + * Only Ewald-type electrostatics (plain Ewald or any PME variant) use a
 + * table here; for those the Ewald correction tables are (re)built with
 + * init_ewald_f_table() and, when fp is not NULL, the resulting spacing and
 + * size are written to fp. For all other electrostatics types nothing is
 + * done.
 + */
 +void init_interaction_const_tables(FILE                *fp,
 +                                   interaction_const_t *ic,
 +                                   gmx_bool             bUsesSimpleTables,
 +                                   real                 rtab)
 +{
 +    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
 +    {
 +        init_ewald_f_table(ic, bUsesSimpleTables, rtab);
 +
 +        if (fp != NULL)
 +        {
 +            /* The spacing is the inverse of the table scale */
 +            fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
 +                    1/ic->tabq_scale, ic->tabq_size);
 +        }
 +    }
 +}
 +
 +/* Allocate an interaction_const_t, fill it from the force record and
 + * return it through *interaction_const.
 + *
 + * Cut-offs, the Lennard-Jones and Ewald potential shifts and the
 + * reaction-field parameters are copied or derived from fr. When fp is not
 + * NULL the potential shifts are written to it. With a GPU in use the
 + * constants are also passed to nbnxn_cuda_init_const(). Finally the
 + * tables are set up with init_interaction_const_tables().
 + */
 +void init_interaction_const(FILE                 *fp,
 +                            interaction_const_t **interaction_const,
 +                            const t_forcerec     *fr,
 +                            real                  rtab)
 +{
 +    interaction_const_t *ic;
 +    gmx_bool             bUsesSimpleTables = TRUE;
 +
 +    snew(ic, 1);
 +
 +    /* Just allocate something so we can free it */
 +    snew_aligned(ic->tabq_coul_FDV0, 16, 32);
 +    snew_aligned(ic->tabq_coul_F, 16, 32);
 +    snew_aligned(ic->tabq_coul_V, 16, 32);
 +
 +    ic->rlist       = fr->rlist;
 +    ic->rlistlong   = fr->rlistlong;
 +
 +    /* Lennard-Jones */
 +    ic->rvdw        = fr->rvdw;
 +    if (fr->vdw_modifier == eintmodPOTSHIFT)
 +    {
 +        /* Shift is rvdw^-6; the r^-12 shift is printed below as its square */
 +        ic->sh_invrc6 = pow(ic->rvdw, -6.0);
 +    }
 +    else
 +    {
 +        ic->sh_invrc6 = 0;
 +    }
 +
 +    /* Electrostatics */
 +    ic->eeltype     = fr->eeltype;
 +    ic->rcoulomb    = fr->rcoulomb;
 +    ic->epsilon_r   = fr->epsilon_r;
 +    ic->epsfac      = fr->epsfac;
 +
 +    /* Ewald */
 +    ic->ewaldcoeff  = fr->ewaldcoeff;
 +    if (fr->coulomb_modifier == eintmodPOTSHIFT)
 +    {
 +        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
 +    }
 +    else
 +    {
 +        ic->sh_ewald = 0;
 +    }
 +
 +    /* Reaction-field */
 +    if (EEL_RF(ic->eeltype))
 +    {
 +        ic->epsilon_rf = fr->epsilon_rf;
 +        ic->k_rf       = fr->k_rf;
 +        ic->c_rf       = fr->c_rf;
 +    }
 +    else
 +    {
 +        /* For plain cut-off we might use the reaction-field kernels */
 +        ic->epsilon_rf = ic->epsilon_r;
 +        ic->k_rf       = 0;
 +        if (fr->coulomb_modifier == eintmodPOTSHIFT)
 +        {
 +            ic->c_rf   = 1/ic->rcoulomb;
 +        }
 +        else
 +        {
 +            ic->c_rf   = 0;
 +        }
 +    }
 +
 +    /* Report the potential shifts; the Coulomb part depends on the type */
 +    if (fp != NULL)
 +    {
 +        fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
 +                sqr(ic->sh_invrc6), ic->sh_invrc6);
 +        if (ic->eeltype == eelCUT)
 +        {
 +            fprintf(fp, ", Coulomb %.3f", ic->c_rf);
 +        }
 +        else if (EEL_PME(ic->eeltype))
 +        {
 +            fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
 +        }
 +        fprintf(fp, "\n");
 +    }
 +
 +    *interaction_const = ic;
 +
 +    if (fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
-         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
++        nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
 +    }
 +
 +    /* A negative group argument makes uses_simple_tables() look at group 0 */
 +    bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
 +    init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
 +}
 +
 +/* Allocate and set up the Verlet-scheme non-bonded data structure.
 + *
 + * The steps are: decide on GPU use or emulation, pick a kernel type for
 + * the local group and (with domain decomposition) the non-local group,
 + * initialize the GPU data and the pair-list balancing parameter when a GPU
 + * is used, initialize the pair search, and finally set up the pair-list
 + * sets and atom data of every group.
 + *
 + * The new structure is returned through *nb_verlet. nbpu_opt may be NULL;
 + * the value "gpu_cpu" selects a CPU kernel for the non-local group.
 + */
 +static void init_nb_verlet(FILE                *fp,
 +                           nonbonded_verlet_t **nb_verlet,
 +                           const t_inputrec    *ir,
 +                           const t_forcerec    *fr,
 +                           const t_commrec     *cr,
 +                           const char          *nbpu_opt)
 +{
 +    nonbonded_verlet_t *nbv;
 +    int                 i;
 +    char               *env;
 +    gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
 +
 +    nbnxn_alloc_t      *nb_alloc;
 +    nbnxn_free_t       *nb_free;
 +
 +    snew(nbv, 1);
 +
 +    pick_nbnxn_resources(fp, cr, fr->hwinfo,
 +                         fr->bNonbonded,
 +                         &nbv->bUseGPU,
 +                         &bEmulateGPU);
 +
 +    nbv->nbs = NULL;
 +
 +    /* One group without domain decomposition, local plus non-local with it */
 +    nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
 +    for (i = 0; i < nbv->ngrp; i++)
 +    {
 +        nbv->grp[i].nbl_lists.nnbl = 0;
 +        nbv->grp[i].nbat           = NULL;
 +        nbv->grp[i].kernel_type    = nbnxnkNotSet;
 +
 +        if (i == 0) /* local */
 +        {
 +            pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                              nbv->bUseGPU, bEmulateGPU,
 +                              ir,
 +                              &nbv->grp[i].kernel_type,
 +                              &nbv->grp[i].ewald_excl,
 +                              fr->bNonbonded);
 +        }
 +        else /* non-local */
 +        {
 +            if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
 +            {
 +                /* Use GPU for local, select a CPU kernel for non-local */
 +                pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
 +                                  FALSE, FALSE,
 +                                  ir,
 +                                  &nbv->grp[i].kernel_type,
 +                                  &nbv->grp[i].ewald_excl,
 +                                  fr->bNonbonded);
 +
 +                bHybridGPURun = TRUE;
 +            }
 +            else
 +            {
 +                /* Use the same kernel for local and non-local interactions */
 +                nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
 +                nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
 +            }
 +        }
 +    }
 +
 +    if (nbv->bUseGPU)
 +    {
 +        /* init the NxN GPU data; the last argument tells whether we'll have
 +         * both local and non-local NB calculation on GPU */
 +        nbnxn_cuda_init(fp, &nbv->cu_nbv,
 +                        &fr->hwinfo->gpu_info, cr->rank_pp_intranode,
 +                        (nbv->ngrp > 1) && !bHybridGPURun);
 +
 +        /* The balancing parameter can be forced through the environment;
 +         * it must parse completely as a positive integer.
 +         */
 +        if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
 +        {
 +            char *end;
 +
 +            nbv->min_ci_balanced = strtol(env, &end, 10);
 +            if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
 +            {
 +                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
 +            }
 +
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +        else
 +        {
 +            nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
 +            if (debug)
 +            {
 +                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
 +                        nbv->min_ci_balanced);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nbv->min_ci_balanced = 0;
 +    }
 +
 +    *nb_verlet = nbv;
 +
 +    nbnxn_init_search(&nbv->nbs,
 +                      DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
 +                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
 +                      gmx_omp_nthreads_get(emntNonbonded));
 +
 +    for (i = 0; i < nbv->ngrp; i++)
 +    {
 +        /* NOTE(review): the allocator choice looks at group 0 for every
 +         * group, so a CPU non-local group in a hybrid run also gets
 +         * pmalloc/pfree - presumably intended, confirm.
 +         */
 +        if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
 +        {
 +            nb_alloc = &pmalloc;
 +            nb_free  = &pfree;
 +        }
 +        else
 +        {
 +            nb_alloc = NULL;
 +            nb_free  = NULL;
 +        }
 +
 +        nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                /* 8x8x8 "non-simple" lists are ATM always combined */
 +                                !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
 +                                nb_alloc, nb_free);
 +
 +        /* A group gets its own atom data only when it is the first group
 +         * or runs a different kernel type than group 0; otherwise it
 +         * shares the atom data of group 0.
 +         */
 +        if (i == 0 ||
 +            nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
 +        {
 +            snew(nbv->grp[i].nbat, 1);
 +            nbnxn_atomdata_init(fp,
 +                                nbv->grp[i].nbat,
 +                                nbv->grp[i].kernel_type,
 +                                fr->ntype, fr->nbfp,
 +                                ir->opts.ngener,
 +                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
 +                                nb_alloc, nb_free);
 +        }
 +        else
 +        {
 +            nbv->grp[i].nbat = nbv->grp[0].nbat;
 +        }
 +    }
 +}
 +
 +void init_forcerec(FILE              *fp,
 +                   const output_env_t oenv,
 +                   t_forcerec        *fr,
 +                   t_fcdata          *fcd,
 +                   const t_inputrec  *ir,
 +                   const gmx_mtop_t  *mtop,
 +                   const t_commrec   *cr,
 +                   matrix             box,
 +                   gmx_bool           bMolEpot,
 +                   const char        *tabfn,
 +                   const char        *tabafn,
 +                   const char        *tabpfn,
 +                   const char        *tabbfn,
 +                   const char        *nbpu_opt,
 +                   gmx_bool           bNoSolvOpt,
 +                   real               print_force)
 +{
 +    int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
 +    real           rtab;
 +    char          *env;
 +    double         dbl;
 +    rvec           box_size;
 +    const t_block *cgs;
 +    gmx_bool       bGenericKernelOnly;
 +    gmx_bool       bTab, bSep14tab, bNormalnblists;
 +    t_nblists     *nbl;
 +    int           *nm_ind, egp_flags;
 +
 +    if (fr->hwinfo == NULL)
 +    {
 +        /* Detect hardware, gather information.
 +         * In mdrun, hwinfo has already been set before calling init_forcerec.
 +         * Here we ignore GPUs, as tools will not use them anyhow.
 +         */
 +        snew(fr->hwinfo, 1);
 +        gmx_detect_hardware(fp, fr->hwinfo, cr,
 +                            FALSE, FALSE, NULL);
 +    }
 +
 +    /* By default we turn acceleration on, but it might be turned off further down... */
 +    fr->use_cpu_acceleration = TRUE;
 +
 +    fr->bDomDec = DOMAINDECOMP(cr);
 +
 +    natoms = mtop->natoms;
 +
 +    if (check_box(ir->ePBC, box))
 +    {
 +        gmx_fatal(FARGS, check_box(ir->ePBC, box));
 +    }
 +
 +    /* Test particle insertion ? */
 +    if (EI_TPI(ir->eI))
 +    {
 +        /* Set to the size of the molecule to be inserted (the last one) */
 +        /* Because of old style topologies, we have to use the last cg
 +         * instead of the last molecule type.
 +         */
 +        cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
 +        fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
 +        if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
 +        {
 +            gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
 +        }
 +    }
 +    else
 +    {
 +        fr->n_tpi = 0;
 +    }
 +
 +    /* Copy AdResS parameters */
 +    if (ir->bAdress)
 +    {
 +        fr->adress_type           = ir->adress->type;
 +        fr->adress_const_wf       = ir->adress->const_wf;
 +        fr->adress_ex_width       = ir->adress->ex_width;
 +        fr->adress_hy_width       = ir->adress->hy_width;
 +        fr->adress_icor           = ir->adress->icor;
 +        fr->adress_site           = ir->adress->site;
 +        fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
 +        fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
 +
 +
 +        snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
 +        for (i = 0; i < ir->adress->n_energy_grps; i++)
 +        {
 +            fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
 +        }
 +
 +        fr->n_adress_tf_grps = ir->adress->n_tf_grps;
 +        snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
 +        for (i = 0; i < fr->n_adress_tf_grps; i++)
 +        {
 +            fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
 +        }
 +        copy_rvec(ir->adress->refs, fr->adress_refs);
 +    }
 +    else
 +    {
 +        fr->adress_type           = eAdressOff;
 +        fr->adress_do_hybridpairs = FALSE;
 +    }
 +
 +    /* Copy the user determined parameters */
 +    fr->userint1  = ir->userint1;
 +    fr->userint2  = ir->userint2;
 +    fr->userint3  = ir->userint3;
 +    fr->userint4  = ir->userint4;
 +    fr->userreal1 = ir->userreal1;
 +    fr->userreal2 = ir->userreal2;
 +    fr->userreal3 = ir->userreal3;
 +    fr->userreal4 = ir->userreal4;
 +
 +    /* Shell stuff */
 +    fr->fc_stepsize = ir->fc_stepsize;
 +
 +    /* Free energy */
 +    fr->efep        = ir->efep;
 +    fr->sc_alphavdw = ir->fepvals->sc_alpha;
 +    if (ir->fepvals->bScCoul)
 +    {
 +        fr->sc_alphacoul  = ir->fepvals->sc_alpha;
 +        fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
 +    }
 +    else
 +    {
 +        fr->sc_alphacoul  = 0;
 +        fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
 +    }
 +    fr->sc_power      = ir->fepvals->sc_power;
 +    fr->sc_r_power    = ir->fepvals->sc_r_power;
 +    fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
 +
 +    env = getenv("GMX_SCSIGMA_MIN");
 +    if (env != NULL)
 +    {
 +        dbl = 0;
 +        sscanf(env, "%lf", &dbl);
 +        fr->sc_sigma6_min = pow(dbl, 6);
 +        if (fp)
 +        {
 +            fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
 +        }
 +    }
 +
 +    fr->bNonbonded = TRUE;
 +    if (getenv("GMX_NO_NONBONDED") != NULL)
 +    {
 +        /* turn off non-bonded calculations */
 +        fr->bNonbonded = FALSE;
 +        md_print_warn(cr, fp,
 +                      "Found environment variable GMX_NO_NONBONDED.\n"
 +                      "Disabling nonbonded calculations.\n");
 +    }
 +
 +    bGenericKernelOnly = FALSE;
 +
 +    /* We now check in the NS code whether a particular combination of interactions
 +     * can be used with water optimization, and disable it if that is not the case.
 +     */
 +
 +    if (getenv("GMX_NB_GENERIC") != NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "Found environment variable GMX_NB_GENERIC.\n"
 +                    "Disabling all interaction-specific nonbonded kernels, will only\n"
 +                    "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
 +        }
 +        bGenericKernelOnly = TRUE;
 +    }
 +
 +    if (bGenericKernelOnly == TRUE)
 +    {
 +        bNoSolvOpt         = TRUE;
 +    }
 +
 +    if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
 +    {
 +        fr->use_cpu_acceleration = FALSE;
 +        if (fp != NULL)
 +        {
 +            fprintf(fp,
 +                    "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
 +                    "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
 +        }
 +    }
 +
 +    fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 +
 +    /* Check if we can/should do all-vs-all kernels */
 +    fr->bAllvsAll       = can_use_allvsall(ir, mtop, FALSE, NULL, NULL);
 +    fr->AllvsAll_work   = NULL;
 +    fr->AllvsAll_workgb = NULL;
 +
++    /* All-vs-all kernels have not been implemented in 4.6, and
++     * the SIMD group kernels are also buggy in this case. Non-accelerated
++     * group kernels are OK. See Redmine #1249. */
++    if (fr->bAllvsAll)
++    {
++        fr->bAllvsAll = FALSE;
++        fr->use_cpu_acceleration = FALSE;
++        if (fp != NULL)
++        {
++            fprintf(fp,
++                    "\nYour simulation settings would have triggered the efficient all-vs-all\n"
++                    "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
++                    "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
++                    "of an unfixed bug. The reference C kernels are correct, though, so\n"
++                    "we are proceeding by disabling all CPU architecture-specific\n"
++                    "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
++                    "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
++        }
++    }
 +
 +    /* Neighbour searching stuff */
 +    fr->cutoff_scheme = ir->cutoff_scheme;
 +    fr->bGrid         = (ir->ns_type == ensGRID);
 +    fr->ePBC          = ir->ePBC;
 +
 +    /* Determine if we will do PBC for distances in bonded interactions */
 +    if (fr->ePBC == epbcNONE)
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            /* The group cut-off scheme and SHAKE assume charge groups
 +             * are whole, but not using molpbc is faster in most cases.
 +             */
 +            if (fr->cutoff_scheme == ecutsGROUP ||
 +                (ir->eConstrAlg == econtSHAKE &&
 +                 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
 +                  gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
 +            {
 +                fr->bMolPBC = ir->bPeriodicMols;
 +            }
 +            else
 +            {
 +                fr->bMolPBC = TRUE;
 +                if (getenv("GMX_USE_GRAPH") != NULL)
 +                {
 +                    fr->bMolPBC = FALSE;
 +                    if (fp)
 +                    {
 +                        fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
 +        }
 +    }
 +    fr->bGB = (ir->implicit_solvent == eisGBSA);
 +
 +    fr->rc_scaling = ir->refcoord_scaling;
 +    copy_rvec(ir->posres_com, fr->posres_com);
 +    copy_rvec(ir->posres_comB, fr->posres_comB);
 +    fr->rlist      = cutoff_inf(ir->rlist);
 +    fr->rlistlong  = cutoff_inf(ir->rlistlong);
 +    fr->eeltype    = ir->coulombtype;
 +    fr->vdwtype    = ir->vdwtype;
 +
 +    fr->coulomb_modifier = ir->coulomb_modifier;
 +    fr->vdw_modifier     = ir->vdw_modifier;
 +
 +    /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
 +    switch (fr->eeltype)
 +    {
 +        case eelCUT:
 +            fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
 +            break;
 +
 +        case eelRF:
 +        case eelGRF:
 +        case eelRF_NEC:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            break;
 +
 +        case eelRF_ZERO:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 +            fr->coulomb_modifier          = eintmodEXACTCUTOFF;
 +            break;
 +
 +        case eelSWITCH:
 +        case eelSHIFT:
 +        case eelUSER:
 +        case eelENCADSHIFT:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            break;
 +
 +        case eelPME:
 +        case eelEWALD:
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
 +            break;
 +    }
 +
 +    /* Vdw: Translate from mdp settings to kernel format */
 +    switch (fr->vdwtype)
 +    {
 +        case evdwCUT:
 +            if (fr->bBHAM)
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
 +            }
 +            else
 +            {
 +                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
 +            }
 +            break;
 +
 +        case evdwSWITCH:
 +        case evdwSHIFT:
 +        case evdwUSER:
 +        case evdwENCADSHIFT:
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            break;
 +
 +        default:
 +            gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
 +            break;
 +    }
 +
 +    /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
 +    fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
 +    fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
 +
 +    fr->bTwinRange = fr->rlistlong > fr->rlist;
 +    fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
 +
 +    fr->reppow     = mtop->ffparams.reppow;
 +
 +    if (ir->cutoff_scheme == ecutsGROUP)
 +    {
 +        fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
 +                          !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
 +        /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
 +        fr->bcoultab   = !(fr->eeltype == eelCUT ||
 +                           fr->eeltype == eelEWALD ||
 +                           fr->eeltype == eelPME ||
 +                           fr->eeltype == eelRF ||
 +                           fr->eeltype == eelRF_ZERO);
 +
 +        /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
 +         * going to be faster to tabulate the interaction than calling the generic kernel.
 +         */
 +        if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
 +        {
 +            if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +        else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
 +                 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
 +                   fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
 +                   (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
 +        {
 +            if (fr->rcoulomb != fr->rvdw)
 +            {
 +                fr->bcoultab = TRUE;
 +            }
 +        }
 +
 +        if (getenv("GMX_REQUIRE_TABLES"))
 +        {
 +            fr->bvdwtab  = TRUE;
 +            fr->bcoultab = TRUE;
 +        }
 +
 +        if (fp)
 +        {
 +            fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
 +            fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
 +        }
 +
 +        if (fr->bvdwtab == TRUE)
 +        {
 +            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
 +            fr->nbkernel_vdw_modifier    = eintmodNONE;
 +        }
 +        if (fr->bcoultab == TRUE)
 +        {
 +            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
 +            fr->nbkernel_elec_modifier    = eintmodNONE;
 +        }
 +    }
 +
 +    if (ir->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
 +        {
 +            gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
 +        }
 +        fr->bvdwtab  = FALSE;
 +        fr->bcoultab = FALSE;
 +    }
 +
 +    /* Tables are used for direct ewald sum */
 +    if (fr->bEwald)
 +    {
 +        if (EEL_PME(ir->coulombtype))
 +        {
 +            if (fp)
 +            {
 +                fprintf(fp, "Will do PME sum in reciprocal space.\n");
 +            }
 +            if (ir->coulombtype == eelP3M_AD)
 +            {
 +                please_cite(fp, "Hockney1988");
 +                please_cite(fp, "Ballenegger2012");
 +            }
 +            else
 +            {
 +                please_cite(fp, "Essmann95a");
 +            }
 +
 +            if (ir->ewald_geometry == eewg3DC)
 +            {
 +                if (fp)
 +                {
 +                    fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
 +                }
 +                please_cite(fp, "In-Chul99a");
 +            }
 +        }
 +        fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
 +        init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
 +        if (fp)
 +        {
 +            fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
 +                    1/fr->ewaldcoeff);
 +        }
 +    }
 +
 +    /* Electrostatics */
 +    fr->epsilon_r       = ir->epsilon_r;
 +    fr->epsilon_rf      = ir->epsilon_rf;
 +    fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
 +    fr->rcoulomb_switch = ir->rcoulomb_switch;
 +    fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
 +
 +    /* Parameters for generalized RF */
 +    fr->zsquare = 0.0;
 +    fr->temp    = 0.0;
 +
 +    if (fr->eeltype == eelGRF)
 +    {
 +        init_generalized_rf(fp, mtop, ir, fr);
 +    }
 +    else if (fr->eeltype == eelSHIFT)
 +    {
 +        for (m = 0; (m < DIM); m++)
 +        {
 +            box_size[m] = box[m][m];
 +        }
 +
 +        if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
 +        {
 +            set_shift_consts(fp, fr->rcoulomb_switch, fr->rcoulomb, box_size, fr);
 +        }
 +    }
 +
 +    fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
 +                       gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
 +                       gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
 +                       IR_ELEC_FIELD(*ir) ||
 +                       (fr->adress_icor != eAdressICOff)
 +                       );
 +
 +    if (fr->cutoff_scheme == ecutsGROUP &&
 +        ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
 +    {
 +        /* Count the total number of charge groups */
 +        fr->cg_nalloc = ncg_mtop(mtop);
 +        srenew(fr->cg_cm, fr->cg_nalloc);
 +    }
 +    if (fr->shift_vec == NULL)
 +    {
 +        snew(fr->shift_vec, SHIFTS);
 +    }
 +
 +    if (fr->fshift == NULL)
 +    {
 +        snew(fr->fshift, SHIFTS);
 +    }
 +
 +    if (fr->nbfp == NULL)
 +    {
 +        fr->ntype = mtop->ffparams.atnr;
 +        fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
 +    }
 +
 +    /* Copy the energy group exclusions */
 +    fr->egp_flags = ir->opts.egp_flags;
 +
 +    /* Van der Waals stuff */
 +    fr->rvdw        = cutoff_inf(ir->rvdw);
 +    fr->rvdw_switch = ir->rvdw_switch;
 +    if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
 +    {
 +        if (fr->rvdw_switch >= fr->rvdw)
 +        {
 +            gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
 +                      fr->rvdw_switch, fr->rvdw);
 +        }
 +        if (fp)
 +        {
 +            fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
 +                    (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
 +                    fr->rvdw_switch, fr->rvdw);
 +        }
 +    }
 +
 +    if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
 +    {
 +        gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
 +    }
 +
 +    if (fp)
 +    {
 +        fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
 +                fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
 +    }
 +
 +    fr->eDispCorr = ir->eDispCorr;
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        set_avcsixtwelve(fp, fr, mtop);
 +    }
 +
 +    if (fr->bBHAM)
 +    {
 +        set_bham_b_max(fp, fr, mtop);
 +    }
 +
 +    fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 +
 +    /* Copy the GBSA data (radius, volume and surftens for each
 +     * atomtype) from the topology atomtype section to forcerec.
 +     */
 +    snew(fr->atype_radius, fr->ntype);
 +    snew(fr->atype_vol, fr->ntype);
 +    snew(fr->atype_surftens, fr->ntype);
 +    snew(fr->atype_gb_radius, fr->ntype);
 +    snew(fr->atype_S_hct, fr->ntype);
 +
 +    if (mtop->atomtypes.nr > 0)
 +    {
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_radius[i] = mtop->atomtypes.radius[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_vol[i] = mtop->atomtypes.vol[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
 +        }
 +        for (i = 0; i < fr->ntype; i++)
 +        {
 +            fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
 +        }
 +    }
 +
 +    /* Generate the GB table if needed */
 +    if (fr->bGB)
 +    {
 +#ifdef GMX_DOUBLE
 +        fr->gbtabscale = 2000;
 +#else
 +        fr->gbtabscale = 500;
 +#endif
 +
 +        fr->gbtabr = 100;
 +        fr->gbtab  = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale);
 +
 +        init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm);
 +
 +        /* Copy local gb data (for dd, this is done in dd_partition_system) */
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            make_local_gb(cr, fr->born, ir->gb_algorithm);
 +        }
 +    }
 +
 +    /* Set the charge scaling */
 +    if (fr->epsilon_r != 0)
 +    {
 +        fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
 +    }
 +    else
 +    {
 +        /* eps = 0 is infinite dieletric: no coulomb interactions */
 +        fr->epsfac = 0;
 +    }
 +
 +    /* Reaction field constants */
 +    if (EEL_RF(fr->eeltype))
 +    {
 +        calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 +                   fr->rcoulomb, fr->temp, fr->zsquare, box,
 +                   &fr->kappa, &fr->k_rf, &fr->c_rf);
 +    }
 +
 +    set_chargesum(fp, fr, mtop);
 +
 +    /* if we are using LR electrostatics, and they are tabulated,
 +     * the tables will contain modified coulomb interactions.
 +     * Since we want to use the non-shifted ones for 1-4
 +     * coulombic interactions, we must have an extra set of tables.
 +     */
 +
 +    /* Construct tables.
 +     * A little unnecessary to make both vdw and coul tables sometimes,
 +     * but what the heck... */
 +
 +    bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
 +
 +    bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
 +                  fr->bBHAM || fr->bEwald) &&
 +                 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
 +                  gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
 +                  gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
 +
 +    negp_pp   = ir->opts.ngener - ir->nwall;
 +    negptable = 0;
 +    if (!bTab)
 +    {
 +        bNormalnblists = TRUE;
 +        fr->nnblists   = 1;
 +    }
 +    else
 +    {
 +        bNormalnblists = (ir->eDispCorr != edispcNO);
 +        for (egi = 0; egi < negp_pp; egi++)
 +        {
 +            for (egj = egi; egj < negp_pp; egj++)
 +            {
 +                egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
 +                if (!(egp_flags & EGP_EXCL))
 +                {
 +                    if (egp_flags & EGP_TABLE)
 +                    {
 +                        negptable++;
 +                    }
 +                    else
 +                    {
 +                        bNormalnblists = TRUE;
 +                    }
 +                }
 +            }
 +        }
 +        if (bNormalnblists)
 +        {
 +            fr->nnblists = negptable + 1;
 +        }
 +        else
 +        {
 +            fr->nnblists = negptable;
 +        }
 +        if (fr->nnblists > 1)
 +        {
 +            snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
 +        }
 +    }
 +
 +    if (ir->adress)
 +    {
 +        fr->nnblists *= 2;
 +    }
 +
 +    snew(fr->nblists, fr->nnblists);
 +
 +    /* This code automatically gives table length tabext without cut-off's,
 +     * in that case grompp should already have checked that we do not need
 +     * normal tables and we only generate tables for 1-4 interactions.
 +     */
 +    rtab = ir->rlistlong + ir->tabext;
 +
 +    if (bTab)
 +    {
 +        /* make tables for ordinary interactions */
 +        if (bNormalnblists)
 +        {
 +            make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
 +            if (ir->adress)
 +            {
 +                make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
 +            }
 +            if (!bSep14tab)
 +            {
 +                fr->tab14 = fr->nblists[0].table_elec_vdw;
 +            }
 +            m = 1;
 +        }
 +        else
 +        {
 +            m = 0;
 +        }
 +        if (negptable > 0)
 +        {
 +            /* Read the special tables for certain energy group pairs */
 +            nm_ind = mtop->groups.grps[egcENER].nm_ind;
 +            for (egi = 0; egi < negp_pp; egi++)
 +            {
 +                for (egj = egi; egj < negp_pp; egj++)
 +                {
 +                    egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
 +                    if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
 +                    {
 +                        nbl = &(fr->nblists[m]);
 +                        if (fr->nnblists > 1)
 +                        {
 +                            fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
 +                        }
 +                        /* Read the table file with the two energy groups names appended */
 +                        make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
 +                                        *mtop->groups.grpname[nm_ind[egi]],
 +                                        *mtop->groups.grpname[nm_ind[egj]],
 +                                        &fr->nblists[m]);
 +                        if (ir->adress)
 +                        {
 +                            make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
 +                                            *mtop->groups.grpname[nm_ind[egi]],
 +                                            *mtop->groups.grpname[nm_ind[egj]],
 +                                            &fr->nblists[fr->nnblists/2+m]);
 +                        }
 +                        m++;
 +                    }
 +                    else if (fr->nnblists > 1)
 +                    {
 +                        fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    if (bSep14tab)
 +    {
 +        /* generate extra tables with plain Coulomb for 1-4 interactions only */
 +        fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
 +                                GMX_MAKETABLES_14ONLY);
 +    }
 +
 +    /* Read AdResS Thermo Force table if needed */
 +    if (fr->adress_icor == eAdressICThermoForce)
 +    {
 +        /* old todo replace */
 +
 +        if (ir->adress->n_tf_grps > 0)
 +        {
 +            make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
 +
 +        }
 +        else
 +        {
 +            /* load the default table */
 +            snew(fr->atf_tabs, 1);
 +            fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
 +        }
 +    }
 +
 +    /* Wall stuff */
 +    fr->nwall = ir->nwall;
 +    if (ir->nwall && ir->wall_type == ewtTABLE)
 +    {
 +        make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
 +    }
 +
 +    if (fcd && tabbfn)
 +    {
 +        fcd->bondtab  = make_bonded_tables(fp,
 +                                           F_TABBONDS, F_TABBONDSNC,
 +                                           mtop, tabbfn, "b");
 +        fcd->angletab = make_bonded_tables(fp,
 +                                           F_TABANGLES, -1,
 +                                           mtop, tabbfn, "a");
 +        fcd->dihtab   = make_bonded_tables(fp,
 +                                           F_TABDIHS, -1,
 +                                           mtop, tabbfn, "d");
 +    }
 +    else
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
 +        }
 +    }
 +
 +    /* QM/MM initialization if requested
 +     */
 +    if (ir->bQMMM)
 +    {
 +        fprintf(stderr, "QM/MM calculation requested.\n");
 +    }
 +
 +    fr->bQMMM      = ir->bQMMM;
 +    fr->qr         = mk_QMMMrec();
 +
 +    /* Set all the static charge group info */
 +    fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
 +                                   &fr->bExcl_IntraCGAll_InterCGNone);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        fr->cginfo = NULL;
 +    }
 +    else
 +    {
 +        fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
 +    }
 +
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* When using particle decomposition, the effect of the second argument,
 +         * which sets fr->hcg, is corrected later in do_md and init_em.
 +         */
 +        forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
 +                            mtop->natoms, mtop->natoms, mtop->natoms);
 +    }
 +
 +    fr->print_force = print_force;
 +
 +
 +    /* coarse load balancing vars */
 +    fr->t_fnbf    = 0.;
 +    fr->t_wait    = 0.;
 +    fr->timesteps = 0;
 +
 +    /* Initialize neighbor search */
 +    init_ns(fp, cr, &fr->ns, fr, mtop, box);
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        gmx_nonbonded_setup(fp, fr, bGenericKernelOnly);
 +        /*
 +           if (ir->bAdress)
 +            {
 +                gmx_setup_adress_kernels(fp,bGenericKernelOnly);
 +            }
 +         */
 +    }
 +
 +    /* Initialize the thread working data for bonded interactions */
 +    init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
 +
 +    snew(fr->excl_load, fr->nthreads+1);
 +
 +    if (fr->cutoff_scheme == ecutsVERLET)
 +    {
 +        if (ir->rcoulomb != ir->rvdw)
 +        {
 +            gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
 +        }
 +
 +        init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
 +    }
 +
 +    /* fr->ic is used both by verlet and group kernels (to some extent) now */
 +    init_interaction_const(fp, &fr->ic, fr, rtab);
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        calc_enervirdiff(fp, ir->eDispCorr, fr);
 +    }
 +}
 +
 +/* Debug-print helpers. Each one writes a single "<label>: <value>" line to
 + * the given stream; the label is the stringified argument expression, so the
 + * spelling used at the call site is exactly what ends up in the output.
 + */
 +#define pr_real(fp, val) fprintf((fp), "%s: %e\n", #val, (val))
 +#define pr_int(fp, val)  fprintf((fp), "%s: %d\n", #val, (val))
 +#define pr_bool(fp, val) fprintf((fp), "%s: %s\n", #val, bool_names[(val)])
 +
 +/* Write a short selection of forcerec fields to fp, one per line, and
 + * flush the stream afterwards.
 + *
 + * fp  stream that receives the dump
 + * fr  force record whose fields are printed
 + * cr  not referenced by this function
 + */
 +void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr)
 +{
 +    int i;
 +
 +    pr_real(fp, fr->rlist);
 +    pr_real(fp, fr->rcoulomb);
 +    pr_real(fp, fr->fudgeQQ);
 +    pr_bool(fp, fr->bGrid);
 +    pr_bool(fp, fr->bTwinRange);
 +
 +    /* One line per neighbour list: the size field of its combined
 +     * elec+vdw table. The index must stay spelled "i" because the
 +     * expression text is part of the printed label.
 +     */
 +    i = 0;
 +    while (i < fr->nnblists)
 +    {
 +        pr_int(fp, fr->nblists[i].table_elec_vdw.n);
 +        i++;
 +    }
 +
 +    /* Note: rcoulomb is printed a second time here, as in the original */
 +    pr_real(fp, fr->rcoulomb_switch);
 +    pr_real(fp, fr->rcoulomb);
 +
 +    fflush(fp);
 +}
 +
 +/* Fill fr->excl_load with boundaries into the exclusion list of top.
 + *
 + * With particle decomposition only the first two entries are set, through
 + * pd_at_range(). Otherwise entry 0 is 0 and entry t (1..fr->nthreads) is the
 + * first row index at which the running count of exclusion entries with
 + * a[j] > row has reached (total*t)/fr->nthreads, or the number of rows when
 + * that count is never reached.
 + *
 + * fr   force record; excl_load must hold at least fr->nthreads+1 entries
 + * top  local topology providing excls.index, excls.a and excls.nr
 + * cr   communication record, may be NULL
 + */
 +void forcerec_set_excl_load(t_forcerec *fr,
 +                            const gmx_localtop_t *top, const t_commrec *cr)
 +{
 +    const int *row_start, *partner;
 +    int        thread, row, k, ncount_all, ncount_done, ncount_goal;
 +
 +    if (cr != NULL && PARTDECOMP(cr))
 +    {
 +        /* No OpenMP with particle decomposition */
 +        pd_at_range(cr, &fr->excl_load[0], &fr->excl_load[1]);
 +
 +        return;
 +    }
 +
 +    row_start = top->excls.index;
 +    partner   = top->excls.a;
 +
 +    /* Pass 1: total number of entries whose partner index exceeds the row */
 +    ncount_all = 0;
 +    for (row = 0; row < top->excls.nr; row++)
 +    {
 +        for (k = row_start[row]; k < row_start[row+1]; k++)
 +        {
 +            ncount_all += (partner[k] > row) ? 1 : 0;
 +        }
 +    }
 +
 +    /* Pass 2: walk the rows once more, recording a boundary every time the
 +     * running count reaches the next thread's share of the total.
 +     */
 +    fr->excl_load[0] = 0;
 +    ncount_done      = 0;
 +    row              = 0;
 +    for (thread = 1; thread <= fr->nthreads; thread++)
 +    {
 +        ncount_goal = (ncount_all*thread)/fr->nthreads;
 +        for (; row < top->excls.nr && ncount_done < ncount_goal; row++)
 +        {
 +            for (k = row_start[row]; k < row_start[row+1]; k++)
 +            {
 +                ncount_done += (partner[k] > row) ? 1 : 0;
 +            }
 +        }
 +        fr->excl_load[thread] = row;
 +    }
 +}
diff --cc src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
index 19c0b2d4f4,0000000000..dc089c0bed
mode 100644,000000..100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@@ -1,961 -1,0 +1,963 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdlib.h>
 +#include <stdio.h>
 +#include <assert.h>
 +
 +#include <cuda.h>
 +
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "tables.h"
 +#include "typedefs.h"
 +#include "types/nb_verlet.h"
 +#include "types/interaction_const.h"
 +#include "types/force_flags.h"
 +#include "../nbnxn_consts.h"
 +
 +#include "nbnxn_cuda_types.h"
 +#include "../../gmxlib/cuda_tools/cudautils.cuh"
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "pmalloc_cuda.h"
 +#include "gpu_utils.h"
 +
 +static bool bUseCudaEventBlockingSync = false; /* makes the CPU thread block */
 +
 +/* Heuristically determined factor for the Fermi architecture: multiplying
 + * this constant with the # of multiprocessors on the current device gives
 + * the minimum size of ci lists.
 + */
 +static unsigned int gpu_min_ci_balanced_factor = 40;
 +
 +/* Functions from nbnxn_cuda.cu: cache-config setup and texture-reference getters */
 +extern void nbnxn_cuda_set_cacheconfig(cuda_dev_info_t *devinfo);
 +extern const struct texture<float, 1, cudaReadModeElementType>& nbnxn_cuda_get_nbfp_texref();
 +extern const struct texture<float, 1, cudaReadModeElementType>& nbnxn_cuda_get_coulomb_tab_texref();
 +
 +/* Local stand-in for md_print_warn in md_logging.c, which we should
 + * actually be using, but we can't include mpi.h in CUDA code.
 + *
 + * Prints the printf-style message, framed by a leading and a trailing
 + * newline, first to stderr and then to fplog. Nothing at all is printed
 + * when fplog is NULL.
 + */
 +static void md_print_warn(FILE       *fplog,
 +                          const char *fmt, ...)
 +{
 +    FILE    *stream[2];
 +    va_list  ap;
 +    int      s;
 +
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +
 +    /* We should only print to stderr on the master node,
 +     * in most cases fplog is only set on the master node, so this works.
 +     */
 +    stream[0] = stderr;
 +    stream[1] = fplog;
 +
 +    for (s = 0; s < 2; s++)
 +    {
 +        /* The argument list has to be restarted for every stream */
 +        va_start(ap, fmt);
 +        fprintf(stream[s], "\n");
 +        vfprintf(stream[s], fmt, ap);
 +        fprintf(stream[s], "\n");
 +        va_end(ap);
 +    }
 +}
 +
 +
 +/* Forward declaration; the definition is presumably further down in this file. */
 +static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb);
 +
 +
 +/*! Tabulates the Ewald Coulomb force on the host and uploads it to the GPU,
 +    recording the table size and scale in nbp. On the first call (table
 +    pointer in nbp is NULL) the device array is allocated, stored in nbp and
 +    bound to the Coulomb table texture; on later calls the existing device
 +    array is simply overwritten with the freshly computed table.
 + */
 +static void init_ewald_coulomb_force_table(cu_nbparam_t *nbp)
 +{
 +    const int     npoints = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
 +    /* Subtract 2 iso 1 to avoid access out of range due to rounding */
 +    const double  scale   = (npoints - 2) / sqrt(nbp->rcoulomb_sq);
 +    float        *h_tab;
 +    float        *d_tab   = nbp->coulomb_tab;
 +    cudaError_t   stat;
 +
 +    /* Fill a temporary host buffer with the table */
 +    pmalloc((void**)&h_tab, npoints*sizeof(*h_tab));
 +
 +    table_spline3_fill_ewald_lr(h_tab, NULL, NULL, npoints,
 +                                1/scale, nbp->ewald_beta);
 +
 +    if (d_tab == NULL)
 +    {
 +        /* First use: create the device array, remember it in nbparam and
 +           bind the texture to it. */
 +        stat = cudaMalloc((void **)&d_tab, npoints*sizeof(*d_tab));
 +        CU_RET_ERR(stat, "cudaMalloc failed on coul_tab");
 +
 +        nbp->coulomb_tab = d_tab;
 +
 +        cudaChannelFormatDesc cd = cudaCreateChannelDesc<float>();
 +        stat = cudaBindTexture(NULL, &nbnxn_cuda_get_coulomb_tab_texref(),
 +                               d_tab, &cd, npoints*sizeof(*d_tab));
 +        CU_RET_ERR(stat, "cudaBindTexture on coul_tab failed");
 +    }
 +
 +    cu_copy_H2D(d_tab, h_tab, npoints*sizeof(*d_tab));
 +
 +    nbp->coulomb_tab_size  = npoints;
 +    nbp->coulomb_tab_scale = scale;
 +
 +    pfree(h_tab);
 +}
 +
 +
 +/*! First-time initialization of the atomdata structure: stores ntypes,
 +    allocates the fixed-size device buffers (shift vectors, shift forces and
 +    the two energy terms) and marks the per-atom arrays as not yet allocated.
 +    The structure only gets filled at pair-search. */
 +static void init_atomdata_first(cu_atomdata_t *ad, int ntypes)
 +{
 +    cudaError_t stat;
 +
 +    ad->ntypes            = ntypes;
 +    ad->bShiftVecUploaded = false;
 +
 +    /* Pointers to data that is not allocated here start out NULL; they will
 +       need reallocation in nbnxn_cuda_init_atomdata */
 +    ad->xq = NULL;
 +    ad->f  = NULL;
 +
 +    /* size -1 indicates that the respective array hasn't been initialized yet */
 +    ad->natoms = -1;
 +    ad->nalloc = -1;
 +
 +    /* Shift vectors and shift forces: SHIFTS elements each */
 +    stat = cudaMalloc((void**)&ad->shift_vec, SHIFTS*sizeof(*ad->shift_vec));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->shift_vec");
 +
 +    stat = cudaMalloc((void**)&ad->fshift, SHIFTS*sizeof(*ad->fshift));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->fshift");
 +
 +    /* A single element for each of the two energy terms */
 +    stat = cudaMalloc((void**)&ad->e_lj, sizeof(*ad->e_lj));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->e_lj");
 +
 +    stat = cudaMalloc((void**)&ad->e_el, sizeof(*ad->e_el));
 +    CU_RET_ERR(stat, "cudaMalloc failed on ad->e_el");
 +}
 +
 +/*! Selects the Ewald kernel type and returns one of the eelCuEWALD_*
 +    values: analytical on SM 3.0 and later, tabulated on earlier GPUs, each
 +    in a single or twin cut-off flavour. Environment variables can override
 +    the analytical/tabulated choice and force the twin cut-off kernels. */
 +static int pick_ewald_kernel_type(bool                   bTwinCut,
 +                                  const cuda_dev_info_t *dev_info)
 +{
 +    /* Benchmarking/development environment variables to force the use of
 +       analytical or tabulated Ewald kernel. */
 +    const bool  bWantAnalytical = (getenv("GMX_CUDA_NB_ANA_EWALD") != NULL);
 +    const bool  bWantTabulated  = (getenv("GMX_CUDA_NB_TAB_EWALD") != NULL);
 +    bool        bAnalytical, bTwin;
 +    const char *note;
 +
 +    if (bWantAnalytical && bWantTabulated)
 +    {
 +        gmx_incons("Both analytical and tabulated Ewald CUDA non-bonded kernels "
 +                   "requested through environment variables.");
 +    }
 +
 +    /* By default, on SM 3.0 and later use analytical Ewald, on earlier tabulated. */
 +    bAnalytical = ((dev_info->prop.major >= 3 || bWantAnalytical) && !bWantTabulated);
 +
 +    if (debug)
 +    {
 +        note = bAnalytical ?
 +            "Using analytical Ewald CUDA kernels\n" :
 +            "Using tabulated Ewald CUDA kernels\n";
 +        fputs(note, debug);
 +    }
 +
 +    /* Use twin cut-off kernels if requested by bTwinCut or the env. var.
 +       forces it (use it for debugging/benchmarking only). */
 +    bTwin = (bTwinCut || (getenv("GMX_CUDA_NB_EWALD_TWINCUT") != NULL));
 +
 +    if (bTwin)
 +    {
 +        return bAnalytical ? eelCuEWALD_ANA_TWIN : eelCuEWALD_TAB_TWIN;
 +    }
 +
 +    return bAnalytical ? eelCuEWALD_ANA : eelCuEWALD_TAB;
 +}
 +
 +
 +/*! Initializes the nonbonded parameter data structure from ic and nbat; copies nbat->nbfp into a cudaMalloc'ed buffer bound to the nbfp texture. */
 +static void init_nbparam(cu_nbparam_t *nbp,
 +                         const interaction_const_t *ic,
-                          const nonbonded_verlet_t *nbv,
++                         const nbnxn_atomdata_t *nbat,
 +                         const cuda_dev_info_t *dev_info)
 +{
 +    cudaError_t stat;
 +    int         ntypes, nnbfp;
 +
-     ntypes  = nbv->grp[0].nbat->ntype;
++    ntypes  = nbat->ntype;
 +
 +    nbp->ewald_beta = ic->ewaldcoeff;
 +    nbp->sh_ewald   = ic->sh_ewald;
 +    nbp->epsfac     = ic->epsfac;
 +    nbp->two_k_rf   = 2.0 * ic->k_rf;
 +    nbp->c_rf       = ic->c_rf;
 +    nbp->rvdw_sq    = ic->rvdw * ic->rvdw;
 +    nbp->rcoulomb_sq= ic->rcoulomb * ic->rcoulomb;
 +    nbp->rlist_sq   = ic->rlist * ic->rlist;
 +    nbp->sh_invrc6  = ic->sh_invrc6;
 +
 +    if (ic->eeltype == eelCUT)
 +    {
 +        nbp->eeltype = eelCuCUT;
 +    }
 +    else if (EEL_RF(ic->eeltype))
 +    {
 +        nbp->eeltype = eelCuRF;
 +    }
 +    else if ((EEL_PME(ic->eeltype) || ic->eeltype==eelEWALD))
 +    {
 +        /* Initially rcoulomb == rvdw, so it's surely not twin cut-off. */
 +        nbp->eeltype = pick_ewald_kernel_type(false, dev_info);
 +    }
 +    else
 +    {
 +        /* Shouldn't happen, as this is checked when choosing Verlet-scheme */
 +        gmx_incons("The requested electrostatics type is not implemented in the CUDA GPU accelerated kernels!");
 +    }
 +
 +    /* generate the Coulomb force table, needed only by the tabulated Ewald kernels */
 +    nbp->coulomb_tab = NULL;
 +    if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN)
 +    {
 +        init_ewald_coulomb_force_table(nbp);
 +    }
 +
 +    nnbfp = 2*ntypes*ntypes;
 +    stat = cudaMalloc((void **)&nbp->nbfp, nnbfp*sizeof(*nbp->nbfp));
 +    CU_RET_ERR(stat, "cudaMalloc failed on nbp->nbfp");
-     cu_copy_H2D(nbp->nbfp, nbv->grp[0].nbat->nbfp, nnbfp*sizeof(*nbp->nbfp));
++    cu_copy_H2D(nbp->nbfp, nbat->nbfp, nnbfp*sizeof(*nbp->nbfp));
 +
 +    cudaChannelFormatDesc cd   = cudaCreateChannelDesc<float>();
 +    stat = cudaBindTexture(NULL, &nbnxn_cuda_get_nbfp_texref(),
 +                           nbp->nbfp, &cd, nnbfp*sizeof(*nbp->nbfp));
 +    CU_RET_ERR(stat, "cudaBindTexture on nbfp failed");
 +}
 +
 +/*! Re-generates the GPU Ewald force table, resets rlist, and updates the
 + *  electrostatic type, switching to twin cut-off (or back) if needed. */
 +void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t cu_nb,
 +                                         const interaction_const_t *ic)
 +{
 +    cu_nbparam_t *nbp = cu_nb->nbparam;
 +
 +    nbp->rlist_sq       = ic->rlist * ic->rlist;
 +    nbp->rcoulomb_sq    = ic->rcoulomb * ic->rcoulomb;
 +    nbp->ewald_beta     = ic->ewaldcoeff;
 +
 +    nbp->eeltype        = pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw,
 +                                                 cu_nb->dev_info);
 +
 +    init_ewald_coulomb_force_table(cu_nb->nbparam); /* called regardless of the kernel type picked above */
 +}
 +
 +/*! Initializes the pair list data structure to its empty, not-yet-allocated state. */
 +static void init_plist(cu_plist_t *pl)
 +{
 +    /* initialize to NULL the pointers to data that is not allocated here and
 +       will need (re)allocation in nbnxn_cuda_init_pairlist */
 +    pl->sci     = NULL;
 +    pl->cj4     = NULL;
 +    pl->excl    = NULL;
 +
 +    /* size -1 indicates that the respective array hasn't been initialized yet */
 +    pl->na_c        = -1;
 +    pl->nsci        = -1;
 +    pl->sci_nalloc  = -1;
 +    pl->ncj4        = -1;
 +    pl->cj4_nalloc  = -1;
 +    pl->nexcl       = -1;
 +    pl->excl_nalloc = -1;
 +    pl->bDoPrune    = false;
 +}
 +
 +/*! Initializes the timer data structure by creating its start/stop CUDA events (for index 1 too if bUseTwoStreams). */
 +static void init_timers(cu_timers_t *t, bool bUseTwoStreams)
 +{
 +    cudaError_t stat;
 +    int eventflags = ( bUseCudaEventBlockingSync ? cudaEventBlockingSync: cudaEventDefault );
 +
 +    stat = cudaEventCreateWithFlags(&(t->start_atdat), eventflags);
 +    CU_RET_ERR(stat, "cudaEventCreate on start_atdat failed");
 +    stat = cudaEventCreateWithFlags(&(t->stop_atdat), eventflags);
 +    CU_RET_ERR(stat, "cudaEventCreate on stop_atdat failed");
 +
 +    /* The non-local counters/stream (second in the array) are needed only with DD. */
 +    for (int i = 0; i <= (bUseTwoStreams ? 1 : 0); i++)
 +    {
 +        stat = cudaEventCreateWithFlags(&(t->start_nb_k[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_nb_k failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_nb_k[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_nb_k failed");
 +
 +
 +        stat = cudaEventCreateWithFlags(&(t->start_pl_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_pl_h2d failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_pl_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_pl_h2d failed");
 +
 +        stat = cudaEventCreateWithFlags(&(t->start_nb_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_nb_h2d failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_nb_h2d[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_nb_h2d failed");
 +
 +        stat = cudaEventCreateWithFlags(&(t->start_nb_d2h[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on start_nb_d2h failed");
 +        stat = cudaEventCreateWithFlags(&(t->stop_nb_d2h[i]), eventflags);
 +        CU_RET_ERR(stat, "cudaEventCreate on stop_nb_d2h failed");
 +    }
 +}
 +
 +/*! Initializes the timings data structure by setting all of its time and count fields to zero. */
 +static void init_timings(wallclock_gpu_t *t)
 +{
 +    int i, j;
 +
 +    t->nb_h2d_t = 0.0;
 +    t->nb_d2h_t = 0.0;
 +    t->nb_c    = 0;
 +    t->pl_h2d_t = 0.0;
 +    t->pl_h2d_c = 0;
 +    for (i = 0; i < 2; i++)
 +    {
 +        for(j = 0; j < 2; j++)
 +        {
 +            t->ktime[i][j].t = 0.0;
 +            t->ktime[i][j].c = 0;
 +        }
 +    }
 +}
 +
 +/* Decides which kernel version to use and returns it (eNbnxnCuKDefault or eNbnxnCuKLegacy), based on:
 + *  - CUDA version used for compilation
 + *  - non-bonded kernel selector environment variables
 + *  - GPU architecture version
 + */
 +static int pick_nbnxn_kernel_version(FILE            *fplog,
 +                                     cuda_dev_info_t *devinfo)
 +{
 +    bool bForceLegacyKernel, bForceDefaultKernel, bCUDA40, bCUDA32;
 +    char sbuf[STRLEN];
 +    int  kver;
 +
 +    /* Legacy kernel (former k2), kept for backward compatibility as it is
 +       faster than the default with CUDA 3.2/4.0 on Fermi (not on Kepler). */
 +    bForceLegacyKernel  = (getenv("GMX_CUDA_NB_LEGACY") != NULL);
 +    /* default kernel (former k3). */
 +    bForceDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL);
 +
 +    if ((unsigned)(bForceLegacyKernel + bForceDefaultKernel) > 1)
 +    {
 +        gmx_fatal(FARGS, "Multiple CUDA non-bonded kernels requested; to manually pick a kernel set only one \n"
 +                  "of the following environment variables: \n"
 +                  "GMX_CUDA_NB_DEFAULT, GMX_CUDA_NB_LEGACY");
 +    }
 +
 +    bCUDA32 = bCUDA40 = false;
 +#if CUDA_VERSION == 3200
 +    bCUDA32 = true;
 +    sprintf(sbuf, "3.2");
 +#elif CUDA_VERSION == 4000
 +    bCUDA40 = true;
 +    sprintf(sbuf, "4.0");
 +#endif
 +
 +    /* the default kernel is the starting choice; sbuf is only set (and only read) for CUDA 3.2/4.0 */
 +    kver = eNbnxnCuKDefault;
 +
 +    /* Consider the legacy kernels only on compute capability < 3.0 (Fermi) with CUDA 3.2 or 4.0 */
 +    if (devinfo->prop.major < 3 && (bCUDA32 || bCUDA40))
 +    {
 +        /* use legacy kernel unless something else is forced by an env. var */
 +        if (bForceDefaultKernel)
 +        {
 +            md_print_warn(fplog,
 +                          "NOTE: CUDA %s compilation detected; with this compiler version the legacy\n"
 +                          "      non-bonded kernels perform best. However, the default kernels were\n"
 +                          "      selected by the GMX_CUDA_NB_DEFAULT environment variable.\n"
 +                          "      For best performance upgrade your CUDA toolkit.\n",
 +                          sbuf);
 +        }
 +        else
 +        {
 +            kver = eNbnxnCuKLegacy;
 +        }
 +    }
 +    else
 +    {
 +        /* issue a note if the non-default kernel is forced by an env. var */
 +        if (bForceLegacyKernel)
 +        {
 +            md_print_warn(fplog,
 +                    "NOTE: Legacy non-bonded CUDA kernels selected by the GMX_CUDA_NB_LEGACY\n"
 +                    "      env. var. Consider using using the default kernels which should be faster!\n");
 +
 +            kver = eNbnxnCuKLegacy;
 +        }
 +    }
 +
 +    return kver;
 +}
 +
 +void nbnxn_cuda_init(FILE *fplog,
 +                     nbnxn_cuda_ptr_t *p_cu_nb,
 +                     gmx_gpu_info_t *gpu_info, int my_gpu_index,
 +                     gmx_bool bLocalAndNonlocal)
 +{   /* Allocates and initializes the CUDA nonbonded data structure and returns it through p_cu_nb. */
 +    cudaError_t stat;
 +    nbnxn_cuda_ptr_t  nb;
 +    char sbuf[STRLEN];
 +    bool bStreamSync, bNoStreamSync, bTMPIAtomics, bX86, bOldDriver;
 +    int cuda_drv_ver;
 +
 +    assert(gpu_info);
 +
 +    if (p_cu_nb == NULL) return;
 +
 +    snew(nb, 1);
 +    snew(nb->atdat, 1);
 +    snew(nb->nbparam, 1);
 +    snew(nb->plist[eintLocal], 1);
 +    if (bLocalAndNonlocal)
 +    {
 +        snew(nb->plist[eintNonlocal], 1);
 +    }
 +
 +    nb->bUseTwoStreams = bLocalAndNonlocal;
 +
 +    snew(nb->timers, 1);
 +    snew(nb->timings, 1);
 +
 +    /* init nbst: allocate its e_lj, e_el and fshift buffers with pmalloc */
 +    pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj));
 +    pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el));
 +    pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift));
 +
 +    init_plist(nb->plist[eintLocal]);
 +
 +    /* local/non-local GPU streams */
 +    stat = cudaStreamCreate(&nb->stream[eintLocal]);
 +    CU_RET_ERR(stat, "cudaStreamCreate on stream[eintLocal] failed");
 +    if (nb->bUseTwoStreams)
 +    {
 +        init_plist(nb->plist[eintNonlocal]);
 +        stat = cudaStreamCreate(&nb->stream[eintNonlocal]);
 +        CU_RET_ERR(stat, "cudaStreamCreate on stream[eintNonlocal] failed");
 +    }
 +
 +    /* init events for synchronization (timing disabled for performance reasons!) */
 +    stat = cudaEventCreateWithFlags(&nb->nonlocal_done, cudaEventDisableTiming);
 +    CU_RET_ERR(stat, "cudaEventCreate on nonlocal_done failed");
 +    stat = cudaEventCreateWithFlags(&nb->misc_ops_done, cudaEventDisableTiming);
 +    CU_RET_ERR(stat, "cudaEventCreate on misc_ops_done failed");
 +
 +    /* set device info, just point it to the right GPU among the detected ones */
 +    nb->dev_info = &gpu_info->cuda_dev[get_gpu_device_id(gpu_info, my_gpu_index)];
 +
 +    /* On GPUs with ECC enabled, cudaStreamSynchronize shows a large overhead
 +     * (which increases with shorter time/step) caused by a known CUDA driver bug.
 +     * To work around the issue we'll use an (admittedly fragile) memory polling
 +     * waiting to preserve performance. This requires support for atomic
 +     * operations and only works on x86/x86_64.
 +     * With polling wait event-timing also needs to be disabled.
 +     *
 +     * The overhead is greatly reduced in API v5.0 drivers and the improvement
 +     * is independent of runtime version. Hence, with API v5.0 drivers and later
 +     * we won't switch to polling.
 +     *
 +     * NOTE: Unfortunately, this is known to fail when GPUs are shared by (t)MPI
 +     * ranks, so we will also disable it in that case.
 +     */
 +
 +    bStreamSync    = getenv("GMX_CUDA_STREAMSYNC") != NULL;
 +    bNoStreamSync  = getenv("GMX_NO_CUDA_STREAMSYNC") != NULL;
 +
 +#ifdef TMPI_ATOMICS
 +    bTMPIAtomics = true;
 +#else
 +    bTMPIAtomics = false;
 +#endif
 +
 +#if defined(i386) || defined(__x86_64__)
 +    bX86 = true;
 +#else
 +    bX86 = false;
 +#endif
 +
 +    if (bStreamSync && bNoStreamSync)
 +    {
 +        gmx_fatal(FARGS, "Conflicting environment variables: both GMX_CUDA_STREAMSYNC and GMX_NO_CUDA_STREAMSYNC defined");
 +    }
 +
 +    stat = cudaDriverGetVersion(&cuda_drv_ver);
 +    CU_RET_ERR(stat, "cudaDriverGetVersion failed");
++
 +    bOldDriver = (cuda_drv_ver < 5000);
 +
-     if (nb->dev_info->prop.ECCEnabled == 1)
++    if ((nb->dev_info->prop.ECCEnabled == 1) && bOldDriver)
 +    {
++        /* Polling wait should be used instead of cudaStreamSynchronize only if:
++         *   - ECC is ON & driver is old (checked above),
++         *   - we're on x86/x86_64,
++         *   - atomics are available, and
++         *   - GPUs are not being shared.
++         */
++        bool bShouldUsePollSync = (bX86 && bTMPIAtomics && !gpu_info->bDevShare);
++
 +        if (bStreamSync)
 +        {
 +            nb->bUseStreamSync = true;
 +
 +            /* only warn if polling should be used */
-             if (bOldDriver && !gpu_info->bDevShare)
++            if (bShouldUsePollSync)
 +            {
 +                md_print_warn(fplog,
 +                              "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n"
 +                              "      cudaStreamSynchronize waiting is forced by the GMX_CUDA_STREAMSYNC env. var.\n");
 +            }
 +        }
 +        else
 +        {
-             /* Can/should turn of cudaStreamSynchronize wait only if
-              *   - we're on x86/x86_64
-              *   - atomics are available
-              *   - GPUs are not being shared
-              *   - and driver is old. */
-             nb->bUseStreamSync =
-                 (bX86 && bTMPIAtomics && !gpu_info->bDevShare && bOldDriver) ?
-                 true : false;
- 
-             if (nb->bUseStreamSync)
++            nb->bUseStreamSync = !bShouldUsePollSync;
++
++            if (bShouldUsePollSync)
 +            {
 +                md_print_warn(fplog,
 +                              "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n"
-                               "      cause performance loss. Switching to the alternative polling GPU waiting.\n"
++                              "      cause performance loss. Switching to the alternative polling GPU wait.\n"
 +                              "      If you encounter issues, switch back to standard GPU waiting by setting\n"
 +                              "      the GMX_CUDA_STREAMSYNC environment variable.\n");
 +            }
-             else if (bOldDriver)
++            else
 +            {
 +                /* Tell the user that the ECC+old driver combination can be bad */
 +                sprintf(sbuf,
-                         "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0. A bug in this\n"
-                         "      driver can cause performance loss.\n"
-                         "      However, the polling waiting workaround can not be used because\n%s\n"
++                        "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0.\n"
++                        "      A known bug in this driver version can cause performance loss.\n"
++                        "      However, the polling wait workaround can not be used because\n%s\n"
 +                        "      Consider updating the driver or turning ECC off.",
-                         (!bX86 || !bTMPIAtomics) ?
-                            "         atomic operations are not supported by the platform/CPU+compiler." :
-                            "         GPU(s) are being oversubscribed.");
++                        (bX86 && bTMPIAtomics) ?
++                            "      GPU(s) are being oversubscribed." :
++                            "      atomic operations are not supported by the platform/CPU+compiler.");
 +                md_print_warn(fplog, sbuf);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        if (bNoStreamSync)
 +        {
 +            nb->bUseStreamSync = false;
 +
 +            md_print_warn(fplog,
 +                          "NOTE: Polling wait for GPU synchronization requested by GMX_NO_CUDA_STREAMSYNC\n");
 +        }
 +        else
 +        {
 +            /* no ECC or a new enough driver, and cudaStreamSynchronize not turned off by env. var. */
 +            nb->bUseStreamSync = true;
 +        }
 +    }
 +
 +    /* CUDA timing disabled as event timers don't work:
 +       - with multiple streams = domain-decomposition;
 +       - with the polling waiting hack (without cudaStreamSynchronize);
 +       - when turned off by GMX_DISABLE_CUDA_TIMING.
 +     */
 +    nb->bDoTime = (!nb->bUseTwoStreams && nb->bUseStreamSync &&
 +                   (getenv("GMX_DISABLE_CUDA_TIMING") == NULL));
 +
 +    if (nb->bDoTime)
 +    {
 +        init_timers(nb->timers, nb->bUseTwoStreams);
 +        init_timings(nb->timings);
 +    }
 +
 +    /* set the kernel type for the current GPU */
 +    nb->kernel_ver = pick_nbnxn_kernel_version(fplog, nb->dev_info);
 +    /* pick L1 cache configuration */
 +    nbnxn_cuda_set_cacheconfig(nb->dev_info);
 +
 +    *p_cu_nb = nb;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Initialized CUDA data structures.\n");
 +    }
 +}
 +
- void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t cu_nb,
-                            const interaction_const_t *ic,
-                            const nonbonded_verlet_t *nbv)
++void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t                cu_nb,
++                           const interaction_const_t      *ic,
++                           const nonbonded_verlet_group_t *nbv_group)
 +{   /* Sets up the atom data and nonbonded parameters of cu_nb from ic and the nbat of nbv_group[0]. */
-     init_atomdata_first(cu_nb->atdat, nbv->grp[0].nbat->ntype);
-     init_nbparam(cu_nb->nbparam, ic, nbv, cu_nb->dev_info);
++    init_atomdata_first(cu_nb->atdat, nbv_group[0].nbat->ntype);
++    init_nbparam(cu_nb->nbparam, ic, nbv_group[0].nbat, cu_nb->dev_info);
 +
 +    /* zero the GPU energy (e_lj, e_el) and shift force output buffers */
 +    nbnxn_cuda_clear_e_fshift(cu_nb);
 +}
 +
 +void nbnxn_cuda_init_pairlist(nbnxn_cuda_ptr_t cu_nb,
 +                              const nbnxn_pairlist_t *h_plist,
 +                              int iloc)
 +{   /* Updates the GPU pair list of locality iloc from h_plist via cu_realloc_buffered (presumably realloc + H2D copy -- helper not shown here). */
 +    char         sbuf[STRLEN];
 +    cudaError_t  stat;
 +    bool         bDoTime    = cu_nb->bDoTime;
 +    cudaStream_t stream     = cu_nb->stream[iloc];
 +    cu_plist_t   *d_plist   = cu_nb->plist[iloc];
 +
 +    if (d_plist->na_c < 0) /* still the -1 set by init_plist: take the value from the host list */
 +    {
 +        d_plist->na_c = h_plist->na_ci;
 +    }
 +    else
 +    {
 +        if (d_plist->na_c != h_plist->na_ci) /* the value must not change between calls */
 +        {
 +            sprintf(sbuf, "In cu_init_plist: the #atoms per cell has changed (from %d to %d)",
 +                    d_plist->na_c, h_plist->na_ci);
 +            gmx_incons(sbuf);
 +        }
 +    }
 +
 +    if (bDoTime)
 +    {
 +        stat = cudaEventRecord(cu_nb->timers->start_pl_h2d[iloc], stream);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +
 +    cu_realloc_buffered((void **)&d_plist->sci, h_plist->sci, sizeof(*d_plist->sci),
 +                         &d_plist->nsci, &d_plist->sci_nalloc,
 +                         h_plist->nsci,
 +                         stream, true);
 +
 +    cu_realloc_buffered((void **)&d_plist->cj4, h_plist->cj4, sizeof(*d_plist->cj4),
 +                         &d_plist->ncj4, &d_plist->cj4_nalloc,
 +                         h_plist->ncj4,
 +                         stream, true);
 +
 +    cu_realloc_buffered((void **)&d_plist->excl, h_plist->excl, sizeof(*d_plist->excl),
 +                         &d_plist->nexcl, &d_plist->excl_nalloc,
 +                         h_plist->nexcl,
 +                         stream, true);
 +
 +    if (bDoTime)
 +    {
 +        stat = cudaEventRecord(cu_nb->timers->stop_pl_h2d[iloc], stream);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +
 +    /* need to prune the pair list during the next step */
 +    d_plist->bDoPrune = true;
 +}
 +
 +void nbnxn_cuda_upload_shiftvec(nbnxn_cuda_ptr_t cu_nb,
 +                                const nbnxn_atomdata_t *nbatom)
 +{   /* Copies the SHIFTS shift vectors of nbatom to the GPU in the local stream when needed. */
 +    cu_atomdata_t *adat = cu_nb->atdat;
 +    cudaStream_t  ls    = cu_nb->stream[eintLocal];
 +
 +    /* upload only if the box is dynamic or the shift vectors have not been uploaded yet */
 +    if (nbatom->bDynamicBox || !adat->bShiftVecUploaded)
 +    {
 +        cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, 
 +                          SHIFTS * sizeof(*adat->shift_vec), ls);
 +        adat->bShiftVecUploaded = true;
 +    }
 +}
 +
 +/*! Clears the first natoms_clear elements of the GPU nonbonded force output array (async memset in the local stream). */
 +static void nbnxn_cuda_clear_f(nbnxn_cuda_ptr_t cu_nb, int natoms_clear)
 +{
 +    cudaError_t   stat;
 +    cu_atomdata_t *adat = cu_nb->atdat;
 +    cudaStream_t  ls    = cu_nb->stream[eintLocal];
 +
 +    stat = cudaMemsetAsync(adat->f, 0, natoms_clear * sizeof(*adat->f), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on f falied");
 +}
 +
 +/*! Clears the nonbonded shift force output array and the e_lj/e_el energy outputs on the GPU (async memsets in the local stream). */
 +static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb)
 +{
 +    cudaError_t   stat;
 +    cu_atomdata_t *adat = cu_nb->atdat;
 +    cudaStream_t  ls    = cu_nb->stream[eintLocal];
 +
 +    stat = cudaMemsetAsync(adat->fshift, 0, SHIFTS * sizeof(*adat->fshift), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on fshift falied");
 +    stat = cudaMemsetAsync(adat->e_lj, 0, sizeof(*adat->e_lj), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on e_lj falied");
 +    stat = cudaMemsetAsync(adat->e_el, 0, sizeof(*adat->e_el), ls);
 +    CU_RET_ERR(stat, "cudaMemsetAsync on e_el falied");
 +}
 +
 +void nbnxn_cuda_clear_outputs(nbnxn_cuda_ptr_t cu_nb, int flags)
 +{   /* Clears the GPU force output for all natoms atoms and, depending on flags, the energy/shift force outputs. */
 +    nbnxn_cuda_clear_f(cu_nb, cu_nb->atdat->natoms);
 +    /* clear the shift force array and energies only if GMX_FORCE_VIRIAL is set
 +       in flags, i.e. if these outputs were used in the current step */
 +    if (flags & GMX_FORCE_VIRIAL)
 +    {
 +        nbnxn_cuda_clear_e_fshift(cu_nb);
 +    }
 +}
 +
 +void nbnxn_cuda_init_atomdata(nbnxn_cuda_ptr_t cu_nb,
 +                              const nbnxn_atomdata_t *nbat)
 +{   /* (Re)allocates the per-atom GPU arrays (f, xq, atom_types) if needed and uploads the atom types. */
 +    cudaError_t   stat;
 +    int           nalloc, natoms;
 +    bool          realloced;
 +    bool          bDoTime   = cu_nb->bDoTime;
 +    cu_timers_t   *timers   = cu_nb->timers;
 +    cu_atomdata_t *d_atdat  = cu_nb->atdat;
 +    cudaStream_t  ls        = cu_nb->stream[eintLocal];
 +
 +    natoms = nbat->natoms;
 +    realloced = false;
 +
 +    if (bDoTime)
 +    {
 +        /* time the asynchronous operations issued below in the local stream */
 +        stat = cudaEventRecord(timers->start_atdat, ls);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +
 +    /* need to (re)allocate if we have to copy more atoms than the amount of space
 +       available; this is also the case on the first call, when d_atdat->nalloc == -1 */
 +    if (natoms > d_atdat->nalloc)
 +    {
 +        nalloc = over_alloc_small(natoms);
 +
 +        /* free up first if the arrays have already been initialized */
 +        if (d_atdat->nalloc != -1)
 +        {
 +            cu_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc);
 +            cu_free_buffered(d_atdat->xq);
 +            cu_free_buffered(d_atdat->atom_types);
 +        }
 +
 +        stat = cudaMalloc((void **)&d_atdat->f, nalloc*sizeof(*d_atdat->f));
 +        CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->f");
 +        stat = cudaMalloc((void **)&d_atdat->xq, nalloc*sizeof(*d_atdat->xq));
 +        CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->xq");
 +
 +        stat = cudaMalloc((void **)&d_atdat->atom_types, nalloc*sizeof(*d_atdat->atom_types));
 +        CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->atom_types");
 +
 +        d_atdat->nalloc = nalloc;
 +        realloced = true;
 +    }
 +
 +    d_atdat->natoms = natoms;
 +    d_atdat->natoms_local = nbat->natoms_local;
 +
 +    /* need to clear the GPU f output (all nalloc elements) if realloc happened */
 +    if (realloced)
 +    {
 +        nbnxn_cuda_clear_f(cu_nb, nalloc);
 +    }
 +
 +    cu_copy_H2D_async(d_atdat->atom_types, nbat->type,
 +                      natoms*sizeof(*d_atdat->atom_types), ls);
 +
 +    if (bDoTime)
 +    {
 +        stat = cudaEventRecord(timers->stop_atdat, ls);
 +        CU_RET_ERR(stat, "cudaEventRecord failed");
 +    }
 +}
 +
 +void nbnxn_cuda_free(FILE *fplog, nbnxn_cuda_ptr_t cu_nb)
 +{   /* Destroys the events, unbinds the textures and frees the device buffers and host structures of cu_nb; fplog is not used. */
 +    cudaError_t     stat;
 +    cu_atomdata_t   *atdat;
 +    cu_nbparam_t    *nbparam;
 +    cu_plist_t      *plist, *plist_nl;
 +    cu_timers_t     *timers;
 +
 +    if (cu_nb == NULL) return;
 +
 +    atdat       = cu_nb->atdat;
 +    nbparam     = cu_nb->nbparam;
 +    plist       = cu_nb->plist[eintLocal];
 +    plist_nl    = cu_nb->plist[eintNonlocal];
 +    timers      = cu_nb->timers;
 +
 +    if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN)
 +    {
 +        stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref());
 +        CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab failed");
 +        cu_free_buffered(nbparam->coulomb_tab, &nbparam->coulomb_tab_size);
 +    }
 +
 +    stat = cudaEventDestroy(cu_nb->nonlocal_done);
 +    CU_RET_ERR(stat, "cudaEventDestroy failed on cu_nb->nonlocal_done");
 +    stat = cudaEventDestroy(cu_nb->misc_ops_done);
 +    CU_RET_ERR(stat, "cudaEventDestroy failed on cu_nb->misc_ops_done");
 +
 +    if (cu_nb->bDoTime)
 +    {
 +        stat = cudaEventDestroy(timers->start_atdat);
 +        CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_atdat");
 +        stat = cudaEventDestroy(timers->stop_atdat);
 +        CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_atdat");
 +
 +        /* The non-local counters/stream exist only with DD. NOTE(review): the streams are destroyed only when bDoTime is set. */
 +        for (int i = 0; i <= (cu_nb->bUseTwoStreams ? 1 : 0); i++)
 +        {
 +            stat = cudaEventDestroy(timers->start_nb_k[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_k");
 +            stat = cudaEventDestroy(timers->stop_nb_k[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_k");
 +
 +            stat = cudaEventDestroy(timers->start_pl_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_pl_h2d");
 +            stat = cudaEventDestroy(timers->stop_pl_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_pl_h2d");
 +
 +            stat = cudaStreamDestroy(cu_nb->stream[i]);
 +            CU_RET_ERR(stat, "cudaStreamDestroy failed on stream");
 +
 +            stat = cudaEventDestroy(timers->start_nb_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_h2d");
 +            stat = cudaEventDestroy(timers->stop_nb_h2d[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_h2d");
 +
 +            stat = cudaEventDestroy(timers->start_nb_d2h[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_d2h");
 +            stat = cudaEventDestroy(timers->stop_nb_d2h[i]);
 +            CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_d2h");
 +        }
 +    }
 +
 +    stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_texref());
 +    CU_RET_ERR(stat, "cudaUnbindTexture on nbfp failed");
 +    cu_free_buffered(nbparam->nbfp);
 +
 +    stat = cudaFree(atdat->shift_vec);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->shift_vec");
 +    stat = cudaFree(atdat->fshift);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->fshift");
 +
 +    stat = cudaFree(atdat->e_lj);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->e_lj");
 +    stat = cudaFree(atdat->e_el);
 +    CU_RET_ERR(stat, "cudaFree failed on atdat->e_el");
 +
 +    cu_free_buffered(atdat->f, &atdat->natoms, &atdat->nalloc);
 +    cu_free_buffered(atdat->xq);
 +    cu_free_buffered(atdat->atom_types, &atdat->ntypes);
 +
 +    cu_free_buffered(plist->sci, &plist->nsci, &plist->sci_nalloc);
 +    cu_free_buffered(plist->cj4, &plist->ncj4, &plist->cj4_nalloc);
 +    cu_free_buffered(plist->excl, &plist->nexcl, &plist->excl_nalloc);
 +    if (cu_nb->bUseTwoStreams) /* the non-local pair list exists only with two streams */
 +    {
 +        cu_free_buffered(plist_nl->sci, &plist_nl->nsci, &plist_nl->sci_nalloc);
 +        cu_free_buffered(plist_nl->cj4, &plist_nl->ncj4, &plist_nl->cj4_nalloc);
 +        cu_free_buffered(plist_nl->excl, &plist_nl->nexcl, &plist_nl->excl_nalloc);
 +    }
 +
 +    sfree(atdat);
 +    sfree(nbparam);
 +    sfree(plist);
 +    if (cu_nb->bUseTwoStreams)
 +    {
 +        sfree(plist_nl);
 +    }
 +    sfree(timers);
 +    sfree(cu_nb->timings);
 +    sfree(cu_nb);
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Cleaned up CUDA data structures.\n");
 +    }
 +}
 +
 +void cu_synchstream_atdat(nbnxn_cuda_ptr_t cu_nb, int iloc)
 +{   /* Makes the stream of locality iloc wait for the stop_atdat event. */
 +    cudaError_t stat;
 +    cudaStream_t stream = cu_nb->stream[iloc];
 +
 +    stat = cudaStreamWaitEvent(stream, cu_nb->timers->stop_atdat, 0); /* NOTE(review): stop_atdat is only created and recorded when bDoTime is set -- confirm callers */
 +    CU_RET_ERR(stat, "cudaStreamWaitEvent failed");
 +}
 +
 +wallclock_gpu_t * nbnxn_cuda_get_timings(nbnxn_cuda_ptr_t cu_nb)
 +{   /* Returns the timings structure of cu_nb, or NULL if cu_nb is NULL or timing is disabled (bDoTime false). */
 +    return (cu_nb != NULL && cu_nb->bDoTime) ? cu_nb->timings : NULL;
 +}
 +
 +void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t cu_nb)
 +{   /* Re-initializes the timings of cu_nb with init_timings, but only when timing is enabled (bDoTime). */
 +    if (cu_nb->bDoTime)
 +    {
 +        init_timings(cu_nb->timings);
 +    }
 +}
 +
 +int nbnxn_cuda_min_ci_balanced(nbnxn_cuda_ptr_t cu_nb)
 +{   /* Returns gpu_min_ci_balanced_factor times the multiprocessor count of the GPU, or 0 if cu_nb is NULL. */
 +    return cu_nb != NULL ?
 +        gpu_min_ci_balanced_factor*cu_nb->dev_info->prop.multiProcessorCount : 0;
 +
 +}
diff --cc src/gromacs/mdlib/pull_rotation.c
index fb6c276416,0000000000..9fc97282be
mode 100644,000000..100644
--- a/src/gromacs/mdlib/pull_rotation.c
+++ b/src/gromacs/mdlib/pull_rotation.c
@@@ -1,4092 -1,0 +1,4098 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2008, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include "domdec.h"
 +#include "gmx_wallcycle.h"
 +#include "gmx_cyclecounter.h"
 +#include "trnio.h"
 +#include "smalloc.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "futil.h"
 +#include "mdrun.h"
 +#include "txtdump.h"
 +#include "names.h"
 +#include "mtop_util.h"
 +#include "names.h"
 +#include "nrjac.h"
 +#include "vec.h"
 +#include "gmx_ga2la.h"
 +#include "xvgr.h"
 +#include "gmxfio.h"
 +#include "groupcoord.h"
 +#include "pull_rotation.h"
 +#include "gmx_sort.h"
 +#include "copyrite.h"
 +#include "macros.h"
 +
 +
 +/* Prefix used in messages of this module, e.g. as the leading %s argument of
 + * the gmx_fatal() call in reduce_output() */
 +static char *RotStr = {"Enforced rotation:"};
 +
 +
 +/* Set the minimum weight for the determination of the slab centers;
 + * get_slab_centers() aborts if a slab weight is not larger than this value */
 +#define WEIGHT_MIN (10*GMX_FLOAT_MIN)
 +
 +/* Helper structure for sorting positions along the rotation vector; this is
 + * the element type of the 'data' sort buffer in t_gmx_enfrot                */
 +typedef struct {
 +    real xcproj;            /* Projection of xc on the rotation vector        */
 +    int  ind;               /* Index of xc                                    */
 +    real m;                 /* Mass                                           */
 +    rvec x;                 /* Position                                       */
 +    rvec x_ref;             /* Reference position                             */
 +} sort_along_vec_t;
 +
 +
 +/* Enforced rotation / flexible: per-slab data used to determine the angle
 + * of each slab                                                               */
 +typedef struct gmx_slabdata
 +{
 +    int   nat;              /* Number of atoms belonging to this slab         */
 +    rvec *x;                /* The positions belonging to this slab. In
 +                               general, this should be all positions of the
 +                               whole rotation group, but we leave out those
 +                               that have a small enough weight                */
 +    rvec *ref;              /* Same for reference                             */
 +    real *weight;           /* The weight for each atom                       */
 +} t_gmx_slabdata;
 +
 +
 +/* Helper structure for potential fitting. get_fitangle() reads V[] and
 + * degangle[] with indices 0 .. PotAngle_nstep-1 of the rotation group.       */
 +typedef struct gmx_potfit
 +{
 +    real   *degangle;       /* Set of angles for which the potential is
 +                               calculated. The optimum fit is determined as
 +                               the angle for which the potential is minimal   */
 +    real   *V;              /* Potential for the different angles             */
 +    matrix *rotmat;         /* Rotation matrix corresponding to the angles    */
 +} t_gmx_potfit;
 +
 +
 +/* Enforced rotation data for all groups; the routines in this file reach it
 + * through rot->enfrot                                                        */
 +typedef struct gmx_enfrot
 +{
 +    FILE             *out_rot;     /* Output file for rotation data                  */
 +    FILE             *out_torque;  /* Output file for torque data                    */
 +    FILE             *out_angles;  /* Output file for slab angles for flexible type  */
 +    FILE             *out_slabs;   /* Output file for slab centers                   */
 +    int               bufsize;     /* Allocation size of buf                         */
 +    rvec             *xbuf;        /* Coordinate buffer variable for sorting         */
 +    real             *mbuf;        /* Masses buffer variable for sorting             */
 +    sort_along_vec_t *data;        /* Buffer variable needed for position sorting    */
 +    real             *mpi_inbuf;   /* MPI buffer (send side of the reduction)        */
 +    real             *mpi_outbuf;  /* MPI buffer (receive side of the reduction)     */
 +    int               mpi_bufsize; /* Allocation size of in & outbuf                 */
 +    unsigned long     Flags;       /* mdrun flags                                    */
 +    gmx_bool          bOut;        /* Used to skip first output when appending to
 +                                    * avoid duplicate entries in rotation outfiles   */
 +} t_gmx_enfrot;
 +
 +
 +/* Global enforced rotation data for a single rotation group; the routines in
 + * this file reach it through rotg->enfrotgrp                                 */
 +typedef struct gmx_enfrotgrp
 +{
 +    real     degangle;      /* Rotation angle in degrees                      */
 +    matrix   rotmat;        /* Rotation matrix                                */
 +    atom_id *ind_loc;       /* Local rotation indices                         */
 +    int      nat_loc;       /* Number of local group atoms                    */
 +    int      nalloc_loc;    /* Allocation size for ind_loc and weight_loc     */
 +
 +    real     V;             /* Rotation potential for this rotation group     */
 +    rvec    *f_rot_loc;     /* Array to store the forces on the local atoms
 +                               resulting from enforced rotation potential     */
 +
 +    /* Collective coordinates for the whole rotation group */
 +    real  *xc_ref_length;   /* Length of each x_rotref vector after x_rotref
 +                               has been put into origin                       */
 +    int   *xc_ref_ind;      /* Position of each local atom in the collective
 +                               array                                          */
 +    rvec   xc_center;       /* Center of the rotation group positions, may
 +                               be mass weighted                               */
 +    rvec   xc_ref_center;   /* ditto, for the reference positions             */
 +    rvec  *xc;              /* Current (collective) positions                 */
 +    ivec  *xc_shifts;       /* Current (collective) shifts                    */
 +    ivec  *xc_eshifts;      /* Extra shifts since last DD step                */
 +    rvec  *xc_old;          /* Old (collective) positions                     */
 +    rvec  *xc_norm;         /* Normalized form of the current positions       */
 +    rvec  *xc_ref_sorted;   /* Reference positions (sorted in the same order
 +                               as xc when sorted)                             */
 +    int   *xc_sortind;      /* Where is a position found after sorting?       */
 +    real  *mc;              /* Collective masses                              */
 +    real  *mc_sorted;       /* NOTE(review): presumably the collective masses
 +                               in sorted order - confirm against the users    */
 +    real   invmass;         /* one over the total mass of the rotation group  */
 +
 +    real   torque_v;        /* Torque in the direction of rotation vector     */
 +    real   angle_v;         /* Actual angle of the whole rotation group       */
 +    /* Fixed rotation only */
 +    real   weight_v;        /* Weights for angle determination                */
 +    rvec  *xr_loc;          /* Local reference coords, correctly rotated      */
 +    rvec  *x_loc_pbc;       /* Local current coords, correct PBC image        */
 +    real  *m_loc;           /* Masses of the current local atoms              */
 +
 +    /* Flexible rotation only */
 +    int    nslabs_alloc;              /* For this many slabs memory is allocated        */
 +    int    slab_first;                /* Lowermost slab for which the calculation needs
 +                                         to be performed at a given time step           */
 +    int    slab_last;                 /* Uppermost slab ...                             */
 +    int    slab_first_ref;            /* First slab for which ref. center is stored     */
 +    int    slab_last_ref;             /* Last ...                                       */
 +    int    slab_buffer;               /* Slab buffer region around reference slabs      */
 +    int   *firstatom;                 /* First relevant atom for a slab                 */
 +    int   *lastatom;                  /* Last relevant atom for a slab                  */
 +    rvec  *slab_center;               /* Gaussian-weighted slab center                  */
 +    rvec  *slab_center_ref;           /* Gaussian-weighted slab center for the
 +                                         reference positions                            */
 +    real  *slab_weights;              /* Sum of gaussian weights in a slab              */
 +    real  *slab_torque_v;             /* Torque T = r x f for each slab.                */
 +                                      /* torque_v = m.v = angular momentum in the
 +                                         direction of v                                 */
 +    real  max_beta;                   /* min_gaussian from inputrec->rotgrp is the
 +                                         minimum value the gaussian must have so that
 +                                         the force is actually evaluated; max_beta is
 +                                         just another way to put it                     */
 +    real           *gn_atom;          /* Precalculated gaussians for a single atom      */
 +    int            *gn_slabind;       /* Tells to which slab each precalculated gaussian
 +                                         belongs                                        */
 +    rvec           *slab_innersumvec; /* Inner sum of the flexible2 potential per slab;
 +                                         this is precalculated for optimization reasons */
 +    t_gmx_slabdata *slab_data;        /* Holds atom positions and gaussian weights
 +                                         of atoms belonging to a slab                   */
 +
 +    /* For potential fits with varying angle: */
 +    t_gmx_potfit *PotAngleFit;  /* Used for fit type 'potential'              */
 +} t_gmx_enfrotgrp;
 +
 +
 +/* Activate output of forces for correctness checks */
 +/* #define PRINT_FORCES */
 +/* Both macros expand in the scope of the caller and rely on local names
 + * there: PRINT_FORCE_J uses 'erg' and 'j', PRINT_POT_TAU uses 'erg' and 'cr'.
 + * Both already contain their terminating ';' / '}' and are written without
 + * a trailing semicolon. Without PRINT_FORCES they expand to nothing. */
 +#ifdef PRINT_FORCES
 +#define PRINT_FORCE_J  fprintf(stderr, "f%d = %15.8f %15.8f %15.8f\n", erg->xc_ref_ind[j], erg->f_rot_loc[j][XX], erg->f_rot_loc[j][YY], erg->f_rot_loc[j][ZZ]);
 +#define PRINT_POT_TAU  if (MASTER(cr)) { \
 +        fprintf(stderr, "potential = %15.8f\n" "torque    = %15.8f\n", erg->V, erg->torque_v); \
 +}
 +#else
 +#define PRINT_FORCE_J
 +#define PRINT_POT_TAU
 +#endif
 +
 +/* Shortcuts for often used queries. The argument is a pointer to a rotation
 + * group; it is parenthesized in the expansion so that any pointer-valued
 + * expression (e.g. &rot->grp[g]) expands correctly. Note that the argument is
 + * evaluated several times, so it must not have side effects.
 + * ISFLEX: the group is of one of the four flexible types.
 + * ISCOLL: the group is flexible or of type erotgRMPF / erotgRM2PF. */
 +#define ISFLEX(rg) ( ((rg)->eType == erotgFLEX) || ((rg)->eType == erotgFLEXT) || ((rg)->eType == erotgFLEX2) || ((rg)->eType == erotgFLEX2T) )
 +#define ISCOLL(rg) ( ((rg)->eType == erotgFLEX) || ((rg)->eType == erotgFLEXT) || ((rg)->eType == erotgFLEX2) || ((rg)->eType == erotgFLEX2T) || ((rg)->eType == erotgRMPF) || ((rg)->eType == erotgRM2PF) )
 +
 +
 +/* Returns TRUE if at least one of the rot->ngrp rotation groups is of a
 + * flexible type (ISFLEX), FALSE otherwise */
 +static gmx_bool HaveFlexibleGroups(t_rot *rot)
 +{
 +    int      igrp;
 +    gmx_bool bFound = FALSE;
 +
 +
 +    /* Stop scanning as soon as the first flexible group has been seen */
 +    for (igrp = 0; igrp < rot->ngrp && !bFound; igrp++)
 +    {
 +        t_rotgrp *grp = &rot->grp[igrp];
 +
 +        if (ISFLEX(grp))
 +        {
 +            bFound = TRUE;
 +        }
 +    }
 +
 +    return bFound;
 +}
 +
 +
 +/* Returns TRUE if at least one rotation group has fit type erotgFitPOT, i.e.
 + * its fit angle is determined by finding the minimum of the rotation
 + * potential; FALSE otherwise */
 +static gmx_bool HavePotFitGroups(t_rot *rot)
 +{
 +    int      igrp;
 +    gmx_bool bFound = FALSE;
 +
 +
 +    /* Stop scanning as soon as the first such group has been seen */
 +    for (igrp = 0; igrp < rot->ngrp && !bFound; igrp++)
 +    {
 +        if (erotgFitPOT == rot->grp[igrp].eFittype)
 +        {
 +            bFound = TRUE;
 +        }
 +    }
 +
 +    return bFound;
 +}
 +
 +
 +/* Allocates a dim x dim matrix of doubles as an array of dim row pointers,
 + * each pointing to its own row of dim elements (all allocated with snew).
 + * Release the result with free_square_matrix(). */
 +static double** allocate_square_matrix(int dim)
 +{
 +    double **mat = NULL;
 +    int      i   = 0;
 +
 +
 +    snew(mat, dim);
 +    while (i < dim)
 +    {
 +        snew(mat[i], dim);
 +        i++;
 +    }
 +
 +    return mat;
 +}
 +
 +
 +/* Frees a matrix with dim rows as set up by allocate_square_matrix(): first
 + * every row, then the array of row pointers */
 +static void free_square_matrix(double** mat, int dim)
 +{
 +    int i = 0;
 +
 +
 +    while (i < dim)
 +    {
 +        sfree(mat[i]);
 +        i++;
 +    }
 +    sfree(mat);
 +}
 +
 +
 +/* Scans the rotg->PotAngle_nstep potentials stored in erg->PotAngleFit and
 + * returns the angle (in the units of degangle[]) that belongs to the smallest
 + * one; the first minimum wins in case of ties. Returns -999.9 if no potential
 + * is smaller than GMX_FLOAT_MAX, e.g. when PotAngle_nstep is 0. */
 +static real get_fitangle(t_rotgrp *rotg, gmx_enfrotgrp_t erg)
 +{
 +    const t_gmx_potfit *potfit     = erg->PotAngleFit;
 +    real                best_pot   = GMX_FLOAT_MAX;
 +    real                best_angle = -999.9;
 +    int                 k;
 +
 +
 +    for (k = 0; k < rotg->PotAngle_nstep; k++)
 +    {
 +        if (!(potfit->V[k] < best_pot))
 +        {
 +            continue;
 +        }
 +        best_pot   = potfit->V[k];
 +        best_angle = potfit->degangle[k];
 +    }
 +
 +    return best_angle;
 +}
 +
 +
 +/* Tells whether the potential angle fit data of this group has to be reduced
 + * at this time step: only for fit type erotgFitPOT, and only on steps that
 + * are output steps for nstsout or nstrout */
 +static gmx_inline gmx_bool bPotAngle(t_rot *rot, t_rotgrp *rotg, gmx_large_int_t step)
 +{
 +    if (erotgFitPOT != rotg->eFittype)
 +    {
 +        return FALSE;
 +    }
 +
 +    return ( do_per_step(step, rot->nstsout) || do_per_step(step, rot->nstrout) );
 +}
 +
 +/* Tells whether the slab torque data of this group has to be reduced at this
 + * time step: only for flexible groups (ISFLEX), and only on nstsout steps */
 +static gmx_inline gmx_bool bSlabTau(t_rot *rot, t_rotgrp *rotg, gmx_large_int_t step)
 +{
 +    if (!(ISFLEX(rotg)))
 +    {
 +        return FALSE;
 +    }
 +
 +    return ( do_per_step(step, rot->nstsout) != 0 );
 +}
 +
 +/* Output rotation energy, torques, etc. for each rotation group.
 + *
 + * If PAR(cr), the per-group values V, torque_v, angle_v and weight_v - plus,
 + * on the respective steps, the potential fit energies and the slab torques -
 + * are packed into er->mpi_inbuf, summed with MPI_Reduce onto the master and
 + * unpacked there in exactly the same order. The master then writes one
 + * record for time t to out_rot (on nstrout steps) and to out_torque and
 + * out_angles (on nstsout steps).
 + *
 + *   cr    communication record (decides parallel / master)
 + *   rot   rotation data; the erg fields of the master are overwritten with
 + *         the reduced values
 + *   t     time, written in the first column of out_torque / out_angles
 + *   step  current step, selects which items are reduced and written
 + */
 +static void reduce_output(t_commrec *cr, t_rot *rot, real t, gmx_large_int_t step)
 +{
 +    int             g, i, islab, nslabs = 0;
 +    int             count; /* MPI element counter                               */
 +    t_rotgrp       *rotg;
 +    gmx_enfrot_t    er;    /* Pointer to the enforced rotation buffer variables */
 +    gmx_enfrotgrp_t erg;   /* Pointer to enforced rotation group data           */
 +    real            fitangle;
 +    gmx_bool        bFlex;
 +
 +
 +    er = rot->enfrot;
 +
 +    /* Fill the MPI buffer with stuff to reduce. If items are added for reduction
 +     * here, the MPI buffer size has to be enlarged also in calc_mpi_bufsize() */
 +    if (PAR(cr))
 +    {
 +        count = 0;
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg                   = &rot->grp[g];
 +            erg                    = rotg->enfrotgrp;
 +            nslabs                 = erg->slab_last - erg->slab_first + 1;
 +            er->mpi_inbuf[count++] = erg->V;
 +            er->mpi_inbuf[count++] = erg->torque_v;
 +            er->mpi_inbuf[count++] = erg->angle_v;
 +            er->mpi_inbuf[count++] = erg->weight_v; /* weights are not needed for flex types, but this is just a single value */
 +
 +            if (bPotAngle(rot, rotg, step))
 +            {
 +                for (i = 0; i < rotg->PotAngle_nstep; i++)
 +                {
 +                    er->mpi_inbuf[count++] = erg->PotAngleFit->V[i];
 +                }
 +            }
 +            if (bSlabTau(rot, rotg, step))
 +            {
 +                for (i = 0; i < nslabs; i++)
 +                {
 +                    er->mpi_inbuf[count++] = erg->slab_torque_v[i];
 +                }
 +            }
 +        }
 +        /* NOTE(review): this check runs only after all elements have been
 +         * stored, so an overflow has already written past mpi_inbuf when it
 +         * is detected here */
 +        if (count > er->mpi_bufsize)
 +        {
 +            gmx_fatal(FARGS, "%s MPI buffer overflow, please report this error.", RotStr);
 +        }
 +
 +#ifdef GMX_MPI
 +        MPI_Reduce(er->mpi_inbuf, er->mpi_outbuf, count, GMX_MPI_REAL, MPI_SUM, MASTERRANK(cr), cr->mpi_comm_mygroup);
 +#endif
 +
 +        /* Copy back the reduced data from the buffer on the master.
 +         * NOTE(review): without GMX_MPI nothing above fills mpi_outbuf;
 +         * presumably PAR(cr) is never true in that case - confirm.
 +         * The unpacking order below must mirror the packing order above. */
 +        if (MASTER(cr))
 +        {
 +            count = 0;
 +            for (g = 0; g < rot->ngrp; g++)
 +            {
 +                rotg          = &rot->grp[g];
 +                erg           = rotg->enfrotgrp;
 +                nslabs        = erg->slab_last - erg->slab_first + 1;
 +                erg->V        = er->mpi_outbuf[count++];
 +                erg->torque_v = er->mpi_outbuf[count++];
 +                erg->angle_v  = er->mpi_outbuf[count++];
 +                erg->weight_v = er->mpi_outbuf[count++];
 +
 +                if (bPotAngle(rot, rotg, step))
 +                {
 +                    for (i = 0; i < rotg->PotAngle_nstep; i++)
 +                    {
 +                        erg->PotAngleFit->V[i] = er->mpi_outbuf[count++];
 +                    }
 +                }
 +                if (bSlabTau(rot, rotg, step))
 +                {
 +                    for (i = 0; i < nslabs; i++)
 +                    {
 +                        erg->slab_torque_v[i] = er->mpi_outbuf[count++];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Output */
 +    if (MASTER(cr))
 +    {
 +        /* Angle and torque for each rotation group */
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg  = &rot->grp[g];
 +            bFlex = ISFLEX(rotg);
 +
 +            erg = rotg->enfrotgrp;
 +
 +            /* Output to main rotation output file: */
 +            if (do_per_step(step, rot->nstrout) )
 +            {
 +                if (erotgFitPOT == rotg->eFittype)
 +                {
 +                    fitangle = get_fitangle(rotg, erg);
 +                }
 +                else
 +                {
 +                    if (bFlex)
 +                    {
 +                        fitangle = erg->angle_v; /* RMSD fit angle */
 +                    }
 +                    else
 +                    {
 +                        /* Weighted average of the summed angles, converted
 +                         * from radians to degrees */
 +                        fitangle = (erg->angle_v/erg->weight_v)*180.0*M_1_PI;
 +                    }
 +                }
 +                /* Three columns per group: fit angle, torque, potential */
 +                fprintf(er->out_rot, "%12.4f", fitangle);
 +                fprintf(er->out_rot, "%12.3e", erg->torque_v);
 +                fprintf(er->out_rot, "%12.3e", erg->V);
 +            }
 +
 +            if (do_per_step(step, rot->nstsout) )
 +            {
 +                /* Output to torque log file: */
 +                if (bFlex)
 +                {
 +                    fprintf(er->out_torque, "%12.3e%6d", t, g);
 +                    for (i = erg->slab_first; i <= erg->slab_last; i++)
 +                    {
 +                        islab = i - erg->slab_first;  /* slab index */
 +                        /* Only output if enough weight is in slab */
 +                        if (erg->slab_weights[islab] > rotg->min_gaussian)
 +                        {
 +                            fprintf(er->out_torque, "%6d%12.3e", i, erg->slab_torque_v[islab]);
 +                        }
 +                    }
 +                    fprintf(er->out_torque, "\n");
 +                }
 +
 +                /* Output to angles log file: */
 +                if (erotgFitPOT == rotg->eFittype)
 +                {
 +                    fprintf(er->out_angles, "%12.3e%6d%12.4f", t, g, erg->degangle);
 +                    /* Output energies at a set of angles around the reference angle */
 +                    for (i = 0; i < rotg->PotAngle_nstep; i++)
 +                    {
 +                        fprintf(er->out_angles, "%12.3e", erg->PotAngleFit->V[i]);
 +                    }
 +                    fprintf(er->out_angles, "\n");
 +                }
 +            }
 +        }
 +        /* Terminate the out_rot record that was assembled group by group */
 +        if (do_per_step(step, rot->nstrout) )
 +        {
 +            fprintf(er->out_rot, "\n");
 +        }
 +    }
 +}
 +
 +
 +/* Adds the enforced rotation forces of all rotation groups to the local
 + * force array f and returns the sum of the group potentials (erg->V) as they
 + * are stored on this node. On output steps the per-group data is reduced and
 + * written by reduce_output(), unless output is still suppressed (er->bOut).
 + * Should be called after the SR forces have been evaluated */
 +extern real add_rot_forces(t_rot *rot, rvec f[], t_commrec *cr, gmx_large_int_t step, real t)
 +{
 +    gmx_enfrot_t    er   = rot->enfrot; /* Enforced rotation buffer variables      */
 +    gmx_enfrotgrp_t erg;                /* Enforced rotation group data; this name
 +                                           is also used by PRINT_POT_TAU below     */
 +    real            Vrot = 0.0;         /* Assembles the local parts of the
 +                                           potential from all groups               */
 +    int             igrp, iat;
 +
 +
 +    /* Loop over enforced rotation groups (usually 1, though) */
 +    for (igrp = 0; igrp < rot->ngrp; igrp++)
 +    {
 +        erg   = rot->grp[igrp].enfrotgrp;
 +        Vrot += erg->V;
 +
 +        /* ind_loc[iat] is the index into f of the iat-th local group atom */
 +        for (iat = 0; iat < erg->nat_loc; iat++)
 +        {
 +            rvec_inc(f[erg->ind_loc[iat]], erg->f_rot_loc[iat]);
 +        }
 +    }
 +
 +    /* Reduce energy, torque, angles etc. to get the sum values (per rotation
 +     * group) on the master and output these values to file. */
 +    if ( (do_per_step(step, rot->nstrout) || do_per_step(step, rot->nstsout)) && er->bOut)
 +    {
 +        reduce_output(cr, rot, t, step);
 +    }
 +
 +    /* When appending, er->bOut is FALSE the first time to avoid duplicate entries */
 +    er->bOut = TRUE;
 +
 +    PRINT_POT_TAU
 +
 +    return Vrot;
 +}
 +
 +
 +/* The Gaussian norm is chosen such that the sum of the gaussian functions
 + * over the slabs is approximately 1.0 everywhere. It is the prefactor in
 + * gaussian_weight() and the upper limit for min_gaussian in calc_beta_max() */
 +#define GAUSS_NORM   0.569917543430618
 +
 +
 +/* Calculate the maximum beta that leads to a gaussian larger than
 + * min_gaussian, i.e. solve
 + *     GAUSS_NORM * exp(-beta^2 / (2*sigma^2)) = min_gaussian
 + * for beta, with sigma = 0.7*slab_dist (the same sigma as in
 + * gaussian_weight()).
 + *
 + * Also does some checks: aborts with gmx_fatal() if slab_dist <= 0, if
 + * min_gaussian <= 0, or if min_gaussian > GAUSS_NORM (the logarithm would be
 + * positive and the square root undefined).
 + */
 +static double calc_beta_max(real min_gaussian, real slab_dist)
 +{
 +    double sigma;
 +    double arg;
 +
 +
 +    /* Actually the next two checks are already made in grompp */
 +    if (slab_dist <= 0)
 +    {
 +        gmx_fatal(FARGS, "Slab distance of flexible rotation groups must be > 0 !");
 +    }
 +    if (min_gaussian <= 0)
 +    {
 +        /* The value must be passed for the %f conversion */
 +        gmx_fatal(FARGS, "Cutoff value for Gaussian must be > 0. (You requested %f)", min_gaussian);
 +    }
 +
 +    /* Define the sigma value */
 +    sigma = 0.7*slab_dist;
 +
 +    /* Calculate the argument for the logarithm and check that the log() result is negative or 0 */
 +    arg = min_gaussian/GAUSS_NORM;
 +    if (arg > 1.0)
 +    {
 +        gmx_fatal(FARGS, "min_gaussian of flexible rotation groups must be <=%g", GAUSS_NORM);
 +    }
 +
 +    return sqrt(-2.0*sigma*sigma*log(arg));
 +}
 +
 +
 +/* Returns the projection of curr_x on the rotation vector rotg->vec minus
 + * n times the slab distance. gaussian_weight() uses this as the argument of
 + * the Gaussian of slab n. */
 +static gmx_inline real calc_beta(rvec curr_x, t_rotgrp *rotg, int n)
 +{
 +    return iprod(curr_x, rotg->vec) - rotg->slab_dist * n;
 +}
 +
 +
 +/* Returns the Gaussian weight of slab n at position curr_x:
 + *     GAUSS_NORM * exp(-0.5 * (beta/sigma)^2)
 + * with beta = calc_beta(curr_x, rotg, n) and sigma = 0.7*slab_dist */
 +static gmx_inline real gaussian_weight(rvec curr_x, t_rotgrp *rotg, int n)
 +{
 +    /* Width of the Gaussian, tied to the slab distance */
 +    const real width     = 0.7*rotg->slab_dist;
 +    const real prefactor = GAUSS_NORM;
 +
 +
 +    return prefactor * exp( -0.5 * sqr( calc_beta(curr_x, rotg, n)/width ) );
 +}
 +
 +
 +/* Returns the weight of slab j, i.e. the sum over all rotg->nat atoms of
 + * gaussian_weight(xc[i], rotg, j) * mc[i]. As a by-product the sum of the
 + * positions xc[i], each multiplied by that same per-atom weight, is stored in
 + * *x_weighted_sum (it is cleared first). The sum is NOT normalized here;
 + * get_slab_centers() divides it by the returned weight. */
 +static real get_slab_weight(int j, t_rotgrp *rotg, rvec xc[], real mc[], rvec *x_weighted_sum)
 +{
 +    rvec curr_x_weighted;  /* The weighted position of a single atom       */
 +    real gaussian;         /* A single gaussian weight                     */
 +    real wgauss;           /* gaussian times current mass                  */
 +    real slabweight = 0.0; /* The sum of weights in the slab               */
 +    int  i;
 +
 +
 +    clear_rvec(*x_weighted_sum);
 +
 +    /* Loop over all atoms in the rotation group */
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        gaussian = gaussian_weight(xc[i], rotg, j);
 +        wgauss   = gaussian * mc[i];
 +        svmul(wgauss, xc[i], curr_x_weighted);
 +        rvec_add(*x_weighted_sum, curr_x_weighted, *x_weighted_sum);
 +        slabweight += wgauss;
 +    }  /* END of loop over rotation group atoms */
 +
 +    return slabweight;
 +}
 +
 +
 +/* Determines weight and center of every slab from slab_first to slab_last
 + * and stores them in erg->slab_weights / erg->slab_center. Aborts with
 + * gmx_fatal() if the weight of a slab is not larger than WEIGHT_MIN. With
 + * bReference set, the centers are copied to erg->slab_center_ref as well.
 + * If out_slabs is not NULL and bOutStep is set, one line with all slab
 + * centers is written to out_slabs. */
 +static void get_slab_centers(
 +        t_rotgrp  *rotg,       /* The rotation group information               */
 +        rvec      *xc,         /* The rotation group positions; will
 +                                  typically be enfrotgrp->xc, but at first call
 +                                  it is enfrotgrp->xc_ref                      */
 +        real      *mc,         /* The masses of the rotation group atoms       */
 +        int        g,          /* The number of the rotation group             */
 +        real       time,       /* Used for output only                         */
 +        FILE      *out_slabs,  /* For outputting center per slab information   */
 +        gmx_bool   bOutStep,   /* Is this an output step?                      */
 +        gmx_bool   bReference) /* If this routine is called from
 +                                  init_rot_group we need to store
 +                                  the reference slab centers                   */
 +{
 +    gmx_enfrotgrp_t erg = rotg->enfrotgrp; /* Enforced rotation group data    */
 +    int             slab;                  /* Slab number                     */
 +    int             idx;                   /* Array index of that slab        */
 +    real           *center;
 +
 +
 +    for (slab = erg->slab_first; slab <= erg->slab_last; slab++)
 +    {
 +        idx                    = slab - erg->slab_first;
 +        erg->slab_weights[idx] = get_slab_weight(slab, rotg, xc, mc, &erg->slab_center[idx]);
 +
 +        if (!(erg->slab_weights[idx] > WEIGHT_MIN))
 +        {
 +            /* We need to check this here, since we divide through slab_weights
 +             * in the flexible low-level routines! */
 +            gmx_fatal(FARGS, "Not enough weight in slab %d. Slab center cannot be determined!", slab);
 +        }
 +        else
 +        {
 +            /* Turn the weighted sum of positions into the slab center */
 +            svmul(1.0/erg->slab_weights[idx], erg->slab_center[idx], erg->slab_center[idx]);
 +        }
 +
 +        /* At first time step: save the centers of the reference structure */
 +        if (bReference)
 +        {
 +            copy_rvec(erg->slab_center[idx], erg->slab_center_ref[idx]);
 +        }
 +    }
 +
 +    /* Nothing more to do unless this is an output step with an open file */
 +    if ( (NULL == out_slabs) || !bOutStep)
 +    {
 +        return;
 +    }
 +
 +    fprintf(out_slabs, "%12.3e%6d", time, g);
 +    for (slab = erg->slab_first; slab <= erg->slab_last; slab++)
 +    {
 +        center = erg->slab_center[slab - erg->slab_first];
 +        fprintf(out_slabs, "%6d%12.3e%12.3e%12.3e",
 +                slab, center[XX], center[YY], center[ZZ]);
 +    }
 +    fprintf(out_slabs, "\n");
 +}
 +
 +
 +/* Fills rotmat with the matrix
 + *     cos(a)*1 + sin(a)*[vec]x + (1-cos(a))*vec*vec^T
 + * for the angle a = degangle (given in degrees) and the vector vec, where
 + * [vec]x is the cross product matrix of vec. Elements are addressed as
 + * rotmat[row][column].
 + * NOTE(review): this is a rotation about vec only if vec has unit length;
 + * vec is not normalized here - confirm that all callers pass a unit vector.
 + * With PRINTMATRIX defined, the matrix is also printed to stderr. */
 +static void calc_rotmat(
 +        rvec   vec,
 +        real   degangle,      /* Angle alpha of rotation at time t in degrees       */
 +        matrix rotmat)        /* Rotation matrix                                    */
 +{
 +    real radangle;            /* Rotation angle in radians */
 +    real cosa;                /* cosine alpha              */
 +    real sina;                /* sine alpha                */
 +    real OMcosa;              /* 1 - cos(alpha)            */
 +    real dumxy, dumxz, dumyz; /* save computations         */
 +    rvec rot_vec;             /* Rotate around rot_vec ... */
 +
 +
 +    radangle = degangle * M_PI/180.0;
 +    copy_rvec(vec, rot_vec );
 +
 +    /* Precompute some variables: */
 +    cosa   = cos(radangle);
 +    sina   = sin(radangle);
 +    OMcosa = 1.0 - cosa;
 +    /* Off-diagonal parts of (1-cos(a))*vec*vec^T, each used twice below */
 +    dumxy  = rot_vec[XX]*rot_vec[YY]*OMcosa;
 +    dumxz  = rot_vec[XX]*rot_vec[ZZ]*OMcosa;
 +    dumyz  = rot_vec[YY]*rot_vec[ZZ]*OMcosa;
 +
 +    /* Construct the rotation matrix for this rotation group: */
 +    /* 1st column: */
 +    rotmat[XX][XX] = cosa  + rot_vec[XX]*rot_vec[XX]*OMcosa;
 +    rotmat[YY][XX] = dumxy + rot_vec[ZZ]*sina;
 +    rotmat[ZZ][XX] = dumxz - rot_vec[YY]*sina;
 +    /* 2nd column: */
 +    rotmat[XX][YY] = dumxy - rot_vec[ZZ]*sina;
 +    rotmat[YY][YY] = cosa  + rot_vec[YY]*rot_vec[YY]*OMcosa;
 +    rotmat[ZZ][YY] = dumyz + rot_vec[XX]*sina;
 +    /* 3rd column: */
 +    rotmat[XX][ZZ] = dumxz + rot_vec[YY]*sina;
 +    rotmat[YY][ZZ] = dumyz - rot_vec[XX]*sina;
 +    rotmat[ZZ][ZZ] = cosa  + rot_vec[ZZ]*rot_vec[ZZ]*OMcosa;
 +
 +#ifdef PRINTMATRIX
 +    /* Debug output of the matrix, one row per line */
 +    int iii, jjj;
 +
 +    for (iii = 0; iii < 3; iii++)
 +    {
 +        for (jjj = 0; jjj < 3; jjj++)
 +        {
 +            fprintf(stderr, " %10.8f ",  rotmat[iii][jjj]);
 +        }
 +        fprintf(stderr, "\n");
 +    }
 +#endif
 +}
 +
 +
 +/* Returns the component along rotvec of the torque (x - pivot) x force,
 + * i.e. position relative to the pivot crossed with the force, projected on
 + * the rotation vector */
 +static gmx_inline real torque(
 +        rvec rotvec,  /* rotation vector; MUST be normalized!                 */
 +        rvec force,   /* force                                                */
 +        rvec x,       /* position of atom on which the force acts             */
 +        rvec pivot)   /* pivot point of rotation axis                         */
 +{
 +    rvec lever;   /* Position relative to the pivot */
 +    rvec moment;  /* lever x force                  */
 +
 +
 +    rvec_sub(x, pivot, lever);
 +    cprod(lever, force, moment);
 +
 +    /* Only the part parallel to the rotation vector is of interest */
 +    return iprod(moment, rotvec);
 +}
 +
 +
 +/* Right-aligned output of str to fp with the standard field width of 12
 + * characters. str is only read, so string literals may be passed. */
 +static void print_aligned(FILE *fp, const char *str)
 +{
 +    fprintf(fp, "%12s", str);
 +}
 +
 +
 +/* Right-aligned output of str to fp with the standard short field width of
 + * 6 characters. str is only read, so string literals may be passed. */
 +static void print_aligned_short(FILE *fp, const char *str)
 +{
 +    fprintf(fp, "%6s", str);
 +}
 +
 +
 +/* Opens fn for writing with ffopen() and writes a first comment line that
 + * states what is written to the file and at which step interval. Returns the
 + * open file. */
 +static FILE *open_output_file(const char *fn, int steps, const char what[])
 +{
 +    const char *plural = (steps > 1) ? "s" : "";
 +    FILE       *fp     = ffopen(fn, "w");
 +
 +
 +    fprintf(fp, "# Output of %s is written in intervals of %d time step%s.\n#\n",
 +            what, steps, plural);
 +
 +    return fp;
 +}
 +
 +
 +/* Open output file for slab center data. Call on master only */
- static FILE *open_slab_out(const char *fn, t_rot *rot, const output_env_t oenv)
++static FILE *open_slab_out(const char *fn, t_rot *rot)
 +{
 +    /* Returns the file fn opened for the slab center output. When appending
 +     * (MD_APPENDFILES) the existing file is opened in append mode and nothing
 +     * is written; otherwise a new file is started with a header. */
 +    FILE      *fp;
 +    int        g, i;
 +    t_rotgrp  *rotg;
 +
 +
 +    if (rot->enfrot->Flags & MD_APPENDFILES)
 +    {
 +        fp = gmx_fio_fopen(fn, "a");
 +    }
 +    else
 +    {
 +        fp = open_output_file(fn, rot->nstsout, "gaussian weighted slab centers");
 +
 +        /* One header line for each flexible group */
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg = &rot->grp[g];
 +            if (ISFLEX(rotg))
 +            {
 +                fprintf(fp, "# Rotation group %d (%s), slab distance %f nm, %s.\n",
 +                        g, erotg_names[rotg->eType], rotg->slab_dist,
 +                        rotg->bMassW ? "centers of mass" : "geometrical centers");
 +            }
 +        }
 +
 +        /* Column legend; the widths match the formats that get_slab_centers()
 +         * uses for the data lines */
 +        fprintf(fp, "# Reference centers are listed first (t=-1).\n");
 +        fprintf(fp, "# The following columns have the syntax:\n");
 +        fprintf(fp, "#     ");
 +        print_aligned_short(fp, "t");
 +        print_aligned_short(fp, "grp");
 +        /* Print legend for the first two entries only ... */
 +        for (i = 0; i < 2; i++)
 +        {
 +            print_aligned_short(fp, "slab");
 +            print_aligned(fp, "X center");
 +            print_aligned(fp, "Y center");
 +            print_aligned(fp, "Z center");
 +        }
 +        fprintf(fp, " ...\n");
 +        fflush(fp);
 +    }
 +
 +    return fp;
 +}
 +
 +
 +/* Append 'buf' to '*str', enlarging '*str' with srenew beforehand */
 +static void add_to_string(char **str, char *buf)
 +{
 +    int len;
 +
 +    /* Room for both strings plus the terminating NUL */
 +    len = strlen(*str) + strlen(buf) + 1;
 +    srenew(*str, len);
 +    strcat(*str, buf);
 +}
 +
 +
 +static void add_to_string_aligned(char **str, char *buf)
 +{
 +    char buf_aligned[STRLEN];
 +    /* Right-align buf in a 12-character field, then append the result to *str */
 +    sprintf(buf_aligned, "%12s", buf);
 +    add_to_string(str, buf_aligned);
 +}
 +
 +
 +/* Open the rotation output file (xvg) and, unless appending, write a header that
 + * describes every rotation group, followed by the column legend. Call on master only */
 +static FILE *open_rot_out(const char *fn, t_rot *rot, const output_env_t oenv)
 +{
 +    FILE           *fp;
 +    int             g, nsets;
 +    t_rotgrp       *rotg;
 +    const char    **setname;
 +    char            buf[50], buf2[75];
 +    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
 +    gmx_bool        bFlex;
 +    char           *LegendStr = NULL;
 +
 +    /* With MD_APPENDFILES set, reopen in append mode and write no header */
 +    if (rot->enfrot->Flags & MD_APPENDFILES)
 +    {
 +        fp = gmx_fio_fopen(fn, "a");
 +    }
 +    else
 +    {
 +        fp = xvgropen(fn, "Rotation angles and energy", "Time (ps)", "angles (degrees) and energies (kJ/mol)", oenv);
 +        fprintf(fp, "# Output of enforced rotation data is written in intervals of %d time step%s.\n#\n", rot->nstrout, rot->nstrout > 1 ? "s" : "");
 +        fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector v.\n");
 +        fprintf(fp, "# To obtain the vectorial torque, multiply tau with the group's rot_vec.\n");
 +        fprintf(fp, "# For flexible groups, tau(t,n) from all slabs n have been summed in a single value tau(t) here.\n");
 +        fprintf(fp, "# The torques tau(t,n) are found in the rottorque.log (-rt) output file\n");
 +        /* Parameter summary, one section per rotation group */
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg  = &rot->grp[g];
 +            erg   = rotg->enfrotgrp;
 +            bFlex = ISFLEX(rotg);
 +
 +            fprintf(fp, "#\n");
 +            fprintf(fp, "# ROTATION GROUP %d, potential type '%s':\n", g, erotg_names[rotg->eType]);
 +            fprintf(fp, "# rot_massw%d          %s\n", g, yesno_names[rotg->bMassW]);
 +            fprintf(fp, "# rot_vec%d            %12.5e %12.5e %12.5e\n", g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]);
 +            fprintf(fp, "# rot_rate%d           %12.5e degrees/ps\n", g, rotg->rate);
 +            fprintf(fp, "# rot_k%d              %12.5e kJ/(mol*nm^2)\n", g, rotg->k);
 +            if (rotg->eType == erotgISO || rotg->eType == erotgPM || rotg->eType == erotgRM || rotg->eType == erotgRM2)
 +            {
 +                fprintf(fp, "# rot_pivot%d          %12.5e %12.5e %12.5e  nm\n", g, rotg->pivot[XX], rotg->pivot[YY], rotg->pivot[ZZ]);
 +            }
 +            /* Slab parameters are printed for flexible groups only */
 +            if (bFlex)
 +            {
 +                fprintf(fp, "# rot_slab_distance%d   %f nm\n", g, rotg->slab_dist);
 +                fprintf(fp, "# rot_min_gaussian%d   %12.5e\n", g, rotg->min_gaussian);
 +            }
 +
 +            /* Output the group centers for ISOPF, PMPF, RMPF, RM2PF, FLEXT and FLEX2T; despite the odd parenthesization the test is a plain OR */
 +            if ((rotg->eType == erotgISOPF) || (rotg->eType == erotgPMPF) || (rotg->eType == erotgRMPF) || (rotg->eType == erotgRM2PF
 +                                                                                                            || (rotg->eType == erotgFLEXT) || (rotg->eType == erotgFLEX2T)) )
 +            {
 +                fprintf(fp, "# ref. grp. %d center  %12.5e %12.5e %12.5e\n", g,
 +                        erg->xc_ref_center[XX], erg->xc_ref_center[YY], erg->xc_ref_center[ZZ]);
 +
 +                fprintf(fp, "# grp. %d init.center  %12.5e %12.5e %12.5e\n", g,
 +                        erg->xc_center[XX], erg->xc_center[YY], erg->xc_center[ZZ]);
 +            }
 +            /* rot_eps is printed for the RM2, FLEX2 and FLEX2T types only */
 +            if ( (rotg->eType == erotgRM2) || (rotg->eType == erotgFLEX2) || (rotg->eType == erotgFLEX2T) )
 +            {
 +                fprintf(fp, "# rot_eps%d            %12.5e nm^2\n", g, rotg->eps);
 +            }
 +            if (erotgFitPOT == rotg->eFittype)
 +            {
 +                fprintf(fp, "#\n");
 +                fprintf(fp, "# theta_fit%d is determined by first evaluating the potential for %d angles around theta_ref%d.\n",
 +                        g, rotg->PotAngle_nstep, g);
 +                fprintf(fp, "# The fit angle is the one with the smallest potential. It is given as the deviation\n");
 +                fprintf(fp, "# from the reference angle, i.e. if theta_ref=X and theta_fit=Y, then the angle with\n");
 +                fprintf(fp, "# minimal value of the potential is X+Y. Angular resolution is %g degrees.\n", rotg->PotAngle_step);
 +            }
 +        }
 +
 +        /* Build the '#' column legend in LegendStr and, alongside it, the xvg set names */
 +        snew(LegendStr, 1);
 +        LegendStr[0] = '\0';
 +        sprintf(buf, "#     %6s", "time");
 +        add_to_string_aligned(&LegendStr, buf);
 +        /* Four sets per group: theta_ref, theta_fit or theta_av, tau and energy */
 +        nsets = 0;
 +        snew(setname, 4*rot->ngrp);
 +
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg = &rot->grp[g];
 +            sprintf(buf, "theta_ref%d", g);
 +            add_to_string_aligned(&LegendStr, buf);
 +
 +            sprintf(buf2, "%s (degrees)", buf);
 +            setname[nsets] = strdup(buf2);
 +            nsets++;
 +        }
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg  = &rot->grp[g];
 +            bFlex = ISFLEX(rotg);
 +
 +            /* The column is labelled theta_fit for flexible groups and for the
 +             * potential fit type, and theta_av for all other groups */
 +            if (bFlex || erotgFitPOT == rotg->eFittype)
 +            {
 +                sprintf(buf, "theta_fit%d", g);
 +            }
 +            else
 +            {
 +                sprintf(buf, "theta_av%d", g);
 +            }
 +            add_to_string_aligned(&LegendStr, buf);
 +            sprintf(buf2, "%s (degrees)", buf);
 +            setname[nsets] = strdup(buf2);
 +            nsets++;
 +
 +            sprintf(buf, "tau%d", g);
 +            add_to_string_aligned(&LegendStr, buf);
 +            sprintf(buf2, "%s (kJ/mol)", buf);
 +            setname[nsets] = strdup(buf2);
 +            nsets++;
 +
 +            sprintf(buf, "energy%d", g);
 +            add_to_string_aligned(&LegendStr, buf);
 +            sprintf(buf2, "%s (kJ/mol)", buf);
 +            setname[nsets] = strdup(buf2);
 +            nsets++;
 +        }
 +        fprintf(fp, "#\n");
 +
 +        if (nsets > 1)
 +        {
 +            xvgr_legend(fp, nsets, setname, oenv);
 +        }
 +        sfree(setname);
 +        /* NOTE(review): only the setname array is freed above; the strdup'ed names look leaked unless xvgr_legend takes ownership - confirm */
 +        fprintf(fp, "#\n# Legend for the following data columns:\n");
 +        fprintf(fp, "%s\n", LegendStr);
 +        sfree(LegendStr);
 +
 +        fflush(fp);
 +    }
 +
 +    return fp;
 +}
 +
 +
 +/* Open the angle output file fn and, unless appending, write a header plus a column legend per group. Call on master only */
- static FILE *open_angles_out(const char *fn, t_rot *rot, const output_env_t oenv)
++static FILE *open_angles_out(const char *fn, t_rot *rot)
 +{
 +    int             g, i;
 +    FILE           *fp;
 +    t_rotgrp       *rotg;
 +    gmx_enfrotgrp_t erg;        /* Pointer to enforced rotation group data */
 +    char            buf[100];
 +
 +    /* With MD_APPENDFILES set, reopen in append mode and write no header */
 +    if (rot->enfrot->Flags & MD_APPENDFILES)
 +    {
 +        fp = gmx_fio_fopen(fn, "a");
 +    }
 +    else
 +    {
 +        /* Open output file and write some information about its structure: */
 +        fp = open_output_file(fn, rot->nstsout, "rotation group angles");
 +        fprintf(fp, "# All angles given in degrees, time in ps.\n");
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg = &rot->grp[g];
 +            erg  = rotg->enfrotgrp;
 +
 +            /* A header is written for this group only if its potential type is
 +             * flexible or its fit type is potential */
 +            if (ISFLEX(rotg) || (erotgFitPOT == rotg->eFittype) )
 +            {
 +                if (ISFLEX(rotg))
 +                {
 +                    sprintf(buf, " slab distance %f nm, ", rotg->slab_dist);
 +                }
 +                else
 +                {
 +                    buf[0] = '\0';
 +                }
 +
 +                fprintf(fp, "#\n# ROTATION GROUP %d '%s',%s fit type '%s'.\n",
 +                        g, erotg_names[rotg->eType], buf, erotg_fitnames[rotg->eFittype]);
 +
 +                /* Special type of fitting using the potential minimum. This is
 +                 * done for the whole group only, not for the individual slabs. */
 +                if (erotgFitPOT == rotg->eFittype)
 +                {
 +                    fprintf(fp, "#    To obtain theta_fit%d, the potential is evaluated for %d angles around theta_ref%d\n", g, rotg->PotAngle_nstep, g);
 +                    fprintf(fp, "#    The fit angle in the rotation standard outfile is the one with minimal energy E(theta_fit) [kJ/mol].\n");
 +                    fprintf(fp, "#\n");
 +                }
 +
 +                fprintf(fp, "# Legend for the group %d data columns:\n", g);
 +                fprintf(fp, "#     ");
 +                print_aligned_short(fp, "time");
 +                print_aligned_short(fp, "grp");
 +                print_aligned(fp, "theta_ref");
 +
 +                if (erotgFitPOT == rotg->eFittype)
 +                {
 +                    /* One "E(angle)" column per entry of PotAngleFit->degangle; buf is reused as scratch here */
 +                    for (i = 0; i < rotg->PotAngle_nstep; i++)
 +                    {
 +                        sprintf(buf, "E(%g)", erg->PotAngleFit->degangle[i]);
 +                        print_aligned(fp, buf);
 +                    }
 +                }
 +                else
 +                {
 +                    /* Per-slab (slab, atoms, theta_fit) triplets; two are spelled out, " ..." stands for the rest */
 +                    print_aligned_short(fp, "slab");
 +                    print_aligned_short(fp, "atoms");
 +                    print_aligned(fp, "theta_fit");
 +                    print_aligned_short(fp, "slab");
 +                    print_aligned_short(fp, "atoms");
 +                    print_aligned(fp, "theta_fit");
 +                    fprintf(fp, " ...");
 +                }
 +                fprintf(fp, "\n");
 +            }
 +        }
 +        fflush(fp);
 +    }
 +
 +    return fp;
 +}
 +
 +
 +/* Open torque output file and, unless appending, write some information about its structure.
 + * Call on master only */
- static FILE *open_torque_out(const char *fn, t_rot *rot, const output_env_t oenv)
++static FILE *open_torque_out(const char *fn, t_rot *rot)
 +{
 +    FILE      *fp;
 +    int        g;
 +    t_rotgrp  *rotg;
 +
 +    /* With MD_APPENDFILES set, reopen in append mode and write no header */
 +    if (rot->enfrot->Flags & MD_APPENDFILES)
 +    {
 +        fp = gmx_fio_fopen(fn, "a");
 +    }
 +    else
 +    {
 +        fp = open_output_file(fn, rot->nstsout, "torques");
 +        /* Only flexible groups get a description block */
 +        for (g = 0; g < rot->ngrp; g++)
 +        {
 +            rotg = &rot->grp[g];
 +            if (ISFLEX(rotg))
 +            {
 +                fprintf(fp, "# Rotation group %d (%s), slab distance %f nm.\n", g, erotg_names[rotg->eType], rotg->slab_dist);
 +                fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector.\n");
 +                fprintf(fp, "# To obtain the vectorial torque, multiply tau with\n");
 +                fprintf(fp, "# rot_vec%d            %10.3e %10.3e %10.3e\n", g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]);
 +                fprintf(fp, "#\n");
 +            }
 +        }
 +        fprintf(fp, "# Legend for the following data columns: (tau=torque for that slab):\n");
 +        fprintf(fp, "#     ");
 +        print_aligned_short(fp, "t");
 +        print_aligned_short(fp, "grp");
 +        print_aligned_short(fp, "slab");
 +        print_aligned(fp, "tau");
 +        print_aligned_short(fp, "slab");
 +        print_aligned(fp, "tau");
 +        fprintf(fp, " ...\n");
 +        fflush(fp);
 +    }
 +
 +    return fp;
 +}
 +
 +
 +static void swap_val(double* vec, int i, int j)
 +{
 +    double tmp = vec[j];
 +
 +    /* Exchange elements i and j of vec */
 +    vec[j] = vec[i];
 +    vec[i] = tmp;
 +}
 +
 +
 +static void swap_col(double **mat, int i, int j)
 +{
 +    double tmp[3] = {mat[0][j], mat[1][j], mat[2][j]};
 +
 +    /* Exchange columns i and j in the first three rows of mat; tmp holds the old column j */
 +    mat[0][j] = mat[0][i];
 +    mat[1][j] = mat[1][i];
 +    mat[2][j] = mat[2][i];
 +
 +    mat[0][i] = tmp[0];
 +    mat[1][i] = tmp[1];
 +    mat[2][i] = tmp[2];
 +}
 +
 +
 +/* Diagonalize a 3x3 matrix with jacobi(); eigenvectors are stored in columns of eigen_vec, eigenvalues ascending */
 +static void diagonalize_symmetric(
 +        double **matrix,
 +        double **eigen_vec,
 +        double   eigenval[3])
 +{
 +    int n_rot;
 +
 +    /* n_rot is filled in by jacobi() and not used afterwards */
 +    jacobi(matrix, 3, eigenval, eigen_vec, &n_rot);
 +
 +    /* Sort ascending with three compare-and-swap steps, keeping each eigenvector column with its eigenvalue */
 +    if (eigenval[0] > eigenval[1])
 +    {
 +        swap_val(eigenval, 0, 1);
 +        swap_col(eigen_vec, 0, 1);
 +    }
 +    if (eigenval[1] > eigenval[2])
 +    {
 +        swap_val(eigenval, 1, 2);
 +        swap_col(eigen_vec, 1, 2);
 +    }
 +    if (eigenval[0] > eigenval[1])
 +    {
 +        swap_val(eigenval, 0, 1);
 +        swap_col(eigen_vec, 0, 1);
 +    }
 +}
 +
 +
 +static void align_with_z(
 +        rvec* s,           /* Structure to align */
 +        int   natoms,
 +        rvec  axis)
 +{
 +    int     i, j, k;
 +    rvec    zet         = {0.0, 0.0, 1.0};
 +    rvec    rot_axis    = {0.0, 0.0, 0.0};
 +    rvec   *rotated_str = NULL;
 +    real    ooanorm;
 +    real    angle;
 +    matrix  rotmat;
 +
 +    /* Rotates the natoms positions in s, in place, by the rotation built from axis x z and the angle between axis and z */
 +    snew(rotated_str, natoms);
 +
 +    /* Normalize the axis; this overwrites the caller's axis vector */
 +    ooanorm = 1.0/norm(axis);
 +    svmul(ooanorm, axis, axis);
 +
 +    /* Rotation axis is axis x z, angle is acos(axis_z). acos() returns [0, pi], so the angle < 0 branch below is never taken */
 +    cprod(axis, zet, rot_axis);
 +    angle = acos(axis[2]);
 +    if (angle < 0.0)
 +    {
 +        angle += M_PI;
 +    }
 +
 +    /* Calculate the rotation matrix; the angle is passed in degrees */
 +    calc_rotmat(rot_axis, angle*180.0/M_PI, rotmat);
 +
 +    /* Apply the rotation matrix to s; the += assumes snew handed back zeroed memory - TODO confirm */
 +    for (i = 0; i < natoms; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            for (k = 0; k < 3; k++)
 +            {
 +                rotated_str[i][j] += rotmat[j][k]*s[i][k];
 +            }
 +        }
 +    }
 +
 +    /* Rewrite the rotated structure to s */
 +    for (i = 0; i < natoms; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            s[i][j] = rotated_str[i][j];
 +        }
 +    }
 +
 +    sfree(rotated_str);
 +}
 +
 +
 +static void calc_correl_matrix(rvec* Xstr, rvec* Ystr, double** Rmat, int natoms)
 +{
 +    int i, j, k;
 +
 +    /* Clear the 3x3 result first */
 +    for (i = 0; i < 3; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            Rmat[i][j] = 0.0;
 +        }
 +    }
 +    /* Rmat[i][j] = sum over atoms k of Ystr[k][i] * Xstr[k][j] */
 +    for (i = 0; i < 3; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            for (k = 0; k < natoms; k++)
 +            {
 +                Rmat[i][j] += Ystr[k][i] * Xstr[k][j];
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void weigh_coords(rvec* str, real* weight, int natoms)
 +{
 +    int i, j;
 +
 +    /* Scale every component of position i, in place, by sqrt(weight[i]) */
 +    for (i = 0; i < natoms; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            str[i][j] *= sqrt(weight[i]);
 +        }
 +    }
 +}
 +
 +
 +static real opt_angle_analytic(
 +        rvec* ref_s,
 +        rvec* act_s,
 +        real* weight,
 +        int   natoms,
 +        rvec  ref_com,
 +        rvec  act_com,
 +        rvec  axis)
 +{
 +    int      i, j, k;
 +    rvec    *ref_s_1 = NULL;
 +    rvec    *act_s_1 = NULL;
 +    rvec     shift;
 +    double **Rmat, **RtR, **eigvec;
 +    double   eigval[3];
 +    double   V[3][3], WS[3][3];
 +    double   rot_matrix[3][3];
 +    double   opt_angle;
 +
 +    /* Returns an angle in degrees derived from a rotation matrix fitted between ref_s and act_s in the plane perpendicular to axis */
 +    /* Do not change the original coordinates */
 +    snew(ref_s_1, natoms);
 +    snew(act_s_1, natoms);
 +    for (i = 0; i < natoms; i++)
 +    {
 +        copy_rvec(ref_s[i], ref_s_1[i]);
 +        copy_rvec(act_s[i], act_s_1[i]);
 +    }
 +
 +    /* Translate the structures to the origin */
 +    shift[XX] = -ref_com[XX];
 +    shift[YY] = -ref_com[YY];
 +    shift[ZZ] = -ref_com[ZZ];
 +    translate_x(ref_s_1, natoms, shift);
 +
 +    shift[XX] = -act_com[XX];
 +    shift[YY] = -act_com[YY];
 +    shift[ZZ] = -act_com[ZZ];
 +    translate_x(act_s_1, natoms, shift);
 +
 +    /* Align rotation axis with z (align_with_z also normalizes 'axis' in place) */
 +    align_with_z(ref_s_1, natoms, axis);
 +    align_with_z(act_s_1, natoms, axis);
 +
 +    /* Correlation matrix */
 +    Rmat = allocate_square_matrix(3);
 +    /* Project both structures onto the xy plane by zeroing z */
 +    for (i = 0; i < natoms; i++)
 +    {
 +        ref_s_1[i][2] = 0.0;
 +        act_s_1[i][2] = 0.0;
 +    }
 +
 +    /* Weight positions with sqrt(weight); skipped when weight is NULL */
 +    if (NULL != weight)
 +    {
 +        weigh_coords(ref_s_1, weight, natoms);
 +        weigh_coords(act_s_1, weight, natoms);
 +    }
 +
 +    /* Calculate correlation matrices R=YXt (X=ref_s; Y=act_s) */
 +    calc_correl_matrix(ref_s_1, act_s_1, Rmat, natoms);
 +
 +    /* Calculate RtR; the += assumes allocate_square_matrix returns a zeroed matrix - TODO confirm */
 +    RtR = allocate_square_matrix(3);
 +    for (i = 0; i < 3; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            for (k = 0; k < 3; k++)
 +            {
 +                RtR[i][j] += Rmat[k][i] * Rmat[k][j];
 +            }
 +        }
 +    }
 +    /* Diagonalize RtR */
 +    snew(eigvec, 3);
 +    for (i = 0; i < 3; i++)
 +    {
 +        snew(eigvec[i], 3);
 +    }
 +    /* diagonalize_symmetric sorts ascending; the two swaps below then move the smallest eigenpair to index 2 */
 +    diagonalize_symmetric(RtR, eigvec, eigval);
 +    swap_col(eigvec, 0, 1);
 +    swap_col(eigvec, 1, 2);
 +    swap_val(eigval, 0, 1);
 +    swap_val(eigval, 1, 2);
 +
 +    /* Calculate V = Rmat * WS, where only the upper-left 2x2 block of WS is non-zero */
 +    for (i = 0; i < 3; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            V[i][j]  = 0.0;
 +            WS[i][j] = 0.0;
 +        }
 +    }
 +    /* NOTE(review): no guard against eigval[j] <= 0 in the division below */
 +    for (i = 0; i < 2; i++)
 +    {
 +        for (j = 0; j < 2; j++)
 +        {
 +            WS[i][j] = eigvec[i][j] / sqrt(eigval[j]);
 +        }
 +    }
 +
 +    for (i = 0; i < 3; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            for (k = 0; k < 3; k++)
 +            {
 +                V[i][j] += Rmat[i][k]*WS[k][j];
 +            }
 +        }
 +    }
 +    free_square_matrix(Rmat, 3);
 +
 +    /* Calculate optimal rotation matrix: rot_matrix[i][j] = sum_k eigvec[i][k]*V[j][k] */
 +    for (i = 0; i < 3; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            rot_matrix[i][j] = 0.0;
 +        }
 +    }
 +
 +    for (i = 0; i < 3; i++)
 +    {
 +        for (j = 0; j < 3; j++)
 +        {
 +            for (k = 0; k < 3; k++)
 +            {
 +                rot_matrix[i][j] += eigvec[i][k]*V[j][k];
 +            }
 +        }
 +    }
 +    rot_matrix[2][2] = 1.0;
 +
 +    /* In some cases abs(rot_matrix[0][0]) can be slightly larger
 +     * than unity due to numerical inaccuracies. To be able to calculate
 +     * the acos function, we put these values back in range. */
 +    if (rot_matrix[0][0] > 1.0)
 +    {
 +        rot_matrix[0][0] = 1.0;
 +    }
 +    else if (rot_matrix[0][0] < -1.0)
 +    {
 +        rot_matrix[0][0] = -1.0;
 +    }
 +
 +    /* Determine the optimal rotation angle in degrees; its sign is flipped when rot_matrix[0][1] is negative */
 +    opt_angle = (-1.0)*acos(rot_matrix[0][0])*180.0/M_PI;
 +    if (rot_matrix[0][1] < 0.0)
 +    {
 +        opt_angle = (-1.0)*opt_angle;
 +    }
 +
 +    /* Give back some memory */
 +    free_square_matrix(RtR, 3);
 +    sfree(ref_s_1);
 +    sfree(act_s_1);
 +    for (i = 0; i < 3; i++)
 +    {
 +        sfree(eigvec[i]);
 +    }
 +    sfree(eigvec);
 +
 +    return (real) opt_angle;
 +}
 +
 +
 +/* Determine angle of the group by RMSD fit to the reference */
 +/* Not parallelized, call this routine only on the master */
 +static real flex_fit_angle(t_rotgrp *rotg)
 +{
 +    int             i;
 +    rvec           *fitcoords = NULL;
 +    rvec            center;     /* Center of positions passed to the fit routine */
 +    real            fitangle;   /* Angle of the rotation group derived by fitting */
 +    rvec            coord;
 +    real            scal;
 +    gmx_enfrotgrp_t erg;        /* Pointer to enforced rotation group data */
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Get the center of the rotation group.
 +     * Note, again, erg->xc has been sorted in do_flexible */
 +    get_center(erg->xc, erg->mc_sorted, rotg->nat, center);
 +
 +    /* === Determine the optimal fit angle for the rotation group === */
 +    if (rotg->eFittype == erotgFitNORM)
 +    {
 +        /* Normalize every position to its reference length */
 +        for (i = 0; i < rotg->nat; i++)
 +        {
 +            /* Put the center of the positions into the origin */
 +            rvec_sub(erg->xc[i], center, coord);
 +            /* Determine the scaling factor for the length (NOTE(review): no guard against norm(coord) == 0): */
 +            scal = erg->xc_ref_length[erg->xc_sortind[i]] / norm(coord);
 +            /* Get position, multiply with the scaling factor and save  */
 +            svmul(scal, coord, erg->xc_norm[i]);
 +        }
 +        fitcoords = erg->xc_norm;
 +    }
 +    else
 +    {
 +        fitcoords = erg->xc;
 +    }
 +    /* From the point of view of the current positions, the reference has rotated
 +     * backwards. Since we output the angle relative to the fixed reference,
 +     * we need the minus sign. */
 +    fitangle = -opt_angle_analytic(erg->xc_ref_sorted, fitcoords, erg->mc_sorted,
 +                                   rotg->nat, erg->xc_ref_center, center, rotg->vec);
 +
 +    return fitangle;
 +}
 +
 +
 +/* Determine actual angle of each slab by RMSD fit to the reference and print one output line to fp */
 +/* Not parallelized, call this routine only on the master */
 +static void flex_fit_angle_perslab(
 +        int       g,
 +        t_rotgrp *rotg,
 +        double    t,
 +        real      degangle,
 +        FILE     *fp)
 +{
 +    int             i, l, n, islab, ind;
 +    rvec            curr_x, ref_x;
 +    rvec            act_center; /* Center of actual positions that are passed to the fit routine */
 +    rvec            ref_center; /* Same for the reference positions */
 +    real            fitangle;   /* Angle of a slab derived from an RMSD fit to
 +                                 * the reference structure at t=0  */
 +    t_gmx_slabdata *sd;
 +    gmx_enfrotgrp_t erg;        /* Pointer to enforced rotation group data */
 +    real            OOm_av;     /* 1/average_mass of a rotation group atom */
 +    real            m_rel;      /* Relative mass of a rotation group atom  */
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Inverse of the average mass of a rotation group atom (presumably invmass is 1/total mass - confirm): */
 +    OOm_av = erg->invmass*rotg->nat;
 +
 +    /**********************************/
 +    /* First collect the data we need */
 +    /**********************************/
 +
 +    /* Collect the data for the individual slabs */
 +    for (n = erg->slab_first; n <= erg->slab_last; n++)
 +    {
 +        islab   = n - erg->slab_first; /* slab index */
 +        sd      = &(rotg->enfrotgrp->slab_data[islab]);
 +        sd->nat = erg->lastatom[islab]-erg->firstatom[islab]+1;
 +        ind     = 0;
 +
 +        /* Loop over the relevant atoms in the slab */
 +        for (l = erg->firstatom[islab]; l <= erg->lastatom[islab]; l++)
 +        {
 +            /* Current position of this atom, taken from erg->xc[l] */
 +            copy_rvec(erg->xc[l], curr_x);
 +
 +            /* The (unrotated) reference position of this atom is copied to ref_x.
 +             * Beware, the xc coords have been sorted in do_flexible */
 +            copy_rvec(erg->xc_ref_sorted[l], ref_x);
 +
 +            /* Save data for doing angular RMSD fit later */
 +            /* Save the current atom position */
 +            copy_rvec(curr_x, sd->x[ind]);
 +            /* Save the corresponding reference position */
 +            copy_rvec(ref_x, sd->ref[ind]);
 +
 +            /* The weight is always multiplied with the relative mass m_rel.
 +             * Presumably mc_sorted holds equal masses when no mass-weighting
 +             * was requested, which makes m_rel unity - confirm. */
 +            m_rel = erg->mc_sorted[l]*OOm_av;
 +
 +            /* Save the weight for this atom in this slab */
 +            sd->weight[ind] = gaussian_weight(curr_x, rotg, n) * m_rel;
 +
 +            /* Next atom in this slab */
 +            ind++;
 +        }
 +    }
 +
 +    /******************************/
 +    /* Now do the fit calculation */
 +    /******************************/
 +    /* Line prefix: time, group index and the angle passed in as degangle */
 +    fprintf(fp, "%12.3e%6d%12.3f", t, g, degangle);
 +
 +    /* === Now do RMSD fitting for each slab === */
 +    /* We require at least SLAB_MIN_ATOMS in a slab, such that the fit makes sense. */
 +#define SLAB_MIN_ATOMS 4
 +
 +    for (n = erg->slab_first; n <= erg->slab_last; n++)
 +    {
 +        islab = n - erg->slab_first; /* slab index */
 +        sd    = &(rotg->enfrotgrp->slab_data[islab]);
 +        if (sd->nat >= SLAB_MIN_ATOMS)
 +        {
 +            /* Get the center of the slabs reference and current positions */
 +            get_center(sd->ref, sd->weight, sd->nat, ref_center);
 +            get_center(sd->x, sd->weight, sd->nat, act_center);
 +            if (rotg->eFittype == erotgFitNORM)
 +            {
 +                /* Normalize every position to its reference length
 +                 * prior to performing the fit */
 +                for (i = 0; i < sd->nat; i++) /* Center */
 +                {
 +                    rvec_dec(sd->ref[i], ref_center);
 +                    rvec_dec(sd->x[i], act_center);
 +                    /* Normalize x_i such that it gets the same length as ref_i */
 +                    svmul( norm(sd->ref[i])/norm(sd->x[i]), sd->x[i], sd->x[i] );
 +                }
 +                /* We already subtracted the centers */
 +                clear_rvec(ref_center);
 +                clear_rvec(act_center);
 +            }
 +            fitangle = -opt_angle_analytic(sd->ref, sd->x, sd->weight, sd->nat,
 +                                           ref_center, act_center, rotg->vec);
 +            fprintf(fp, "%6d%6d%12.3f", n, sd->nat, fitangle);
 +        }
 +    }
 +    fprintf(fp, "\n");
 +
 +#undef SLAB_MIN_ATOMS
 +}
 +
 +
 +/* Shift x, in place, by the integer multiples 'is' of the box vectors */
 +static gmx_inline void shift_single_coord(matrix box, rvec x, const ivec is)
 +{
 +    int tx, ty, tz;
 +
 +
 +    tx = is[XX];
 +    ty = is[YY];
 +    tz = is[ZZ];
 +    /* Triclinic boxes also pick up the off-diagonal elements box[YY][XX], box[ZZ][XX] and box[ZZ][YY] */
 +    if (TRICLINIC(box))
 +    {
 +        x[XX] += tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
 +        x[YY] += ty*box[YY][YY]+tz*box[ZZ][YY];
 +        x[ZZ] += tz*box[ZZ][ZZ];
 +    }
 +    else
 +    {
 +        x[XX] += tx*box[XX][XX];
 +        x[YY] += ty*box[YY][YY];
 +        x[ZZ] += tz*box[ZZ][ZZ];
 +    }
 +}
 +
 +
 +/* Determine the 'home' slab of this atom which is the slab with the highest Gaussian weight of all.
 + * NOTE(review): round() truncates (a+0.5) toward zero, so negative values are not rounded to nearest (-1.6 gives -1) */
 +#define round(a) (int)(a+0.5)
 +static gmx_inline int get_homeslab(
 +        rvec curr_x,   /* The position for which the home slab shall be determined */
 +        rvec rotvec,   /* The rotation vector */
 +        real slabdist) /* The slab distance */
 +{
 +    real dist;
 +
 +
 +    /* Projection of the position onto the rotation vector, i.e. its distance
 +     * along rotvec from the coordinate origin */
 +    dist = iprod(rotvec, curr_x);
 +
 +    return round(dist / slabdist);
 +}
 +
 +
 +/* For a local atom determine the relevant slabs, i.e. slabs in
 + * which the gaussian is larger than min_gaussian. The weights and slab
 + * indices go to erg->gn_atom / erg->gn_slabind; the entry count is returned */
 +static int get_single_atom_gaussians(
 +        rvec       curr_x,
 +        t_rotgrp  *rotg)
 +{
 +    int             slab, homeslab;
 +    real            g;
 +    int             count = 0;
 +    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Determine the 'home' slab of this atom: */
 +    homeslab = get_homeslab(curr_x, rotg->vec, rotg->slab_dist);
 +
 +    /* First determine the weight in the atoms home slab: */
 +    g = gaussian_weight(curr_x, rotg, homeslab);
 +
 +    erg->gn_atom[count]    = g;
 +    erg->gn_slabind[count] = homeslab;
 +    count++;
 +
 +    /* NOTE(review): neither loop below checks count against the capacity of gn_atom / gn_slabind */
 +    /* Determine the max slab */
 +    slab = homeslab;
 +    while (g > rotg->min_gaussian)
 +    {
 +        slab++;
 +        g = gaussian_weight(curr_x, rotg, slab);
 +        erg->gn_slabind[count] = slab;
 +        erg->gn_atom[count]    = g;
 +        count++;
 +    }
 +    count--;
 +    /* The entry dropped above is the last one written, whose weight was not above min_gaussian; it is the home slab entry if the loop never ran */
- /* Determine the max slab */
++    /* Determine the min slab */
 +    slab = homeslab;
 +    do
 +    {
 +        slab--;
 +        g = gaussian_weight(curr_x, rotg, slab);
 +        erg->gn_slabind[count] = slab;
 +        erg->gn_atom[count]    = g;
 +        count++;
 +    }
 +    while (g > rotg->min_gaussian);
 +    count--;
 +    /* Again the last entry written, the first one not above min_gaussian, is dropped */
 +    return count;
 +}
 +
 +
 +static void flex2_precalc_inner_sum(t_rotgrp *rotg)
 +{
 +    int             i, n, islab;
 +    rvec            xi;       /* positions in the i-sum                        */
 +    rvec            xcn, ycn; /* the current and the reference slab centers    */
 +    real            gaussian_xi;
 +    rvec            yi0;
 +    rvec            rin;     /* Helper variables                              */
 +    real            fac, fac2;
 +    rvec            innersumvec;
 +    real            OOpsii, OOpsiistar;
 +    real            sin_rin; /* s_ii.r_ii */
 +    rvec            s_in, tmpvec, tmpvec2;
 +    real            mi, wi;  /* Mass-weighting of the positions                 */
 +    real            N_M;     /* N/M                                             */
 +    gmx_enfrotgrp_t erg;     /* Pointer to enforced rotation group data */
 +
 +    /* Precompute, for every slab, the inner sum vector and store it in erg->slab_innersumvec */
 +    erg = rotg->enfrotgrp;
 +    N_M = rotg->nat * erg->invmass;
 +
 +    /* Loop over all slabs from slab_first to slab_last */
 +    for (n = erg->slab_first; n <= erg->slab_last; n++)
 +    {
 +        islab = n - erg->slab_first; /* slab index */
 +
 +        /* The current center of this slab is saved in xcn: */
 +        copy_rvec(erg->slab_center[islab], xcn);
 +        /* ... and the reference center in ycn (the reference array is offset by slab_buffer): */
 +        copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
 +
 +        /*** D. Calculate the whole inner sum used for second and third sum */
 +        /* For slab n, we need to loop over all atoms i again. Since we sorted
 +         * the atoms with respect to the rotation vector, we know that it is sufficient
 +         * to calculate from firstatom to lastatom only. All other contributions will
 +         * be very small. */
 +        clear_rvec(innersumvec);
 +        for (i = erg->firstatom[islab]; i <= erg->lastatom[islab]; i++)
 +        {
 +            /* Coordinate xi of this atom */
 +            copy_rvec(erg->xc[i], xi);
 +
 +            /* The i-weights */
 +            gaussian_xi = gaussian_weight(xi, rotg, n);
 +            mi          = erg->mc_sorted[i]; /* need the sorted mass here */
 +            wi          = N_M*mi;
 +
 +            /* Calculate rin */
 +            copy_rvec(erg->xc_ref_sorted[i], yi0); /* Reference position yi0   */
 +            rvec_sub(yi0, ycn, tmpvec2);           /* tmpvec2 = yi0 - ycn      */
 +            mvmul(erg->rotmat, tmpvec2, rin);      /* rin = Omega.(yi0 - ycn)  */
 +
 +            /* Calculate psi_i* and sin */
 +            rvec_sub(xi, xcn, tmpvec2);           /* tmpvec2 = xi - xcn       */
 +            cprod(rotg->vec, tmpvec2, tmpvec);    /* tmpvec = v x (xi - xcn)  */
 +            OOpsiistar = norm2(tmpvec)+rotg->eps; /* OOpsii* = 1/psii* = |v x (xi-xcn)|^2 + eps */
 +            OOpsii     = norm(tmpvec);            /* OOpsii = 1 / psii = |v x (xi - xcn)| */
 +
 +            /*                           *         v x (xi - xcn)          */
 +            unitv(tmpvec, s_in);        /*  sin = ----------------         */
 +                                        /*        |v x (xi - xcn)|         */
 +
 +            sin_rin = iprod(s_in, rin); /* sin_rin = sin . rin             */
 +
 +            /* Now the whole sum: tmpvec = fac*rin - fac2*sin_rin*s_in, then scaled by wi*gaussian_xi*sin_rin */
 +            fac = OOpsii/OOpsiistar;
 +            svmul(fac, rin, tmpvec);
 +            fac2 = fac*fac*OOpsii;
 +            svmul(fac2*sin_rin, s_in, tmpvec2);
 +            rvec_dec(tmpvec, tmpvec2);
 +
 +            svmul(wi*gaussian_xi*sin_rin, tmpvec, tmpvec2);
 +
 +            rvec_inc(innersumvec, tmpvec2);
 +        } /* now we have the inner sum, used both for sum2 and sum3 */
 +
 +        /* Save it to be used in do_flex2_lowlevel */
 +        copy_rvec(innersumvec, erg->slab_innersumvec[islab]);
 +    } /* END of loop over slabs */
 +}
 +/* Precalculate the inner sum vector S^n of every slab in slab_first..slab_last */
 +/* and store it in erg->slab_innersumvec[]; it is used in do_flex_lowlevel.     */
 +static void flex_precalc_inner_sum(t_rotgrp *rotg)
 +{
 +    int             i, n, islab;
 +    rvec            xi;       /* position                                      */
 +    rvec            xcn, ycn; /* the current and the reference slab centers    */
 +    rvec            qin, rin; /* q_i^n and r_i^n                               */
 +    real            bin;      /* b_i^n = q_i^n . (xi - xcn)                    */
 +    rvec            tmpvec;   /* Helper variable                               */
 +    rvec            innersumvec; /* Inner part of sum_n2                          */
 +    real            gaussian_xi; /* Gaussian weight gn(xi)                        */
 +    real            mi, wi;      /* Mass-weighting of the positions               */
 +    real            N_M;         /* N/M                                           */
 +
 +    gmx_enfrotgrp_t erg;         /* Pointer to enforced rotation group data */
 +
 +
 +    erg = rotg->enfrotgrp;
 +    N_M = rotg->nat * erg->invmass;
 +
 +    /* Loop over all slabs that contain something */
 +    for (n = erg->slab_first; n <= erg->slab_last; n++)
 +    {
 +        islab = n - erg->slab_first; /* slab index */
 +
 +        /* The current center of this slab is saved in xcn: */
 +        copy_rvec(erg->slab_center[islab], xcn);
 +        /* ... and the reference center in ycn: */
 +        copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
 +
 +        /* For slab n, we need to loop over all atoms i again. Since we sorted
 +         * the atoms with respect to the rotation vector, we know that it is sufficient
 +         * to calculate from firstatom to lastatom only. All other contributions will
 +         * be very small. */
 +        clear_rvec(innersumvec);
 +        for (i = erg->firstatom[islab]; i <= erg->lastatom[islab]; i++)
 +        {
 +            /* Coordinate xi of this atom */
 +            copy_rvec(erg->xc[i], xi);
 +
 +            /* The i-weights */
 +            gaussian_xi = gaussian_weight(xi, rotg, n);
 +            mi          = erg->mc_sorted[i]; /* need the sorted mass here */
 +            wi          = N_M*mi;
 +
 +            /* Calculate rin and qin */
 +            rvec_sub(erg->xc_ref_sorted[i], ycn, tmpvec); /* tmpvec = yi0-ycn */
 +            mvmul(erg->rotmat, tmpvec, rin);              /* rin = Omega.(yi0 - ycn)  */
 +            cprod(rotg->vec, rin, tmpvec);                /* tmpvec = v x Omega*(yi0-ycn) */
 +
 +            /*                                *        v x Omega*(yi0-ycn)    */
 +            unitv(tmpvec, qin);              /* qin = ---------------------   */
 +                                             /*       |v x Omega*(yi0-ycn)|   */
 +
 +            /* Calculate bin */
 +            rvec_sub(xi, xcn, tmpvec);            /* tmpvec = xi-xcn          */
 +            bin = iprod(qin, tmpvec);             /* bin  = qin*(xi-xcn)      */
 +
 +            /* This atom's contribution: wi * gn(xi) * bin * qin */
 +            svmul(wi*gaussian_xi*bin, qin, tmpvec);
 +
 +            /* Add this contribution to the inner sum: */
 +            rvec_add(innersumvec, tmpvec, innersumvec);
 +        } /* now we have the inner sum vector S^n for this slab */
 +          /* Save it to be used in do_flex_lowlevel */
 +        copy_rvec(innersumvec, erg->slab_innersumvec[islab]);
 +    }
 +}
 +/* Flex2 kernel: returns this node's part of the rotation potential energy V and  */
 +/* stores the rotation force of every local group atom in erg->f_rot_loc[].      */
 +static real do_flex2_lowlevel(
 +        t_rotgrp  *rotg,
 +        real       sigma,   /* The Gaussian width sigma */
 +        rvec       x[],     /* Positions, indexed with erg->ind_loc[] */
 +        gmx_bool   bOutstepRot,  /* If set, also accumulate erg->slab_torque_v[] */
 +        gmx_bool   bOutstepSlab, /* Together with bOutstepRot enables the fit potential */
 +        matrix     box)          /* Passed on to shift_single_coord */
 +{
 +    int             count, ic, ii, j, m, n, islab, iigrp, ifit;
 +    rvec            xj;          /* current position of atom j (j-sum)            */
 +    rvec            yj0;         /* the reference position in the j-sum           */
 +    rvec            xcn, ycn;    /* the current and the reference slab centers    */
 +    real            V;           /* This node's part of the rotation pot. energy  */
 +    real            gaussian_xj; /* Gaussian weight                               */
 +    real            beta;
 +
 +    real            numerator, fit_numerator;
 +    rvec            rjn, fit_rjn; /* Helper variables                              */
 +    real            fac, fac2;
 +
 +    real            OOpsij, OOpsijstar;
 +    real            OOsigma2; /* 1/(sigma^2)                                   */
 +    real            sjn_rjn;
 +    real            betasigpsi;
 +    rvec            sjn, tmpvec, tmpvec2, yj0_ycn;
 +    rvec            sum1vec_part, sum1vec, sum2vec_part, sum2vec, sum3vec, sum4vec, innersumvec;
 +    real            sum3, sum4;
 +    gmx_enfrotgrp_t erg;     /* Pointer to enforced rotation group data       */
 +    real            mj, wj;  /* Mass-weighting of the positions               */
 +    real            N_M;     /* N/M                                           */
 +    real            Wjn;     /* g_n(x_j) m_j / Mjn                            */
 +    gmx_bool        bCalcPotFit;
 +
 +    /* To calculate the torque per slab */
 +    rvec slab_force;         /* Single force from slab n on one atom          */
 +    rvec slab_sum1vec_part;
 +    real slab_sum3part, slab_sum4part;
 +    rvec slab_sum1vec, slab_sum2vec, slab_sum3vec, slab_sum4vec;
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Pre-calculate the inner sums, so that we do not have to calculate
 +     * them again for every atom */
 +    flex2_precalc_inner_sum(rotg);
 +
 +    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype);
 +
 +    /********************************************************/
 +    /* Main loop over all local atoms of the rotation group */
 +    /********************************************************/
 +    N_M      = rotg->nat * erg->invmass;
 +    V        = 0.0;
 +    OOsigma2 = 1.0 / (sigma*sigma);
 +    for (j = 0; j < erg->nat_loc; j++)
 +    {
 +        /* Local index of a rotation group atom  */
 +        ii = erg->ind_loc[j];
 +        /* Position of this atom in the collective array */
 +        iigrp = erg->xc_ref_ind[j];
 +        /* Mass-weighting */
 +        mj = erg->mc[iigrp];  /* need the unsorted mass here */
 +        wj = N_M*mj;
 +
 +        /* Current position of this atom: x[ii][XX/YY/ZZ]
 +         * Note that erg->xc_center contains the center of mass in case the flex2-t
 +         * potential was chosen. For the flex2 potential erg->xc_center must be
 +         * zero. */
 +        rvec_sub(x[ii], erg->xc_center, xj);
 +
 +        /* Shift this atom such that it is near its reference */
 +        shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
 +
 +        /* Determine the slabs to loop over, i.e. the ones with contributions
 +         * larger than min_gaussian */
 +        count = get_single_atom_gaussians(xj, rotg);
 +
 +        clear_rvec(sum1vec_part);
 +        clear_rvec(sum2vec_part);
 +        sum3 = 0.0;
 +        sum4 = 0.0;
 +        /* Loop over the relevant slabs for this atom */
 +        for (ic = 0; ic < count; ic++)
 +        {
 +            n = erg->gn_slabind[ic];
 +
 +            /* Get the precomputed Gaussian value of slab n for xj */
 +            gaussian_xj = erg->gn_atom[ic];
 +
 +            islab = n - erg->slab_first; /* slab index */
 +
 +            /* The (unrotated) reference position of this atom is copied to yj0: */
 +            copy_rvec(rotg->x_ref[iigrp], yj0);
 +
 +            beta = calc_beta(xj, rotg, n);
 +
 +            /* The current center of this slab is saved in xcn: */
 +            copy_rvec(erg->slab_center[islab], xcn);
 +            /* ... and the reference center in ycn: */
 +            copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
 +
 +            rvec_sub(yj0, ycn, yj0_ycn);          /* yj0_ycn = yj0 - ycn      */
 +
 +            /* Rotate: */
 +            mvmul(erg->rotmat, yj0_ycn, rjn);     /* rjn = Omega.(yj0 - ycn)  */
 +
 +            /* Subtract the slab center from xj */
 +            rvec_sub(xj, xcn, tmpvec2);           /* tmpvec2 = xj - xcn       */
++            
++            /* In rare cases an atom position coincides with a slab center
++             * (tmpvec2 == 0), so v x (xj - xcn) is zero and cannot be normalized
++             * to sjn. Per the original note, this slab then contributes no force
++             * on the atom (it sits on the pivot), so we move on. NOTE(review): only
++             * an exactly zero norm is caught; the sum2 term is skipped too - confirm. */
++            if ( 0 == norm(tmpvec2) )
++            {
++                continue;
++            }
 +
 +            /* Calculate sjn */
 +            cprod(rotg->vec, tmpvec2, tmpvec);    /* tmpvec = v x (xj - xcn)  */
 +
 +            OOpsijstar = norm2(tmpvec)+rotg->eps; /* OOpsij* = 1/psij* = |v x (xj-xcn)|^2 + eps */
 +
 +            numerator = sqr(iprod(tmpvec, rjn));
 +
 +            /*********************************/
 +            /* Add to the rotation potential */
 +            /*********************************/
 +            V += 0.5*rotg->k*wj*gaussian_xj*numerator/OOpsijstar;
 +
 +            /* If requested, also calculate the potential for a set of angles
 +             * near the current reference angle */
 +            if (bCalcPotFit)
 +            {
 +                for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
 +                {
 +                    mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, fit_rjn);
 +                    fit_numerator              = sqr(iprod(tmpvec, fit_rjn));
 +                    erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*fit_numerator/OOpsijstar;
 +                }
 +            }
 +
 +            /*************************************/
 +            /* Now calculate the force on atom j */
 +            /*************************************/
 +
 +            OOpsij = norm(tmpvec);    /* OOpsij = 1 / psij = |v x (xj - xcn)| */
 +
 +            /*                              *         v x (xj - xcn)          */
 +            unitv(tmpvec, sjn);            /*  sjn = ----------------         */
 +                                           /*        |v x (xj - xcn)|         */
 +
 +            sjn_rjn = iprod(sjn, rjn);     /* sjn_rjn = sjn . rjn             */
 +
 +
 +            /*** A. Calculate the first of the four sum terms: ****************/
 +            fac = OOpsij/OOpsijstar;
 +            svmul(fac, rjn, tmpvec);
 +            fac2 = fac*fac*OOpsij;
 +            svmul(fac2*sjn_rjn, sjn, tmpvec2);
 +            rvec_dec(tmpvec, tmpvec2);
 +            fac2 = wj*gaussian_xj; /* also needed for sum4 */
 +            svmul(fac2*sjn_rjn, tmpvec, slab_sum1vec_part);
 +            /********************/
 +            /*** Add to sum1: ***/
 +            /********************/
 +            rvec_inc(sum1vec_part, slab_sum1vec_part); /* sum1 still needs to be vector multiplied with v */
 +
 +            /*** B. Calculate the fourth of the four sum terms: ***************/
 +            betasigpsi = beta*OOsigma2*OOpsij; /* this is also needed for sum3 */
 +            /********************/
 +            /*** Add to sum4: ***/
 +            /********************/
 +            slab_sum4part = fac2*betasigpsi*fac*sjn_rjn*sjn_rjn; /* Note that fac is still valid from above */
 +            sum4         += slab_sum4part;
 +
 +            /*** C. Calculate Wjn for second and third sum */
 +            /* Note that we can safely divide by slab_weights since we check in
 +             * get_slab_centers that it is non-zero. */
 +            Wjn = gaussian_xj*mj/erg->slab_weights[islab];
 +
 +            /* We already have precalculated the inner sum for slab n */
 +            copy_rvec(erg->slab_innersumvec[islab], innersumvec);
 +
 +            /* Weigh the inner sum vector with Wjn */
 +            svmul(Wjn, innersumvec, innersumvec);
 +
 +            /*** E. Calculate the second of the four sum terms: */
 +            /********************/
 +            /*** Add to sum2: ***/
 +            /********************/
 +            rvec_inc(sum2vec_part, innersumvec); /* sum2 still needs to be vector crossproduct'ed with v */
 +
 +            /*** F. Calculate the third of the four sum terms: */
 +            slab_sum3part = betasigpsi * iprod(sjn, innersumvec);
 +            sum3         += slab_sum3part; /* still needs to be multiplied with v */
 +
 +            /*** G. Calculate the torque on the local slab's axis: */
 +            if (bOutstepRot)
 +            {
 +                /* Sum1 */
 +                cprod(slab_sum1vec_part, rotg->vec, slab_sum1vec);
 +                /* Sum2 */
 +                cprod(innersumvec, rotg->vec, slab_sum2vec);
 +                /* Sum3 */
 +                svmul(slab_sum3part, rotg->vec, slab_sum3vec);
 +                /* Sum4 */
 +                svmul(slab_sum4part, rotg->vec, slab_sum4vec);
 +
 +                /* The force on atom ii from slab n only: */
 +                for (m = 0; m < DIM; m++)
 +                {
 +                    slab_force[m] = rotg->k * (-slab_sum1vec[m] + slab_sum2vec[m] - slab_sum3vec[m] + 0.5*slab_sum4vec[m]);
 +                }
 +
 +                erg->slab_torque_v[islab] += torque(rotg->vec, slab_force, xj, xcn);
 +            }
 +        } /* END of loop over slabs */
 +
 +        /* Construct the four individual parts of the vector sum: */
 +        cprod(sum1vec_part, rotg->vec, sum1vec);      /* sum1vec =   { } x v  */
 +        cprod(sum2vec_part, rotg->vec, sum2vec);      /* sum2vec =   { } x v  */
 +        svmul(sum3, rotg->vec, sum3vec);              /* sum3vec =   { } . v  */
 +        svmul(sum4, rotg->vec, sum4vec);              /* sum4vec =   { } . v  */
 +
 +        /* Store the additional force so that it can be added to the force
 +         * array after the normal forces have been evaluated */
 +        for (m = 0; m < DIM; m++)
 +        {
 +            erg->f_rot_loc[j][m] = rotg->k * (-sum1vec[m] + sum2vec[m] - sum3vec[m] + 0.5*sum4vec[m]);
 +        }
 +
 +#ifdef SUM_PARTS
 +        fprintf(stderr, "sum1: %15.8f %15.8f %15.8f\n",    -rotg->k*sum1vec[XX],    -rotg->k*sum1vec[YY],    -rotg->k*sum1vec[ZZ]);
 +        fprintf(stderr, "sum2: %15.8f %15.8f %15.8f\n",     rotg->k*sum2vec[XX],     rotg->k*sum2vec[YY],     rotg->k*sum2vec[ZZ]);
 +        fprintf(stderr, "sum3: %15.8f %15.8f %15.8f\n",    -rotg->k*sum3vec[XX],    -rotg->k*sum3vec[YY],    -rotg->k*sum3vec[ZZ]);
 +        fprintf(stderr, "sum4: %15.8f %15.8f %15.8f\n", 0.5*rotg->k*sum4vec[XX], 0.5*rotg->k*sum4vec[YY], 0.5*rotg->k*sum4vec[ZZ]);
 +#endif
 +
 +        PRINT_FORCE_J
 +
 +    } /* END of loop over local atoms */
 +
 +    return V;
 +}
 +/* Flex kernel: returns the rotation potential energy V accumulated over the local */
 +/* group atoms and stores their rotation forces in erg->f_rot_loc[].               */
 +static real do_flex_lowlevel(
 +        t_rotgrp *rotg,
 +        real      sigma,     /* The Gaussian width sigma                      */
 +        rvec      x[],       /* Positions, indexed with erg->ind_loc[]        */
 +        gmx_bool  bOutstepRot,  /* If set, also accumulate erg->slab_torque_v[] */
 +        gmx_bool  bOutstepSlab, /* Together with bOutstepRot enables the fit potential */
 +        matrix    box)          /* Passed on to shift_single_coord */
 +{
 +    int             count, ic, ifit, ii, j, m, n, islab, iigrp;
 +    rvec            xj, yj0;                /* current and reference position                */
 +    rvec            xcn, ycn;               /* the current and the reference slab centers    */
 +    rvec            yj0_ycn;                /* yj0 - ycn                                     */
 +    rvec            xj_xcn;                 /* xj - xcn                                      */
 +    rvec            qjn, fit_qjn;           /* q_j^n                                         */
 +    rvec            sum_n1, sum_n2;         /* Two contributions to the rotation force       */
 +    rvec            innersumvec;            /* Inner part of sum_n2                          */
 +    rvec            s_n;
 +    rvec            force_n;                /* Single force from slab n on one atom          */
 +    rvec            force_n1, force_n2;     /* First and second part of force_n              */
 +    rvec            tmpvec, tmpvec2, tmp_f; /* Helper variables                              */
 +    real            V;                      /* The rotation potential energy                 */
 +    real            OOsigma2;               /* 1/(sigma^2)                                   */
 +    real            beta;                   /* beta_n(xj)                                    */
 +    real            bjn, fit_bjn;           /* b_j^n                                         */
 +    real            gaussian_xj;            /* Gaussian weight gn(xj)                        */
 +    real            betan_xj_sigma2;
 +    real            mj, wj;                 /* Mass-weighting of the positions               */
 +    real            N_M;                    /* N/M                                           */
 +    gmx_enfrotgrp_t erg;                    /* Pointer to enforced rotation group data       */
 +    gmx_bool        bCalcPotFit;
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Pre-calculate the inner sums, so that we do not have to calculate
 +     * them again for every atom */
 +    flex_precalc_inner_sum(rotg);
 +
 +    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype);
 +
 +    /********************************************************/
 +    /* Main loop over all local atoms of the rotation group */
 +    /********************************************************/
 +    OOsigma2 = 1.0/(sigma*sigma);
 +    N_M      = rotg->nat * erg->invmass;
 +    V        = 0.0;
 +    for (j = 0; j < erg->nat_loc; j++)
 +    {
 +        /* Local index of a rotation group atom  */
 +        ii = erg->ind_loc[j];
 +        /* Position of this atom in the collective array */
 +        iigrp = erg->xc_ref_ind[j];
 +        /* Mass-weighting */
 +        mj = erg->mc[iigrp];  /* need the unsorted mass here */
 +        wj = N_M*mj;
 +
 +        /* Current position of this atom: x[ii][XX/YY/ZZ]
 +         * Note that erg->xc_center contains the center of mass in case the flex-t
 +         * potential was chosen. For the flex potential erg->xc_center must be
 +         * zero. */
 +        rvec_sub(x[ii], erg->xc_center, xj);
 +
 +        /* Shift this atom such that it is near its reference */
 +        shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
 +
 +        /* Determine the slabs to loop over, i.e. the ones with contributions
 +         * larger than min_gaussian */
 +        count = get_single_atom_gaussians(xj, rotg);
 +
 +        clear_rvec(sum_n1);
 +        clear_rvec(sum_n2);
 +
 +        /* Loop over the relevant slabs for this atom */
 +        for (ic = 0; ic < count; ic++)
 +        {
 +            n = erg->gn_slabind[ic];
 +
 +            /* Get the precomputed Gaussian for xj in slab n */
 +            gaussian_xj = erg->gn_atom[ic];
 +
 +            islab = n - erg->slab_first; /* slab index */
 +
 +            /* The (unrotated) reference position of this atom is saved in yj0: */
 +            copy_rvec(rotg->x_ref[iigrp], yj0);
 +
 +            beta = calc_beta(xj, rotg, n);
 +
 +            /* The current center of this slab is saved in xcn: */
 +            copy_rvec(erg->slab_center[islab], xcn);
 +            /* ... and the reference center in ycn: */
 +            copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
 +
 +            rvec_sub(yj0, ycn, yj0_ycn); /* yj0_ycn = yj0 - ycn */
 +
 +            /* Rotate: */
 +            mvmul(erg->rotmat, yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */
 +
 +            /* Subtract the slab center from xj */
 +            rvec_sub(xj, xcn, xj_xcn);           /* xj_xcn = xj - xcn         */
 +
++            /* In rare cases an atom position coincides with a slab center
++             * (xj_xcn == 0). Per the original note, this slab then contributes
++             * no force on the atom (it sits on the pivot), so we move on to the
++             * next slab. NOTE(review): qjn below is built from Omega.(yj0-ycn),
++             * not from xj_xcn, and the sum_n2 term is skipped too - confirm.  */
++            if ( 0 == norm(xj_xcn) )
++            {
++                continue;
++            }
++
 +            /* Calculate qjn */
 +            cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec= v x Omega.(yj0-ycn) */
 +
 +            /*                         *         v x Omega.(yj0-ycn)    */
 +            unitv(tmpvec, qjn);       /*  qjn = ---------------------   */
 +                                      /*        |v x Omega.(yj0-ycn)|   */
 +
 +            bjn = iprod(qjn, xj_xcn); /* bjn = qjn * (xj - xcn) */
 +
 +            /*********************************/
 +            /* Add to the rotation potential */
 +            /*********************************/
 +            V += 0.5*rotg->k*wj*gaussian_xj*sqr(bjn);
 +
 +            /* If requested, also calculate the potential for a set of angles
 +             * near the current reference angle */
 +            if (bCalcPotFit)
 +            {
 +                for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
 +                {
 +                    /* As above calculate Omega.(yj0-ycn), now for the other angles */
 +                    mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */
 +                    /* As above calculate qjn */
 +                    cprod(rotg->vec, tmpvec2, tmpvec);                       /* tmpvec= v x Omega.(yj0-ycn) */
 +                    /*                                                        *             v x Omega.(yj0-ycn)    */
 +                    unitv(tmpvec, fit_qjn);                                  /*  fit_qjn = ---------------------   */
 +                                                                             /*            |v x Omega.(yj0-ycn)|   */
 +                    fit_bjn = iprod(fit_qjn, xj_xcn);                        /* fit_bjn = fit_qjn * (xj - xcn) */
 +                    /* Add to the rotation potential for this angle */
 +                    erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*sqr(fit_bjn);
 +                }
 +            }
 +
 +            /****************************************************************/
 +            /* sum_n1 will typically be the main contribution to the force: */
 +            /****************************************************************/
 +            betan_xj_sigma2 = beta*OOsigma2;  /*  beta_n(xj)/sigma^2  */
 +
 +            /* The next lines calculate
 +             *  qjn - (bjn*beta(xj)/(2sigma^2))v  */
 +            svmul(bjn*0.5*betan_xj_sigma2, rotg->vec, tmpvec2);
 +            rvec_sub(qjn, tmpvec2, tmpvec);
 +
 +            /* Multiply with gn(xj)*bjn: */
 +            svmul(gaussian_xj*bjn, tmpvec, tmpvec2);
 +
 +            /* Sum over n: */
 +            rvec_inc(sum_n1, tmpvec2);
 +
 +            /* We already have precalculated the Sn term for slab n */
 +            copy_rvec(erg->slab_innersumvec[islab], s_n);
 +            /*                                                             *          beta_n(xj)              */
 +            svmul(betan_xj_sigma2*iprod(s_n, xj_xcn), rotg->vec, tmpvec); /* tmpvec = ---------- s_n (xj-xcn) */
 +                                                                          /*            sigma^2               */
 +
 +            rvec_sub(s_n, tmpvec, innersumvec);
 +
 +            /* We can safely divide by slab_weights since we check in get_slab_centers
 +             * that it is non-zero. */
 +            svmul(gaussian_xj/erg->slab_weights[islab], innersumvec, innersumvec);
 +
 +            rvec_add(sum_n2, innersumvec, sum_n2);
 +
 +            /* Calculate the torque: */
 +            if (bOutstepRot)
 +            {
 +                /* The force on atom ii from slab n only: */
 +                svmul(-rotg->k*wj, tmpvec2, force_n1);     /* part 1 */
 +                svmul( rotg->k*mj, innersumvec, force_n2); /* part 2 */
 +                rvec_add(force_n1, force_n2, force_n);
 +                erg->slab_torque_v[islab] += torque(rotg->vec, force_n, xj, xcn);
 +            }
 +        } /* END of loop over slabs */
 +
 +        /* Put both contributions together: */
 +        svmul(wj, sum_n1, sum_n1);
 +        svmul(mj, sum_n2, sum_n2);
 +        rvec_sub(sum_n2, sum_n1, tmp_f); /* F = -grad V */
 +
 +        /* Store the additional force so that it can be added to the force
 +         * array after the normal forces have been evaluated */
 +        for (m = 0; m < DIM; m++)
 +        {
 +            erg->f_rot_loc[j][m] = rotg->k*tmp_f[m];
 +        }
 +
 +        PRINT_FORCE_J
 +
 +    } /* END of loop over local atoms */
 +
 +    return V;
 +}
 +
 +#ifdef PRINT_COORDS
 +static void print_coordinates(t_rotgrp *rotg, rvec x[], matrix box, int step)
 +{
 +    int             i;
 +    static FILE    *fp;
 +    static char     buf[STRLEN];
 +    static gmx_bool bFirst = 1;
 +    /* Debug helper: writes the step number, the box and all erg->xc positions to a text file. */
 +    /* NOTE(review): 'cr' and 'erg' are not declared in this function - confirm where they come from. */
 +    if (bFirst)
 +    {
 +        /* First call only: open the output file, named after cr->nodeid */
 +        sprintf(buf, "coords%d.txt", cr->nodeid);
 +        fp     = fopen(buf, "w");
 +        bFirst = 0;
 +    }
 +
 +    fprintf(fp, "\nStep %d\n", step);
 +    fprintf(fp, "box: %f %f %f %f %f %f %f %f %f\n",
 +            box[XX][XX], box[XX][YY], box[XX][ZZ],
 +            box[YY][XX], box[YY][YY], box[YY][ZZ],
 +            box[ZZ][XX], box[ZZ][YY], box[ZZ][ZZ]); /* third row: XX, YY, ZZ */
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        fprintf(fp, "%4d  %f %f %f\n", i,
 +                erg->xc[i][XX], erg->xc[i][YY], erg->xc[i][ZZ]);
 +    }
 +    fflush(fp);
 +
 +}
 +#endif
 +/* Comparison callback handed to gmx_qsort: orders two sort_along_vec_t entries */
 +/* by ascending xcproj; returns -1, 1, or 0 when both projections are equal.    */
 +static int projection_compare(const void *a, const void *b)
 +{
 +    sort_along_vec_t *xca, *xcb;
 +
 +
 +    xca = (sort_along_vec_t *)a;
 +    xcb = (sort_along_vec_t *)b;
 +
 +    if (xca->xcproj < xcb->xcproj)
 +    {
 +        return -1;
 +    }
 +    else if (xca->xcproj > xcb->xcproj)
 +    {
 +        return 1;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +/* Sort erg->xc by its projection on rotg->vec and fill erg->xc_ref_sorted,   */
 +/* erg->mc_sorted and erg->xc_sortind[] (original index of each sorted atom). */
 +static void sort_collective_coordinates(
 +        t_rotgrp         *rotg, /* Rotation group */
 +        sort_along_vec_t *data) /* Buffer for sorting the positions */
 +{
 +    int             i;
 +    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* The projection of the position vector on the rotation vector is
 +     * the relevant value for sorting. Fill the 'data' structure */
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        data[i].xcproj = iprod(erg->xc[i], rotg->vec);  /* sort criterion */
 +        data[i].m      = erg->mc[i];
 +        data[i].ind    = i;
 +        copy_rvec(erg->xc[i], data[i].x    );
 +        copy_rvec(rotg->x_ref[i], data[i].x_ref);
 +    }
 +    /* Sort the 'data' structure */
 +    gmx_qsort(data, rotg->nat, sizeof(sort_along_vec_t), projection_compare);
 +
 +    /* Copy back the sorted values */
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        copy_rvec(data[i].x, erg->xc[i]           );
 +        copy_rvec(data[i].x_ref, erg->xc_ref_sorted[i]);
 +        erg->mc_sorted[i]  = data[i].m;
 +        erg->xc_sortind[i] = data[i].ind;
 +    }
 +}
 +
 +
 +/* For each slab, find the first and the last index into the sorted atom array
 + * that still has to be considered; stored in erg->firstatom[]/lastatom[] */
 +static void get_firstlast_atom_per_slab(t_rotgrp *rotg)
 +{
 +    int             i, islab, n;
 +    real            beta;
 +    gmx_enfrotgrp_t erg;     /* Pointer to enforced rotation group data */
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Find the first atom that needs to enter the calculation for each slab */
 +    n = erg->slab_first; /* slab */
 +    i = 0;               /* start with the first atom */
 +    do
 +    {
 +        /* Find the first atom that significantly contributes to this slab */
 +        do /* move forward in position until a large enough beta is found */
 +        {
 +            beta = calc_beta(erg->xc[i], rotg, n);
 +            i++;
 +        }
 +        while ((beta < -erg->max_beta) && (i < rotg->nat));
 +        i--; /* undo the last increment: i is the atom that ended the scan */
 +        islab                 = n - erg->slab_first; /* slab index */
 +        erg->firstatom[islab] = i;
 +        /* Proceed to the next slab */
 +        n++;
 +    }
 +    while (n <= erg->slab_last);
 +
 +    /* Find the last atom for each slab */
 +    n = erg->slab_last; /* start with last slab */
 +    i = rotg->nat-1;    /* start with the last atom */
 +    do
 +    {
 +        do  /* move backward in position until a small enough beta is found */
 +        {
 +            beta = calc_beta(erg->xc[i], rotg, n);
 +            i--;
 +        }
 +        while ((beta > erg->max_beta) && (i > -1));
 +        i++; /* undo the last decrement: i is the atom that ended the scan */
 +        islab                = n - erg->slab_first; /* slab index */
 +        erg->lastatom[islab] = i;
 +        /* Proceed to the next slab */
 +        n--;
 +    }
 +    while (n >= erg->slab_first);
 +}
 +
 +
 +/* Determine the very first and very last slab that need to be considered.
 + * For the first slab that needs to be considered, we have to find the smallest
 + * n that obeys:
 + *
 + * x_first * v - n*Delta_x <= beta_max
 + *
 + * slab index n, slab distance Delta_x, rotation vector v. For the last slab we
 + * have to find the largest n that obeys:
 + *
 + * x_last * v - n*Delta_x >= -beta_max
 + *
 + */
 +static gmx_inline int get_first_slab(
 +        t_rotgrp *rotg,      /* The rotation group (inputrec data) */
 +        real      max_beta,  /* The max_beta value, instead of min_gaussian */
 +        rvec      firstatom) /* First atom after sorting along the rotation vector v */
 +{
 +    /* Find the first slab for the first atom: n = ceil((x_first*v - beta_max)/Delta_x) */
 +    return ceil((iprod(firstatom, rotg->vec) - max_beta)/rotg->slab_dist);
 +}
 +/* Return the largest slab index n that obeys x_last * v - n*Delta_x >= -beta_max, */
 +/* with Delta_x = rotg->slab_dist and v = rotg->vec.                               */
 +static gmx_inline int get_last_slab(
 +        t_rotgrp *rotg,     /* The rotation group (inputrec data) */
 +        real      max_beta, /* The max_beta value, instead of min_gaussian */
 +        rvec      lastatom) /* Last atom along v */
 +{
 +    /* Find the last slab for the last atom: n = floor((x_last*v + beta_max)/Delta_x) */
 +    return floor((iprod(lastatom, rotg->vec) + max_beta)/rotg->slab_dist);
 +}
 +/* Set erg->slab_first, erg->slab_last and erg->slab_buffer, then call gmx_fatal */
 +/* if either end lies outside the range slab_first_ref..slab_last_ref.           */
 +static void get_firstlast_slab_check(
 +        t_rotgrp        *rotg,      /* The rotation group (inputrec data) */
 +        t_gmx_enfrotgrp *erg,       /* The rotation group (data only accessible in this file) */
 +        rvec             firstatom, /* First atom after sorting along the rotation vector v */
-         rvec             lastatom,  /* Last atom along v */
-         int              g)         /* The rotation group number */
++        rvec             lastatom)  /* Last atom along v */
 +{
 +    erg->slab_first = get_first_slab(rotg, erg->max_beta, firstatom);
 +    erg->slab_last  = get_last_slab(rotg, erg->max_beta, lastatom);
 +
++    /* Calculate the slab buffer size, which changes when slab_first changes */
++    erg->slab_buffer = erg->slab_first - erg->slab_first_ref;
++
 +    /* Check whether we have reference data for the first slab */
 +    if (erg->slab_first < erg->slab_first_ref)
 +    {
 +        gmx_fatal(FARGS, "%s No reference data for first slab (n=%d), unable to proceed.",
 +                  RotStr, erg->slab_first);
 +    }
 +
 +    /* Check whether we have reference data for the last slab */
 +    if (erg->slab_last > erg->slab_last_ref)
 +    {
 +        gmx_fatal(FARGS, "%s No reference data for last slab (n=%d), unable to proceed.",
 +                  RotStr, erg->slab_last);
 +    }
 +}
 +
 +
 +/* Enforced rotation with a flexible axis */
 +static void do_flexible(
 +        gmx_bool        bMaster,
 +        gmx_enfrot_t    enfrot,       /* Other rotation data                        */
 +        t_rotgrp       *rotg,         /* The rotation group                         */
 +        int             g,            /* Group number                               */
 +        rvec            x[],          /* The local positions                        */
 +        matrix          box,
 +        double          t,            /* Time in picoseconds                        */
-         gmx_large_int_t step,         /* The time step                              */
 +        gmx_bool        bOutstepRot,  /* Output to main rotation output file        */
 +        gmx_bool        bOutstepSlab) /* Output per-slab data                       */
 +{
 +    int             l, nslabs;
 +    real            sigma;    /* The Gaussian width sigma */
 +    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Define the sigma value */
 +    sigma = 0.7*rotg->slab_dist;
 +
 +    /* Sort the collective coordinates erg->xc along the rotation vector. This is
 +     * an optimization for the inner loop. */
 +    sort_collective_coordinates(rotg, enfrot->data);
 +
 +    /* Determine the first relevant slab for the first atom and the last
 +     * relevant slab for the last atom */
-     get_firstlast_slab_check(rotg, erg, erg->xc[0], erg->xc[rotg->nat-1], g);
++    get_firstlast_slab_check(rotg, erg, erg->xc[0], erg->xc[rotg->nat-1]);
 +
 +    /* Determine for each slab, depending on the min_gaussian cutoff criterion,
 +     * the first and last atom index between which contributions need to be calculated */
 +    get_firstlast_atom_per_slab(rotg);
 +
 +    /* Determine the gaussian-weighted center of positions for all slabs */
 +    get_slab_centers(rotg, erg->xc, erg->mc_sorted, g, t, enfrot->out_slabs, bOutstepSlab, FALSE);
 +
 +    /* Clear the torque per slab from last time step: */
 +    nslabs = erg->slab_last - erg->slab_first + 1;
 +    for (l = 0; l < nslabs; l++)
 +    {
 +        erg->slab_torque_v[l] = 0.0;
 +    }
 +
 +    /* Call the rotational forces kernel */
 +    if (rotg->eType == erotgFLEX || rotg->eType == erotgFLEXT)
 +    {
 +        erg->V = do_flex_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box);
 +    }
 +    else if (rotg->eType == erotgFLEX2 || rotg->eType == erotgFLEX2T)
 +    {
 +        erg->V = do_flex2_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box);
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS, "Unknown flexible rotation type");
 +    }
 +
 +    /* Determine angle by RMSD fit to the reference - Let's hope this */
 +    /* only happens once in a while, since this is not parallelized! */
 +    if (bMaster && (erotgFitPOT != rotg->eFittype) )
 +    {
 +        if (bOutstepRot)
 +        {
 +            /* Fit angle of the whole rotation group */
 +            erg->angle_v = flex_fit_angle(rotg);
 +        }
 +        if (bOutstepSlab)
 +        {
 +            /* Fit angle of each slab */
 +            flex_fit_angle_perslab(g, rotg, t, erg->degangle, enfrot->out_angles);
 +        }
 +    }
 +
 +    /* Lump together the torques from all slabs: */
 +    erg->torque_v = 0.0;
 +    for (l = 0; l < nslabs; l++)
 +    {
 +        erg->torque_v += erg->slab_torque_v[l];
 +    }
 +}
 +
 +
 +/* Calculate the angle between reference and actual rotation group atom,
 + * both projected into a plane perpendicular to the rotation vector: */
 +static void angle(t_rotgrp *rotg,
 +                  rvec      x_act,
 +                  rvec      x_ref,
 +                  real     *alpha,
 +                  real     *weight) /* atoms near the rotation axis should count less than atoms far away */
 +{
 +    rvec xp, xrp;                   /* current and reference positions projected on a plane perpendicular to rotg->vec */
 +    rvec dum;
 +
 +
 +    /* Project x_ref and x_act into a plane through the origin perpendicular to rotg->vec: */
 +    /* Project x_ref: xrp = x_ref - (vec * x_ref) * vec */
 +    svmul(iprod(rotg->vec, x_ref), rotg->vec, dum);
 +    rvec_sub(x_ref, dum, xrp);
 +    /* Project x_act: */
 +    svmul(iprod(rotg->vec, x_act), rotg->vec, dum);
 +    rvec_sub(x_act, dum, xp);
 +
 +    /* Retrieve information about which vector precedes. gmx_angle always
 +     * returns a positive angle. */
 +    cprod(xp, xrp, dum); /* if the reference precedes, this points in the same direction as vec */
 +
 +    if (iprod(rotg->vec, dum) >= 0)
 +    {
 +        *alpha = -gmx_angle(xrp, xp);
 +    }
 +    else
 +    {
 +        *alpha = +gmx_angle(xrp, xp);
 +    }
 +
 +    /* Also return the weight */
 +    *weight = norm(xp);
 +}
 +
 +
 +/* Project first vector onto a plane perpendicular to the second vector
 + * dr = dr - (dr.v)v
 + * Note that v must be of unit length.
 + */
 +static gmx_inline void project_onto_plane(rvec dr, const rvec v)
 +{
 +    rvec tmp;
 +
 +
 +    svmul(iprod(dr, v), v, tmp); /* tmp = (dr.v)v */
 +    rvec_dec(dr, tmp);           /* dr = dr - (dr.v)v */
 +}
 +
 +
 +/* Fixed rotation: The rotation reference group rotates around the v axis. */
 +/* The atoms of the actual rotation group are attached with imaginary  */
 +/* springs to the reference atoms.                                     */
 +static void do_fixed(
 +        t_rotgrp       *rotg,         /* The rotation group                         */
-         rvec            x[],          /* The positions                              */
-         matrix          box,          /* The simulation box                         */
-         double          t,            /* Time in picoseconds                        */
-         gmx_large_int_t step,         /* The time step                              */
 +        gmx_bool        bOutstepRot,  /* Output to main rotation output file        */
 +        gmx_bool        bOutstepSlab) /* Output per-slab data                       */
 +{
 +    int             ifit, j, jj, m;
 +    rvec            dr;
 +    rvec            tmp_f;     /* Force */
 +    real            alpha;     /* a single angle between an actual and a reference position */
 +    real            weight;    /* single weight for a single angle */
 +    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
 +    rvec            xi_xc;     /* xi - xc */
 +    gmx_bool        bCalcPotFit;
 +    rvec            fit_xr_loc;
 +
 +    /* for mass weighting: */
 +    real      wi;              /* Mass-weighting of the positions */
 +    real      N_M;             /* N/M */
 +    real      k_wi;            /* k times wi */
 +
 +    gmx_bool  bProject;
 +
 +
 +    erg         = rotg->enfrotgrp;
 +    bProject    = (rotg->eType == erotgPM) || (rotg->eType == erotgPMPF);
 +    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype);
 +
 +    N_M = rotg->nat * erg->invmass;
 +
 +    /* Each process calculates the forces on its local atoms */
 +    for (j = 0; j < erg->nat_loc; j++)
 +    {
 +        /* Calculate (x_i-x_c) resp. (x_i-u) */
 +        rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xi_xc);
 +
 +        /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */
 +        rvec_sub(erg->xr_loc[j], xi_xc, dr);
 +
 +        if (bProject)
 +        {
 +            project_onto_plane(dr, rotg->vec);
 +        }
 +
 +        /* Mass-weighting */
 +        wi = N_M*erg->m_loc[j];
 +
 +        /* Store the additional force so that it can be added to the force
 +         * array after the normal forces have been evaluated */
 +        k_wi = rotg->k*wi;
 +        for (m = 0; m < DIM; m++)
 +        {
 +            tmp_f[m]             = k_wi*dr[m];
 +            erg->f_rot_loc[j][m] = tmp_f[m];
 +            erg->V              += 0.5*k_wi*sqr(dr[m]);
 +        }
 +
 +        /* If requested, also calculate the potential for a set of angles
 +         * near the current reference angle */
 +        if (bCalcPotFit)
 +        {
 +            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
 +            {
 +                /* Index of this rotation group atom with respect to the whole rotation group */
 +                jj = erg->xc_ref_ind[j];
 +
 +                /* Rotate with the alternative angle. Like rotate_local_reference(),
 +                 * just for a single local atom */
 +                mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_xr_loc); /* fit_xr_loc = Omega*(y_i-y_c) */
 +
 +                /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */
 +                rvec_sub(fit_xr_loc, xi_xc, dr);
 +
 +                if (bProject)
 +                {
 +                    project_onto_plane(dr, rotg->vec);
 +                }
 +
 +                /* Add to the rotation potential for this angle: */
 +                erg->PotAngleFit->V[ifit] += 0.5*k_wi*norm2(dr);
 +            }
 +        }
 +
 +        if (bOutstepRot)
 +        {
 +            /* Add to the torque of this rotation group */
 +            erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center);
 +
 +            /* Calculate the angle between reference and actual rotation group atom. */
 +            angle(rotg, xi_xc, erg->xr_loc[j], &alpha, &weight);  /* angle in rad, weighted */
 +            erg->angle_v  += alpha * weight;
 +            erg->weight_v += weight;
 +        }
 +        /* If you want enforced rotation to contribute to the virial,
 +         * activate the following lines:
 +            if (MASTER(cr))
 +            {
 +               Add the rotation contribution to the virial
 +              for(j=0; j<DIM; j++)
 +                for(m=0;m<DIM;m++)
 +                  vir[j][m] += 0.5*f[ii][j]*dr[m];
 +            }
 +         */
 +
 +        PRINT_FORCE_J
 +
 +    } /* end of loop over local rotation group atoms */
 +}
 +
 +
 +/* Calculate the radial motion potential and forces */
 +static void do_radial_motion(
 +        t_rotgrp       *rotg,         /* The rotation group                         */
-         rvec            x[],          /* The positions                              */
-         matrix          box,          /* The simulation box                         */
-         double          t,            /* Time in picoseconds                        */
-         gmx_large_int_t step,         /* The time step                              */
 +        gmx_bool        bOutstepRot,  /* Output to main rotation output file        */
 +        gmx_bool        bOutstepSlab) /* Output per-slab data                       */
 +{
 +    int             j, jj, ifit;
 +    rvec            tmp_f;     /* Force */
 +    real            alpha;     /* a single angle between an actual and a reference position */
 +    real            weight;    /* single weight for a single angle */
 +    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
 +    rvec            xj_u;      /* xj - u */
 +    rvec            tmpvec, fit_tmpvec;
 +    real            fac, fac2, sum = 0.0;
 +    rvec            pj;
 +    gmx_bool        bCalcPotFit;
 +
 +    /* For mass weighting: */
 +    real      wj;              /* Mass-weighting of the positions */
 +    real      N_M;             /* N/M */
 +
 +
 +    erg         = rotg->enfrotgrp;
 +    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype);
 +
 +    N_M = rotg->nat * erg->invmass;
 +
 +    /* Each process calculates the forces on its local atoms */
 +    for (j = 0; j < erg->nat_loc; j++)
 +    {
 +        /* Calculate (xj-u) */
 +        rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xj_u);  /* xj_u = xj-u */
 +
 +        /* Calculate v x Omega.(yj0-u) */
 +        cprod(rotg->vec, erg->xr_loc[j], tmpvec);  /* tmpvec = v x Omega.(yj0-u) */
 +
 +        /*                       *         v x Omega.(yj0-u)     */
 +        unitv(tmpvec, pj);      /*  pj = ---------------------   */
 +                                /*       | v x Omega.(yj0-u) |   */
 +
 +        fac  = iprod(pj, xj_u); /* fac = pj.(xj-u) */
 +        fac2 = fac*fac;
 +
 +        /* Mass-weighting */
 +        wj = N_M*erg->m_loc[j];
 +
 +        /* Store the additional force so that it can be added to the force
 +         * array after the normal forces have been evaluated */
 +        svmul(-rotg->k*wj*fac, pj, tmp_f);
 +        copy_rvec(tmp_f, erg->f_rot_loc[j]);
 +        sum += wj*fac2;
 +
 +        /* If requested, also calculate the potential for a set of angles
 +         * near the current reference angle */
 +        if (bCalcPotFit)
 +        {
 +            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
 +            {
 +                /* Index of this rotation group atom with respect to the whole rotation group */
 +                jj = erg->xc_ref_ind[j];
 +
 +                /* Rotate with the alternative angle. Like rotate_local_reference(),
 +                 * just for a single local atom */
 +                mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_tmpvec); /* fit_tmpvec = Omega*(yj0-u) */
 +
 +                /* Calculate v x Omega.(yj0-u) */
 +                cprod(rotg->vec, fit_tmpvec, tmpvec); /* tmpvec = v x Omega.(yj0-u) */
 +                /*                                     *         v x Omega.(yj0-u)     */
 +                unitv(tmpvec, pj);                    /*  pj = ---------------------   */
 +                                                      /*       | v x Omega.(yj0-u) |   */
 +
 +                fac  = iprod(pj, xj_u);               /* fac = pj.(xj-u) */
 +                fac2 = fac*fac;
 +
 +                /* Add to the rotation potential for this angle: */
 +                erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2;
 +            }
 +        }
 +
 +        if (bOutstepRot)
 +        {
 +            /* Add to the torque of this rotation group */
 +            erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center);
 +
 +            /* Calculate the angle between reference and actual rotation group atom. */
 +            angle(rotg, xj_u, erg->xr_loc[j], &alpha, &weight);  /* angle in rad, weighted */
 +            erg->angle_v  += alpha * weight;
 +            erg->weight_v += weight;
 +        }
 +
 +        PRINT_FORCE_J
 +
 +    } /* end of loop over local rotation group atoms */
 +    erg->V = 0.5*rotg->k*sum;
 +}
 +
 +
 +/* Calculate the radial motion pivot-free potential and forces */
 +static void do_radial_motion_pf(
 +        t_rotgrp       *rotg,         /* The rotation group                         */
 +        rvec            x[],          /* The positions                              */
 +        matrix          box,          /* The simulation box                         */
-         double          t,            /* Time in picoseconds                        */
-         gmx_large_int_t step,         /* The time step                              */
 +        gmx_bool        bOutstepRot,  /* Output to main rotation output file        */
 +        gmx_bool        bOutstepSlab) /* Output per-slab data                       */
 +{
 +    int             i, ii, iigrp, ifit, j;
 +    rvec            xj;          /* Current position */
 +    rvec            xj_xc;       /* xj  - xc  */
 +    rvec            yj0_yc0;     /* yj0 - yc0 */
 +    rvec            tmp_f;       /* Force */
 +    real            alpha;       /* a single angle between an actual and a reference position */
 +    real            weight;      /* single weight for a single angle */
 +    gmx_enfrotgrp_t erg;         /* Pointer to enforced rotation group data */
 +    rvec            tmpvec, tmpvec2;
 +    rvec            innersumvec; /* Precalculation of the inner sum */
 +    rvec            innersumveckM;
 +    real            fac, fac2, V = 0.0;
 +    rvec            qi, qj;
 +    gmx_bool        bCalcPotFit;
 +
 +    /* For mass weighting: */
 +    real      mj, wi, wj;      /* Mass-weighting of the positions */
 +    real      N_M;             /* N/M */
 +
 +
 +    erg         = rotg->enfrotgrp;
 +    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype);
 +
 +    N_M = rotg->nat * erg->invmass;
 +
 +    /* Get the current center of the rotation group: */
 +    get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center);
 +
 +    /* Precalculate Sum_i [ wi qi.(xi-xc) qi ] which is needed for every single j */
 +    clear_rvec(innersumvec);
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        /* Mass-weighting */
 +        wi = N_M*erg->mc[i];
 +
 +        /* Calculate qi. Note that xc_ref_center has already been subtracted from
 +         * x_ref in init_rot_group.*/
 +        mvmul(erg->rotmat, rotg->x_ref[i], tmpvec); /* tmpvec  = Omega.(yi0-yc0) */
 +
 +        cprod(rotg->vec, tmpvec, tmpvec2);          /* tmpvec2 = v x Omega.(yi0-yc0) */
 +
 +        /*                                             *         v x Omega.(yi0-yc0)     */
 +        unitv(tmpvec2, qi);                           /*  qi = -----------------------   */
 +                                                      /*       | v x Omega.(yi0-yc0) |   */
 +
 +        rvec_sub(erg->xc[i], erg->xc_center, tmpvec); /* tmpvec = xi-xc */
 +
 +        svmul(wi*iprod(qi, tmpvec), qi, tmpvec2);
 +
 +        rvec_inc(innersumvec, tmpvec2);
 +    }
 +    svmul(rotg->k*erg->invmass, innersumvec, innersumveckM);
 +
 +    /* Each process calculates the forces on its local atoms */
 +    for (j = 0; j < erg->nat_loc; j++)
 +    {
 +        /* Local index of a rotation group atom  */
 +        ii = erg->ind_loc[j];
 +        /* Position of this atom in the collective array */
 +        iigrp = erg->xc_ref_ind[j];
 +        /* Mass-weighting */
 +        mj = erg->mc[iigrp];  /* need the unsorted mass here */
 +        wj = N_M*mj;
 +
 +        /* Current position of this atom: x[ii][XX/YY/ZZ] */
 +        copy_rvec(x[ii], xj);
 +
 +        /* Shift this atom such that it is near its reference */
 +        shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
 +
 +        /* The (unrotated) reference position is yj0. yc0 has already
 +         * been subtracted in init_rot_group */
 +        copy_rvec(rotg->x_ref[iigrp], yj0_yc0);   /* yj0_yc0 = yj0 - yc0      */
 +
 +        /* Calculate Omega.(yj0-yc0) */
 +        mvmul(erg->rotmat, yj0_yc0, tmpvec2); /* tmpvec2 = Omega.(yj0 - yc0)  */
 +
 +        cprod(rotg->vec, tmpvec2, tmpvec);    /* tmpvec = v x Omega.(yj0-yc0) */
 +
 +        /*                     *         v x Omega.(yj0-yc0)     */
 +        unitv(tmpvec, qj);    /*  qj = -----------------------   */
 +                              /*       | v x Omega.(yj0-yc0) |   */
 +
 +        /* Calculate (xj-xc) */
 +        rvec_sub(xj, erg->xc_center, xj_xc); /* xj_xc = xj-xc */
 +
 +        fac  = iprod(qj, xj_xc);             /* fac = qj.(xj-xc) */
 +        fac2 = fac*fac;
 +
 +        /* Store the additional force so that it can be added to the force
 +         * array after the normal forces have been evaluated */
 +        svmul(-rotg->k*wj*fac, qj, tmp_f); /* part 1 of force */
 +        svmul(mj, innersumveckM, tmpvec);  /* part 2 of force */
 +        rvec_inc(tmp_f, tmpvec);
 +        copy_rvec(tmp_f, erg->f_rot_loc[j]);
 +        V += wj*fac2;
 +
 +        /* If requested, also calculate the potential for a set of angles
 +         * near the current reference angle */
 +        if (bCalcPotFit)
 +        {
 +            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
 +            {
 +                /* Rotate with the alternative angle. Like rotate_local_reference(),
 +                 * just for a single local atom */
 +                mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, tmpvec2); /* tmpvec2 = Omega*(yj0-yc0) */
 +
 +                /* Calculate v x Omega.(yj0-yc0) */
 +                cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x Omega.(yj0-yc0) */
 +                /*                                  *         v x Omega.(yj0-yc0)     */
 +                unitv(tmpvec, qj);                 /*  qj = -----------------------   */
 +                                                   /*       | v x Omega.(yj0-yc0) |   */
 +
 +                fac  = iprod(qj, xj_xc);           /* fac = qj.(xj-xc) */
 +                fac2 = fac*fac;
 +
 +                /* Add to the rotation potential for this angle: */
 +                erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2;
 +            }
 +        }
 +
 +        if (bOutstepRot)
 +        {
 +            /* Add to the torque of this rotation group */
 +            erg->torque_v += torque(rotg->vec, tmp_f, xj, erg->xc_center);
 +
 +            /* Calculate the angle between reference and actual rotation group atom. */
 +            angle(rotg, xj_xc, yj0_yc0, &alpha, &weight);  /* angle in rad, weighted */
 +            erg->angle_v  += alpha * weight;
 +            erg->weight_v += weight;
 +        }
 +
 +        PRINT_FORCE_J
 +
 +    } /* end of loop over local rotation group atoms */
 +    erg->V = 0.5*rotg->k*V;
 +}
 +
 +
 +/* Precalculate the inner sum for the radial motion 2 forces */
 +static void radial_motion2_precalc_inner_sum(t_rotgrp  *rotg, rvec innersumvec)
 +{
 +    int             i;
 +    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
 +    rvec            xi_xc;     /* xi - xc */
 +    rvec            tmpvec, tmpvec2;
 +    real            fac, fac2;
 +    rvec            ri, si;
 +    real            siri;
 +    rvec            v_xi_xc;   /* v x (xi - xc) */
 +    real            psii, psiistar;
 +    real            wi;        /* Mass-weighting of the positions */
 +    real            N_M;       /* N/M */
 +    rvec            sumvec;
 +
 +    erg = rotg->enfrotgrp;
 +    N_M = rotg->nat * erg->invmass;
 +
 +    /* Loop over the collective set of positions */
 +    clear_rvec(sumvec);
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        /* Mass-weighting */
 +        wi = N_M*erg->mc[i];
 +
 +        rvec_sub(erg->xc[i], erg->xc_center, xi_xc); /* xi_xc = xi-xc         */
 +
 +        /* Calculate ri. Note that xc_ref_center has already been subtracted from
 +         * x_ref in init_rot_group.*/
 +        mvmul(erg->rotmat, rotg->x_ref[i], ri);      /* ri  = Omega.(yi0-yc0) */
 +
 +        cprod(rotg->vec, xi_xc, v_xi_xc);            /* v_xi_xc = v x (xi-xc) */
 +
 +        fac = norm2(v_xi_xc);
 +        /*                                 *                      1           */
 +        psiistar = 1.0/(fac + rotg->eps); /* psiistar = --------------------- */
 +                                          /*            |v x (xi-xc)|^2 + eps */
 +
 +        psii = gmx_invsqrt(fac);          /*                 1                */
 +                                          /*  psii    = -------------         */
 +                                          /*            |v x (xi-xc)|         */
 +
 +        svmul(psii, v_xi_xc, si);         /*  si = psii * (v x (xi-xc) )     */
 +
 +        fac  = iprod(v_xi_xc, ri);        /* fac = (v x (xi-xc)).ri */
 +        fac2 = fac*fac;
 +
 +        siri = iprod(si, ri);                       /* siri = si.ri           */
 +
 +        svmul(psiistar/psii, ri, tmpvec);
 +        svmul(psiistar*psiistar/(psii*psii*psii) * siri, si, tmpvec2);
 +        rvec_dec(tmpvec, tmpvec2);
 +        cprod(tmpvec, rotg->vec, tmpvec2);
 +
 +        svmul(wi*siri, tmpvec2, tmpvec);
 +
 +        rvec_inc(sumvec, tmpvec);
 +    }
 +    svmul(rotg->k*erg->invmass, sumvec, innersumvec);
 +}
 +
 +
 +/* Calculate the radial motion 2 potential and forces */
 +static void do_radial_motion2(
 +        t_rotgrp       *rotg,         /* The rotation group                         */
 +        rvec            x[],          /* The positions                              */
 +        matrix          box,          /* The simulation box                         */
-         double          t,            /* Time in picoseconds                        */
-         gmx_large_int_t step,         /* The time step                              */
 +        gmx_bool        bOutstepRot,  /* Output to main rotation output file        */
 +        gmx_bool        bOutstepSlab) /* Output per-slab data                       */
 +{
 +    int             ii, iigrp, ifit, j;
 +    rvec            xj;        /* Position */
 +    real            alpha;     /* a single angle between an actual and a reference position */
 +    real            weight;    /* single weight for a single angle */
 +    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
 +    rvec            xj_u;      /* xj - u */
 +    rvec            yj0_yc0;   /* yj0 -yc0 */
 +    rvec            tmpvec, tmpvec2;
 +    real            fac, fit_fac, fac2, Vpart = 0.0;
 +    rvec            rj, fit_rj, sj;
 +    real            sjrj;
 +    rvec            v_xj_u;    /* v x (xj - u) */
 +    real            psij, psijstar;
 +    real            mj, wj;    /* For mass-weighting of the positions */
 +    real            N_M;       /* N/M */
 +    gmx_bool        bPF;
 +    rvec            innersumvec;
 +    gmx_bool        bCalcPotFit;
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    bPF         = rotg->eType == erotgRM2PF;
 +    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype);
 +
 +
 +    clear_rvec(yj0_yc0); /* Make the compiler happy */
 +
 +    clear_rvec(innersumvec);
 +    if (bPF)
 +    {
 +        /* For the pivot-free variant we have to use the current center of
 +         * mass of the rotation group instead of the pivot u */
 +        get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center);
 +
 +        /* Also, we precalculate the second term of the forces that is identical
 +         * (up to the weight factor mj) for all forces */
 +        radial_motion2_precalc_inner_sum(rotg, innersumvec);
 +    }
 +
 +    N_M = rotg->nat * erg->invmass;
 +
 +    /* Each process calculates the forces on its local atoms */
 +    for (j = 0; j < erg->nat_loc; j++)
 +    {
 +        if (bPF)
 +        {
 +            /* Local index of a rotation group atom  */
 +            ii = erg->ind_loc[j];
 +            /* Position of this atom in the collective array */
 +            iigrp = erg->xc_ref_ind[j];
 +            /* Mass-weighting */
 +            mj = erg->mc[iigrp];
 +
 +            /* Current position of this atom: x[ii] */
 +            copy_rvec(x[ii], xj);
 +
 +            /* Shift this atom such that it is near its reference */
 +            shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
 +
 +            /* The (unrotated) reference position is yj0. yc0 has already
 +             * been subtracted in init_rot_group */
 +            copy_rvec(rotg->x_ref[iigrp], yj0_yc0);   /* yj0_yc0 = yj0 - yc0  */
 +
 +            /* Calculate Omega.(yj0-yc0) */
 +            mvmul(erg->rotmat, yj0_yc0, rj);         /* rj = Omega.(yj0-yc0)  */
 +        }
 +        else
 +        {
 +            mj = erg->m_loc[j];
 +            copy_rvec(erg->x_loc_pbc[j], xj);
 +            copy_rvec(erg->xr_loc[j], rj);           /* rj = Omega.(yj0-u)    */
 +        }
 +        /* Mass-weighting */
 +        wj = N_M*mj;
 +
 +        /* Calculate (xj-u) resp. (xj-xc) */
 +        rvec_sub(xj, erg->xc_center, xj_u);          /* xj_u = xj-u           */
 +
 +        cprod(rotg->vec, xj_u, v_xj_u);              /* v_xj_u = v x (xj-u)   */
 +
 +        fac = norm2(v_xj_u);
 +        /*                                 *                      1           */
 +        psijstar = 1.0/(fac + rotg->eps); /* psijstar = --------------------  */
 +                                          /*            |v x (xj-u)|^2 + eps  */
 +
 +        psij = gmx_invsqrt(fac);          /*                 1                */
 +                                          /*  psij    = ------------          */
 +                                          /*            |v x (xj-u)|          */
 +
 +        svmul(psij, v_xj_u, sj);          /*  sj = psij * (v x (xj-u) )       */
 +
 +        fac  = iprod(v_xj_u, rj);         /* fac = (v x (xj-u)).rj */
 +        fac2 = fac*fac;
 +
 +        sjrj = iprod(sj, rj);                        /* sjrj = sj.rj          */
 +
 +        svmul(psijstar/psij, rj, tmpvec);
 +        svmul(psijstar*psijstar/(psij*psij*psij) * sjrj, sj, tmpvec2);
 +        rvec_dec(tmpvec, tmpvec2);
 +        cprod(tmpvec, rotg->vec, tmpvec2);
 +
 +        /* Store the additional force so that it can be added to the force
 +         * array after the normal forces have been evaluated */
 +        svmul(-rotg->k*wj*sjrj, tmpvec2, tmpvec);
 +        svmul(mj, innersumvec, tmpvec2);  /* This is != 0 only for the pivot-free variant */
 +
 +        rvec_add(tmpvec2, tmpvec, erg->f_rot_loc[j]);
 +        Vpart += wj*psijstar*fac2;
 +
 +        /* If requested, also calculate the potential for a set of angles
 +         * near the current reference angle */
 +        if (bCalcPotFit)
 +        {
 +            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
 +            {
 +                if (bPF)
 +                {
 +                    mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, fit_rj); /* fit_rj = Omega.(yj0-yc0) */
 +                }
 +                else
 +                {
 +                    /* Position of this atom in the collective array */
 +                    iigrp = erg->xc_ref_ind[j];
 +                    /* Rotate with the alternative angle. Like rotate_local_reference(),
 +                     * just for a single local atom */
 +                    mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[iigrp], fit_rj); /* fit_rj = Omega*(yj0-u) */
 +                }
 +                fit_fac = iprod(v_xj_u, fit_rj);                                       /* fit_fac = (v x (xj-u)).fit_rj */
 +                /* Add to the rotation potential for this angle: */
 +                erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*psijstar*fit_fac*fit_fac;
 +            }
 +        }
 +
 +        if (bOutstepRot)
 +        {
 +            /* Add to the torque of this rotation group */
 +            erg->torque_v += torque(rotg->vec, erg->f_rot_loc[j], xj, erg->xc_center);
 +
 +            /* Calculate the angle between reference and actual rotation group atom. */
 +            angle(rotg, xj_u, rj, &alpha, &weight);  /* angle in rad, weighted */
 +            erg->angle_v  += alpha * weight;
 +            erg->weight_v += weight;
 +        }
 +
 +        PRINT_FORCE_J
 +
 +    } /* end of loop over local rotation group atoms */
 +    erg->V = 0.5*rotg->k*Vpart;
 +}
 +
 +
 +/* Determine the smallest and largest position vector (with respect to the
 + * rotation vector) for the reference group */
 +static void get_firstlast_atom_ref(
 +        t_rotgrp  *rotg,
 +        int       *firstindex,
 +        int       *lastindex)
 +{
 +    gmx_enfrotgrp_t erg;              /* Pointer to enforced rotation group data */
 +    int             i;
 +    real            xcproj;           /* The projection of a reference position on the
 +                                         rotation vector */
 +    real            minproj, maxproj; /* Smallest and largest projection on v */
 +
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Start with some value */
 +    minproj = iprod(rotg->x_ref[0], rotg->vec);
 +    maxproj = minproj;
 +
 +    /* This is just to ensure that it still works if all the atoms of the
 +     * reference structure are situated in a plane perpendicular to the rotation
 +     * vector */
 +    *firstindex = 0;
 +    *lastindex  = rotg->nat-1;
 +
 +    /* Loop over all atoms of the reference group,
 +     * project them on the rotation vector to find the extremes */
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        xcproj = iprod(rotg->x_ref[i], rotg->vec);
 +        if (xcproj < minproj)
 +        {
 +            minproj     = xcproj;
 +            *firstindex = i;
 +        }
 +        if (xcproj > maxproj)
 +        {
 +            maxproj    = xcproj;
 +            *lastindex = i;
 +        }
 +    }
 +}
 +
 +
 +/* Allocate the per-slab and per-atom work arrays of a rotation group that uses slabs */
 +static void allocate_slabs(
 +        t_rotgrp  *rotg,
 +        FILE      *fplog,     /* log file; may be NULL, then nothing is printed */
 +        int        g,         /* rotation group number, used in the log message */
 +        gmx_bool   bVerbose)  /* the log message is printed only if this is set */
 +{
 +    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
 +    int             i, nslabs;
 +
 +    /* Reads erg->slab_first_ref and erg->slab_last_ref, so both must be set before the call */
 +    erg = rotg->enfrotgrp;
 +
 +    /* More slabs than are defined for the reference are never needed */
 +    nslabs = erg->slab_last_ref - erg->slab_first_ref + 1;
 +
 +    /* Remember how many we allocated */
 +    erg->nslabs_alloc = nslabs;
 +
 +    if ( (NULL != fplog) && bVerbose)
 +    {
 +        fprintf(fplog, "%s allocating memory to store data for %d slabs (rotation group %d).\n",
 +                RotStr, nslabs, g);
 +    }
 +    snew(erg->slab_center, nslabs);      /* the following arrays get nslabs entries each */
 +    snew(erg->slab_center_ref, nslabs);
 +    snew(erg->slab_weights, nslabs);
 +    snew(erg->slab_torque_v, nslabs);
 +    snew(erg->slab_data, nslabs);
 +    snew(erg->gn_atom, nslabs);
 +    snew(erg->gn_slabind, nslabs);
 +    snew(erg->slab_innersumvec, nslabs);
 +    for (i = 0; i < nslabs; i++)         /* every slab_data entry holds rotg->nat x/ref/weight values */
 +    {
 +        snew(erg->slab_data[i].x, rotg->nat);
 +        snew(erg->slab_data[i].ref, rotg->nat);
 +        snew(erg->slab_data[i].weight, rotg->nat);
 +    }
 +    snew(erg->xc_ref_sorted, rotg->nat); /* one entry per atom of the group */
 +    snew(erg->xc_sortind, rotg->nat);    /* one entry per atom of the group */
 +    snew(erg->firstatom, nslabs);        /* one entry per slab */
 +    snew(erg->lastatom, nslabs);         /* one entry per slab */
 +}
 +
 +
- /* From the extreme coordinates of the reference group, determine the first
++/* From the extreme positions of the reference group, determine the first
 + * and last slab of the reference. We can never have more slabs in the real
 + * simulation than calculated here for the reference.
 + */
 +static void get_firstlast_slab_ref(t_rotgrp *rotg, real mc[], int ref_firstindex, int ref_lastindex)
 +{
 +    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
-     int             first, last, firststart;
++    int             first, last;
 +    rvec            dummy;    /* Passed by address to get_slab_weight(); its value is not read here */
 +
 +    /* Start from the slabs returned for the two extreme reference atoms (indices into rotg->x_ref) */
 +    erg        = rotg->enfrotgrp;
 +    first      = get_first_slab(rotg, erg->max_beta, rotg->x_ref[ref_firstindex]);
 +    last       = get_last_slab( rotg, erg->max_beta, rotg->x_ref[ref_lastindex ]);
-     firststart = first;
 +    /* Walk outwards in both directions for as long as the slab weight still exceeds WEIGHT_MIN */
 +    while (get_slab_weight(first, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN)
 +    {
 +        first--;
 +    }
 +    erg->slab_first_ref = first+1; /* one above the first slab whose weight did not exceed WEIGHT_MIN */
 +    while (get_slab_weight(last, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN)
 +    {
 +        last++;
 +    }
 +    erg->slab_last_ref  = last-1;  /* one below the first slab whose weight did not exceed WEIGHT_MIN */
- 
-     erg->slab_buffer = firststart - erg->slab_first_ref;
 +}
 +
 +
 +/* Special version of copy_rvec:
 + * During the copy procedure of xcurr to b, the correct PBC image is chosen
 + * such that the copied vector ends up near its reference position xref */
 +static inline void copy_correct_pbc_image(
 +        const rvec  xcurr,  /* copy vector xcurr ...                */
 +        rvec        b,      /* ... to b ...                         */
 +        const rvec  xref,   /* choosing the PBC image such that b ends up near xref */
 +        matrix      box,    /* box vectors; box[m][d] is component d of vector m    */
 +        int         npbcdim) /* shifts are determined for dimensions 0..npbcdim-1   */
 +{
 +    rvec  dx;
 +    int   d, m;
 +    ivec  shift;            /* net count of box vectors added to dx, per dimension */
 +
 +
 +    /* Displacement of the atom from its reference; the loop below folds it into range */
 +    rvec_sub(xcurr, xref, dx);
 +    /* Determine the shift for this atom: starting with the highest dimension, add or
 +     * subtract whole box vectors until -0.5*box[m][m] <= dx[m] < 0.5*box[m][m] */
 +    clear_ivec(shift);
 +    for (m = npbcdim-1; m >= 0; m--)
 +    {
 +        while (dx[m] < -0.5*box[m][m])
 +        {
 +            for (d = 0; d < DIM; d++)
 +            {
 +                dx[d] += box[m][d];
 +            }
 +            shift[m]++;
 +        }
 +        while (dx[m] >= 0.5*box[m][m])
 +        {
 +            for (d = 0; d < DIM; d++)
 +            {
 +                dx[d] -= box[m][d];
 +            }
 +            shift[m]--;
 +        }
 +    }
 +
 +    /* Copy the unshifted position and let shift_single_coord() apply the shift to it */
 +    copy_rvec(xcurr, b);
 +    shift_single_coord(box, b, shift);
 +}
 +
 +/* Set up rotation group g: allocate its work arrays, store masses and centers, record the start positions and, for flexible types, set up the slabs */
 +static void init_rot_group(FILE *fplog, t_commrec *cr, int g, t_rotgrp *rotg,
 +                           rvec *x, gmx_mtop_t *mtop, gmx_bool bVerbose, FILE *out_slabs, matrix box,
 +                           t_inputrec *ir, gmx_bool bOutputCenters)
 +{
 +    int                   i, ii;
 +    rvec                  coord, xref, *xdum;
 +    gmx_bool              bFlex, bColl;
 +    t_atom               *atom;
 +    gmx_enfrotgrp_t       erg; /* Pointer to enforced rotation group data */
 +    int                   ref_firstindex, ref_lastindex; /* Reference atoms with the extreme projections on rotg->vec */
 +    gmx_mtop_atomlookup_t alook = NULL;                  /* Only initialized and destroyed if rotg->bMassW is set     */
 +    real                  mass, totalmass;
 +    real                  start = 0.0;                   /* First angle offset of the potential-fit angle set         */
 +    double                t_start;                       /* ir->init_t + ir->init_step*ir->delta_t                    */
 +
 +    /* x is indexed with rotg->ind[] and is only read inside the MASTER(cr) branches below */
 +    /* Do we have a flexible axis? */
 +    bFlex = ISFLEX(rotg);
 +    /* Do we use a global set of coordinates? */
 +    bColl = ISCOLL(rotg);
 +
 +    erg = rotg->enfrotgrp;
 +
 +    /* Allocate space for collective coordinates if needed */
 +    if (bColl)
 +    {
 +        snew(erg->xc, rotg->nat);
 +        snew(erg->xc_shifts, rotg->nat);
 +        snew(erg->xc_eshifts, rotg->nat);
 +        snew(erg->xc_old, rotg->nat);
 +
 +        if (rotg->eFittype == erotgFitNORM)
 +        {
 +            snew(erg->xc_ref_length, rotg->nat); /* in case fit type NORM is chosen */
 +            snew(erg->xc_norm, rotg->nat);
 +        }
 +    }
 +    else
 +    {
 +        snew(erg->xr_loc, rotg->nat);
 +        snew(erg->x_loc_pbc, rotg->nat);
 +    }
 +
 +    snew(erg->f_rot_loc, rotg->nat);
 +    snew(erg->xc_ref_ind, rotg->nat);
 +
 +    /* Make space for the calculation of the potential at other angles (used
 +     * for fitting only) */
 +    if (erotgFitPOT == rotg->eFittype)
 +    {
 +        snew(erg->PotAngleFit, 1);
 +        snew(erg->PotAngleFit->degangle, rotg->PotAngle_nstep);
 +        snew(erg->PotAngleFit->V, rotg->PotAngle_nstep);
 +        snew(erg->PotAngleFit->rotmat, rotg->PotAngle_nstep);
 +
 +        /* The angle offsets are spaced by PotAngle_step and placed symmetrically around zero */
 +        start = -0.5 * (rotg->PotAngle_nstep - 1)*rotg->PotAngle_step;
 +        for (i = 0; i < rotg->PotAngle_nstep; i++)
 +        {
 +            erg->PotAngleFit->degangle[i] = start + i*rotg->PotAngle_step;
 +        }
 +    }
 +    else
 +    {
 +        erg->PotAngleFit = NULL;
 +    }
 +
 +    /* xc_ref_ind needs to be set to identity in the serial case */
 +    if (!PAR(cr))
 +    {
 +        for (i = 0; i < rotg->nat; i++)
 +        {
 +            erg->xc_ref_ind[i] = i;
 +        }
 +    }
 +
 +    /* Copy the masses so that the center can be determined. For all types of
 +     * enforced rotation, we store the masses in the erg->mc array. */
 +    if (rotg->bMassW)
 +    {
 +        alook = gmx_mtop_atomlookup_init(mtop);
 +    }
 +    snew(erg->mc, rotg->nat);
 +    if (bFlex)
 +    {
 +        snew(erg->mc_sorted, rotg->nat);
 +    }
 +    if (!bColl)
 +    {
 +        snew(erg->m_loc, rotg->nat);
 +    }
 +    totalmass = 0.0;
 +    for (i = 0; i < rotg->nat; i++)
 +    {
 +        if (rotg->bMassW)
 +        {
 +            gmx_mtop_atomnr_to_atom(alook, rotg->ind[i], &atom);
 +            mass = atom->m;
 +        }
 +        else
 +        {
 +            mass = 1.0; /* without mass weighting every atom counts equally */
 +        }
 +        erg->mc[i] = mass;
 +        totalmass += mass;
 +    }
 +    erg->invmass = 1.0/totalmass; /* NOTE(review): there is no guard against totalmass == 0 here */
 +
 +    if (rotg->bMassW)
 +    {
 +        gmx_mtop_atomlookup_destroy(alook);
 +    }
 +
 +    /* Set xc_ref_center for any rotation potential */
 +    if ((rotg->eType == erotgISO) || (rotg->eType == erotgPM) || (rotg->eType == erotgRM) || (rotg->eType == erotgRM2))
 +    {
 +        /* Set the pivot point for the fixed, stationary-axis potentials. This
 +         * won't change during the simulation */
 +        copy_rvec(rotg->pivot, erg->xc_ref_center);
 +        copy_rvec(rotg->pivot, erg->xc_center    );
 +    }
 +    else
 +    {
 +        /* Center of the reference positions */
 +        get_center(rotg->x_ref, erg->mc, rotg->nat, erg->xc_ref_center);
 +
 +        /* Center of the actual positions */
 +        if (MASTER(cr))
 +        {
 +            snew(xdum, rotg->nat); /* temporary copy of the group's positions, freed below */
 +            for (i = 0; i < rotg->nat; i++)
 +            {
 +                ii = rotg->ind[i];
 +                copy_rvec(x[ii], xdum[i]);
 +            }
 +            get_center(xdum, erg->mc, rotg->nat, erg->xc_center);
 +            sfree(xdum);
 +        }
 +#ifdef GMX_MPI
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(erg->xc_center), erg->xc_center, cr); /* presumably sends the master's value to all ranks */
 +        }
 +#endif
 +    }
 +
 +    if (bColl)
 +    {
 +        /* Save the original (whole) set of positions in xc_old such that at later
 +         * steps the rotation group can always be made whole again. If the simulation is
 +         * restarted, we compute the starting reference positions (given the time)
 +         * and assume that the correct PBC image of each position is the one nearest
 +         * to the current reference */
 +        if (MASTER(cr))
 +        {
 +            /* Calculate the rotation matrix for this angle: */
 +            t_start       = ir->init_t + ir->init_step*ir->delta_t;
 +            erg->degangle = rotg->rate * t_start;
 +            calc_rotmat(rotg->vec, erg->degangle, erg->rotmat);
 +
 +            for (i = 0; i < rotg->nat; i++)
 +            {
 +                ii = rotg->ind[i];
 +
 +                /* Subtract xc_ref_center, rotate, and add xc_ref_center again. This
 +                 * will yield the reference position for time t_start */
 +                rvec_sub(rotg->x_ref[i], erg->xc_ref_center, coord);
 +                mvmul(erg->rotmat, coord, xref);
 +                rvec_inc(xref, erg->xc_ref_center);
 +
 +                copy_correct_pbc_image(x[ii], erg->xc_old[i], xref, box, 3); /* 3: use all three dimensions */
 +            }
 +        }
 +#ifdef GMX_MPI
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(rotg->nat*sizeof(erg->xc_old[0]), erg->xc_old, cr);
 +        }
 +#endif
 +    }
 +
 +    if ( (rotg->eType != erotgFLEX) && (rotg->eType != erotgFLEX2) )
 +    {
 +        /* Shift the reference positions so that xc_ref_center becomes the origin: */
 +        for (i = 0; i < rotg->nat; i++)
 +        {
 +            rvec_dec(rotg->x_ref[i], erg->xc_ref_center);
 +        }
 +    }
 +
 +    /* Enforced rotation with flexible axis */
 +    if (bFlex)
 +    {
 +        /* Calculate maximum beta value from minimum gaussian (performance opt.) */
 +        erg->max_beta = calc_beta_max(rotg->min_gaussian, rotg->slab_dist);
 +
 +        /* Determine the smallest and largest coordinate with respect to the rotation vector */
 +        get_firstlast_atom_ref(rotg, &ref_firstindex, &ref_lastindex);
 +
-         /* From the extreme coordinates of the reference group, determine the first
++        /* From the extreme positions of the reference group, determine the first
 +         * and last slab of the reference. */
 +        get_firstlast_slab_ref(rotg, erg->mc, ref_firstindex, ref_lastindex);
 +
 +        /* Allocate memory for the slabs */
 +        allocate_slabs(rotg, fplog, g, bVerbose);
 +
 +        /* Flexible rotation: determine the reference centers for the rest of the simulation */
 +        erg->slab_first = erg->slab_first_ref;
 +        erg->slab_last  = erg->slab_last_ref;
 +        get_slab_centers(rotg, rotg->x_ref, erg->mc, g, -1, out_slabs, bOutputCenters, TRUE);
 +        /* Length of each x_rotref vector from center (needed if fit routine NORM is chosen).
 +         * NOTE(review): xc_ref_length is allocated above only if bColl; confirm bFlex implies bColl */
 +        if (rotg->eFittype == erotgFitNORM)
 +        {
 +            for (i = 0; i < rotg->nat; i++)
 +            {
 +                rvec_sub(rotg->x_ref[i], erg->xc_ref_center, coord); /* NOTE(review): center was already subtracted above unless the type is FLEX/FLEX2 */
 +                erg->xc_ref_length[i] = norm(coord);
 +            }
 +        }
 +    }
 +}
 +
 +/* For every rotation group, call dd_make_local_group_indices() with dd->ga2la to set up the group's local index data */
 +extern void dd_make_local_rotation_groups(gmx_domdec_t *dd, t_rot *rot)
 +{
 +    gmx_ga2la_t     ga2la;
 +    int             g;
 +    t_rotgrp       *rotg;
 +    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
 +
 +    ga2la = dd->ga2la;
 +
 +    for (g = 0; g < rot->ngrp; g++)
 +    {
 +        rotg = &rot->grp[g];
 +        erg  = rotg->enfrotgrp;
 +
 +        /* nat_loc, ind_loc and nalloc_loc are passed by address, presumably to be updated by the callee */
 +        dd_make_local_group_indices(ga2la, rotg->nat, rotg->ind,
 +                                    &erg->nat_loc, &erg->ind_loc, &erg->nalloc_loc, erg->xc_ref_ind);
 +    }
 +}
 +
 +
 +/* Calculate the size of the MPI buffer needed in reduce_output(); returns the item count summed over all groups */
 +static int calc_mpi_bufsize(t_rot *rot)
 +{
 +    int             g;
 +    int             count_group, count_total; /* Items for the current group / for all groups so far */
 +    t_rotgrp       *rotg;
 +    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
 +
 +
 +    count_total = 0;
 +    for (g = 0; g < rot->ngrp; g++)
 +    {
 +        rotg = &rot->grp[g];
 +        erg  = rotg->enfrotgrp;
 +
 +        /* Count the items that are transferred for this group: */
 +        count_group = 4; /* V, torque, angle, weight */
 +
 +        /* Add the maximum number of slabs for flexible groups */
 +        if (ISFLEX(rotg))
 +        {
 +            count_group += erg->slab_last_ref - erg->slab_first_ref + 1; /* one item per reference slab */
 +        }
 +
 +        /* Add space for the potentials at different angles: */
 +        if (erotgFitPOT == rotg->eFittype)
 +        {
 +            count_group += rotg->PotAngle_nstep; /* one item per fit angle */
 +        }
 +
 +        /* Add to the total number: */
 +        count_total += count_group;
 +    }
 +
 +    return count_total;
 +}
 +
 +/* Set up enforced rotation: allocate rot->enfrot, initialize every rotation group, allocate the buffers and open the output files on the master */
 +extern void init_rot(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[],
 +                     t_commrec *cr, rvec *x, matrix box, gmx_mtop_t *mtop, const output_env_t oenv,
 +                     gmx_bool bVerbose, unsigned long Flags)
 +{
 +    t_rot          *rot;
 +    t_rotgrp       *rotg;
 +    int             g;
 +    int             nat_max = 0;  /* Size of biggest rotation group */
 +    gmx_enfrot_t    er;           /* Pointer to the enforced rotation buffer variables */
 +    gmx_enfrotgrp_t erg;          /* Pointer to enforced rotation group data */
 +    rvec           *x_pbc = NULL; /* Space for the pbc-correct atom positions */
 +
 +    /* A parallel run without domain decomposition is rejected with a fatal error */
 +    if ( (PAR(cr)) && !DOMAINDECOMP(cr) )
 +    {
 +        gmx_fatal(FARGS, "Enforced rotation is only implemented for domain decomposition!");
 +    }
 +
 +    if (MASTER(cr) && bVerbose)
 +    {
 +        fprintf(stdout, "%s Initializing ...\n", RotStr);
 +    }
 +
 +    rot = ir->rot;
 +    snew(rot->enfrot, 1);
 +    er        = rot->enfrot;
 +    er->Flags = Flags;
 +
 +    /* When appending, skip first output to avoid duplicate entries in the data files */
 +    if (er->Flags & MD_APPENDFILES)
 +    {
 +        er->bOut = FALSE;
 +    }
 +    else
 +    {
 +        er->bOut = TRUE;
 +    }
 +
 +    if (MASTER(cr) && er->bOut)
 +    {
 +        please_cite(fplog, "Kutzner2011");
 +    }
 +
 +    /* Output every step for reruns */
 +    if (er->Flags & MD_RERUN)
 +    {
 +        if (NULL != fplog)
 +        {
 +            fprintf(fplog, "%s rerun - will write rotation output every available step.\n", RotStr);
 +        }
 +        rot->nstrout = 1;
 +        rot->nstsout = 1;
 +    }
 +
 +    er->out_slabs = NULL; /* stays NULL unless this is the master and there are flexible groups */
 +    if (MASTER(cr) && HaveFlexibleGroups(rot) )
 +    {
-         er->out_slabs = open_slab_out(opt2fn("-rs", nfile, fnm), rot, oenv);
++        er->out_slabs = open_slab_out(opt2fn("-rs", nfile, fnm), rot);
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        /* Remove pbc, make molecule whole.
 +         * When ir->bContinuation=TRUE this has already been done, but ok. */
 +        snew(x_pbc, mtop->natoms);
 +        m_rveccopy(mtop->natoms, x, x_pbc);
 +        do_pbc_first_mtop(NULL, ir->ePBC, box, mtop, x_pbc);
 +        /* All molecules will be whole now, but not necessarily in the home box.
 +         * Additionally, if a rotation group consists of more than one molecule
 +         * (e.g. two strands of DNA), each one of them can end up in a different
 +         * periodic box. This is taken care of in init_rot_group.  */
 +    }
 +    /* On non-master ranks x_pbc is still NULL here; init_rot_group() reads it only inside MASTER(cr) branches */
 +    for (g = 0; g < rot->ngrp; g++)
 +    {
 +        rotg = &rot->grp[g];
 +
 +        if (NULL != fplog)
 +        {
 +            fprintf(fplog, "%s group %d type '%s'\n", RotStr, g, erotg_names[rotg->eType]);
 +        }
 +        /* NOTE(review): for nat == 0 no enfrotgrp is allocated, yet do_rotation() dereferences rotg->enfrotgrp for every group -- confirm nat > 0 always holds */
 +        if (rotg->nat > 0)
 +        {
 +            /* Allocate space for the rotation group's data: */
 +            snew(rotg->enfrotgrp, 1);
 +            erg  = rotg->enfrotgrp;
 +
 +            nat_max = max(nat_max, rotg->nat);
 +
 +            if (PAR(cr))
 +            {
 +                erg->nat_loc    = 0;
 +                erg->nalloc_loc = 0;
 +                erg->ind_loc    = NULL;
 +            }
 +            else
 +            {
 +                erg->nat_loc = rotg->nat;
 +                erg->ind_loc = rotg->ind; /* serial case: points to the group's own index array, no copy is made */
 +            }
 +            init_rot_group(fplog, cr, g, rotg, x_pbc, mtop, bVerbose, er->out_slabs, box, ir,
 +                           !(er->Flags & MD_APPENDFILES) ); /* Do not output the reference centers
 +                                                             * again if we are appending */
 +        }
 +    }
 +
 +    /* Allocate space for enforced rotation buffer variables, sized for the biggest group */
 +    er->bufsize = nat_max;
 +    snew(er->data, nat_max);
 +    snew(er->xbuf, nat_max);
 +    snew(er->mbuf, nat_max);
 +
 +    /* Buffers for MPI reducing torques, angles, weights (for each group), and V */
 +    if (PAR(cr))
 +    {
 +        er->mpi_bufsize = calc_mpi_bufsize(rot) + 100; /* larger to catch errors */
 +        snew(er->mpi_inbuf, er->mpi_bufsize);
 +        snew(er->mpi_outbuf, er->mpi_bufsize);
 +    }
 +    else
 +    {
 +        er->mpi_bufsize = 0;
 +        er->mpi_inbuf   = NULL;
 +        er->mpi_outbuf  = NULL;
 +    }
 +
 +    /* Only do I/O on the MASTER */
 +    er->out_angles  = NULL;
 +    er->out_rot     = NULL;
 +    er->out_torque  = NULL;
 +    if (MASTER(cr))
 +    {
 +        er->out_rot = open_rot_out(opt2fn("-ro", nfile, fnm), rot, oenv);
 +
 +        if (rot->nstsout > 0) /* the angle and torque files are opened only if nstsout > 0 */
 +        {
 +            if (HaveFlexibleGroups(rot) || HavePotFitGroups(rot) )
 +            {
-                 er->out_angles  = open_angles_out(opt2fn("-ra", nfile, fnm), rot, oenv);
++                er->out_angles  = open_angles_out(opt2fn("-ra", nfile, fnm), rot);
 +            }
 +            if (HaveFlexibleGroups(rot) )
 +            {
-                 er->out_torque  = open_torque_out(opt2fn("-rt", nfile, fnm), rot, oenv);
++                er->out_torque  = open_torque_out(opt2fn("-rt", nfile, fnm), rot);
 +            }
 +        }
 +
 +        sfree(x_pbc); /* allocated above in the first MASTER(cr) branch */
 +    }
 +}
 +
 +/* Close the rotation output files held in rot->enfrot; handles that are NULL are skipped */
- extern void finish_rot(FILE *fplog, t_rot *rot)
++extern void finish_rot(t_rot *rot)
 +{
 +    gmx_enfrot_t er;        /* Pointer to the enforced rotation buffer variables */
 +
 +    /* Only files are closed here; no memory is released in this function */
 +    er = rot->enfrot;
 +    if (er->out_rot)
 +    {
 +        gmx_fio_fclose(er->out_rot);
 +    }
 +    if (er->out_slabs)
 +    {
 +        gmx_fio_fclose(er->out_slabs);
 +    }
 +    if (er->out_angles)
 +    {
 +        gmx_fio_fclose(er->out_angles);
 +    }
 +    if (er->out_torque)
 +    {
 +        gmx_fio_fclose(er->out_torque);
 +    }
 +}
 +
 +
 +/* Rotate the local reference positions and store them in
 + * erg->xr_loc[0...(nat_loc-1)]
 + *
 + * Note that we already subtracted u or y_c from the reference positions
 + * in init_rot_group().
 + */
 +static void rotate_local_reference(t_rotgrp *rotg)
 +{
 +    gmx_enfrotgrp_t erg;
 +    int             i, ii;
 +
 +    /* Uses the rotation matrix currently stored in erg->rotmat */
 +    erg = rotg->enfrotgrp;
 +
 +    for (i = 0; i < erg->nat_loc; i++)
 +    {
 +        /* Index of this rotation group atom with respect to the whole rotation group */
 +        ii = erg->xc_ref_ind[i];
 +        /* Rotate: xr_loc[i] = rotmat * x_ref[ii] */
 +        mvmul(erg->rotmat, rotg->x_ref[ii], erg->xr_loc[i]);
 +    }
 +}
 +
 +
 +/* Select the PBC representation for each local x position and store that
 + * in erg->x_loc_pbc for later usage. We assume the right PBC image of an x is
 + * the one nearest to its rotated reference (erg->xr_loc plus xc_ref_center) */
 +static void choose_pbc_image(rvec x[], t_rotgrp *rotg, matrix box, int npbcdim)
 +{
 +    int             i, ii;
 +    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
 +    rvec            xref;      /* Rotated reference position with xc_ref_center added back */
 +
 +
 +    erg = rotg->enfrotgrp;
 +
 +    for (i = 0; i < erg->nat_loc; i++)
 +    {
 +        /* Index of a rotation group atom, used to address x[] */
 +        ii = erg->ind_loc[i];
 +
 +        /* Get the correctly rotated reference position. The pivot was already
 +         * subtracted in init_rot_group() from the reference positions. Also,
 +         * the reference positions have already been rotated in
 +         * rotate_local_reference(). For the current reference position we thus
 +         * only need to add the pivot again. */
 +        copy_rvec(erg->xr_loc[i], xref);
 +        rvec_inc(xref, erg->xc_ref_center);
 +
 +        copy_correct_pbc_image(x[ii], erg->x_loc_pbc[i], xref, box, npbcdim);
 +    }
 +}
 +
 +/* Per-step driver: first do the communication for all rotation groups, then call the potential routine that matches each group's type */
 +extern void do_rotation(
 +        t_commrec      *cr,
 +        t_inputrec     *ir,
 +        matrix          box,
 +        rvec            x[],
 +        real            t,      /* time; the angle of each group is set to rotg->rate * t */
 +        gmx_large_int_t step,   /* decides, with nstrout/nstsout, whether output is due  */
 +        gmx_wallcycle_t wcycle, /* only tested for being non-zero in this function       */
 +        gmx_bool        bNS)    /* if set, the local mass arrays are refilled            */
 +{
 +    int             g, i, ii;
 +    t_rot          *rot;
 +    t_rotgrp       *rotg;
 +    gmx_bool        outstep_slab, outstep_rot;
 +    gmx_bool        bFlex, bColl;
 +    gmx_enfrot_t    er;         /* Pointer to the enforced rotation buffer variables */
 +    gmx_enfrotgrp_t erg;        /* Pointer to enforced rotation group data           */
 +    rvec            transvec;
 +    t_gmx_potfit   *fit = NULL; /* For fit type 'potential' determine the fit
 +                                   angle via the potential minimum            */
 +
 +    /* Enforced rotation cycle counting: */
 +    gmx_cycles_t cycles_comp;   /* Cycles for the enf. rotation computation
 +                                   only, does not count communication. This
 +                                   counter is used for load-balancing         */
 +
 +#ifdef TAKETIME
 +    double t0;
 +#endif
 +
 +    rot = ir->rot;
 +    er  = rot->enfrot;
 +
 +    /* When to output in main rotation output file */
 +    outstep_rot  = do_per_step(step, rot->nstrout) && er->bOut;
 +    /* When to output per-slab data */
 +    outstep_slab = do_per_step(step, rot->nstsout) && er->bOut;
 +
 +    /* Output time into rotation output file */
 +    if (outstep_rot && MASTER(cr))
 +    {
 +        fprintf(er->out_rot, "%12.3e", t);
 +    }
 +
 +    /**************************************************************************/
 +    /* First do ALL the communication! */
 +    for (g = 0; g < rot->ngrp; g++)
 +    {
 +        rotg = &rot->grp[g];
 +        erg  = rotg->enfrotgrp;
 +
 +        /* Do we have a flexible axis? */
 +        bFlex = ISFLEX(rotg); /* assigned, but bFlex is not read anywhere in this function */
 +        /* Do we use a collective (global) set of coordinates? */
 +        bColl = ISCOLL(rotg);
 +
 +        /* Calculate the rotation matrix for this angle: */
 +        erg->degangle = rotg->rate * t;
 +        calc_rotmat(rotg->vec, erg->degangle, erg->rotmat);
 +
 +        if (bColl)
 +        {
 +            /* Transfer the rotation group's positions such that every node has
 +             * all of them. Every node contributes its local positions x and stores
 +             * it in the collective erg->xc array. */
 +            communicate_group_positions(cr, erg->xc, erg->xc_shifts, erg->xc_eshifts, bNS,
 +                                        x, rotg->nat, erg->nat_loc, erg->ind_loc, erg->xc_ref_ind, erg->xc_old, box);
 +        }
 +        else
 +        {
 +            /* Fill the local masses array;
 +             * this array changes in DD/neighborsearching steps */
 +            if (bNS)
 +            {
 +                for (i = 0; i < erg->nat_loc; i++)
 +                {
 +                    /* Index of local atom w.r.t. the collective rotation group */
 +                    ii            = erg->xc_ref_ind[i];
 +                    erg->m_loc[i] = erg->mc[ii];
 +                }
 +            }
 +
 +            /* Calculate Omega*(y_i-y_c) for the local positions */
 +            rotate_local_reference(rotg);
 +
 +            /* Choose the nearest PBC images of the group atoms with respect
 +             * to the rotated reference positions */
 +            choose_pbc_image(x, rotg, box, 3); /* 3: use all three dimensions */
 +
 +            /* Get the center of the rotation group */
 +            if ( (rotg->eType == erotgISOPF) || (rotg->eType == erotgPMPF) )
 +            {
 +                get_center_comm(cr, erg->x_loc_pbc, erg->m_loc, erg->nat_loc, rotg->nat, erg->xc_center);
 +            }
 +        }
 +
 +    } /* End of loop over rotation groups */
 +
 +    /**************************************************************************/
 +    /* Done communicating, we can start to count cycles for the load balancing now ... */
 +    cycles_comp = gmx_cycles_read();
 +
 +
 +#ifdef TAKETIME
 +    t0 = MPI_Wtime();
 +#endif
 +
 +    for (g = 0; g < rot->ngrp; g++)
 +    {
 +        rotg = &rot->grp[g];
 +        erg  = rotg->enfrotgrp;
 +        /* bFlex and bColl are refreshed here but are not read inside this loop */
 +        bFlex = ISFLEX(rotg);
 +        bColl = ISCOLL(rotg);
 +
 +        if (outstep_rot && MASTER(cr))
 +        {
 +            fprintf(er->out_rot, "%12.4f", erg->degangle);
 +        }
 +
 +        /* Calculate angles and rotation matrices for potential fitting: */
 +        if ( (outstep_rot || outstep_slab) && (erotgFitPOT == rotg->eFittype) )
 +        {
 +            fit = erg->PotAngleFit;
 +            for (i = 0; i < rotg->PotAngle_nstep; i++)
 +            {
 +                calc_rotmat(rotg->vec, erg->degangle + fit->degangle[i], fit->rotmat[i]);
 +
 +                /* Clear value from last step */
 +                erg->PotAngleFit->V[i] = 0.0;
 +            }
 +        }
 +
 +        /* Clear values from last time step */
 +        erg->V        = 0.0;
 +        erg->torque_v = 0.0;
 +        erg->angle_v  = 0.0;
 +        erg->weight_v = 0.0;
 +        /* Dispatch on the rotation type; an unknown type ends in gmx_fatal() */
 +        switch (rotg->eType)
 +        {
 +            case erotgISO:
 +            case erotgISOPF:
 +            case erotgPM:
 +            case erotgPMPF:
-                 do_fixed(rotg, x, box, t, step, outstep_rot, outstep_slab);
++                do_fixed(rotg, outstep_rot, outstep_slab);
 +                break;
 +            case erotgRM:
-                 do_radial_motion(rotg, x, box, t, step, outstep_rot, outstep_slab);
++                do_radial_motion(rotg, outstep_rot, outstep_slab);
 +                break;
 +            case erotgRMPF:
-                 do_radial_motion_pf(rotg, x, box, t, step, outstep_rot, outstep_slab);
++                do_radial_motion_pf(rotg, x, box, outstep_rot, outstep_slab);
 +                break;
 +            case erotgRM2:
 +            case erotgRM2PF:
-                 do_radial_motion2(rotg, x, box, t, step, outstep_rot, outstep_slab);
++                do_radial_motion2(rotg, x, box, outstep_rot, outstep_slab);
 +                break;
 +            case erotgFLEXT:
 +            case erotgFLEX2T:
 +                /* Subtract the center of the rotation group from the collective positions array
 +                 * Also store the center in erg->xc_center since it needs to be subtracted
 +                 * in the low level routines from the local coordinates as well */
 +                get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center);
 +                svmul(-1.0, erg->xc_center, transvec);
 +                translate_x(erg->xc, rotg->nat, transvec);
-                 do_flexible(MASTER(cr), er, rotg, g, x, box, t, step, outstep_rot, outstep_slab);
++                do_flexible(MASTER(cr), er, rotg, g, x, box, t, outstep_rot, outstep_slab);
 +                break;
 +            case erotgFLEX:
 +            case erotgFLEX2:
 +                /* Do NOT subtract the center of mass in the low level routines! */
 +                clear_rvec(erg->xc_center);
-                 do_flexible(MASTER(cr), er, rotg, g, x, box, t, step, outstep_rot, outstep_slab);
++                do_flexible(MASTER(cr), er, rotg, g, x, box, t, outstep_rot, outstep_slab);
 +                break;
 +            default:
 +                gmx_fatal(FARGS, "No such rotation potential.");
 +                break;
 +        }
 +    }
 +    /* NOTE(review): step has type gmx_large_int_t but is printed with %d below -- check the format specifier */
 +#ifdef TAKETIME
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "%s calculation (step %d) took %g seconds.\n", RotStr, step, MPI_Wtime()-t0);
 +    }
 +#endif
 +
 +    /* Stop the enforced rotation cycle counter and add the computation-only
 +     * cycles to the force cycles for load balancing */
 +    cycles_comp  = gmx_cycles_read() - cycles_comp;
 +
 +    if (DOMAINDECOMP(cr) && wcycle)
 +    {
 +        dd_cycles_add(cr->dd, cycles_comp, ddCyclF);
 +    }
 +}
diff --cc src/gromacs/mdlib/shakef.c
index 57a6a8d386,0000000000..659fc32889
mode 100644,000000..100644
--- a/src/gromacs/mdlib/shakef.c
+++ b/src/gromacs/mdlib/shakef.c
@@@ -1,561 -1,0 +1,576 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "pbc.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "nrnb.h"
 +#include "constr.h"
 +
 +typedef struct gmx_shakedata /* Scratch arrays and omega-adaptation state used by vec_shakef() and bshakef() */
 +{
 +    rvec *rij;    /* per constraint: x[i]-x[j], filled in vec_shakef() */
 +    real *M2;     /* per constraint: 1/(2*(invmass[i]+invmass[j])) */
 +    real *tt;     /* per constraint: 1/(dist2*2*tol), scale for the convergence test */
 +    real *dist2;  /* per constraint: squared target length */
 +    int   nalloc; /* number of constraints the four arrays above are sized for */
 +    /* SOR stuff: only updated in bshakef() when ir->bShakeSOR is set */
 +    real  delta;  /* increment added to omega after each bshakef() call */
 +    real  omega;  /* factor handed to vec_shakef() as its omega argument */
 +    real  gamma;  /* tnit (total iteration count) of the previous bshakef() call */
 +} t_gmx_shakedata;
 +
 +gmx_shakedata_t shake_init() /* Create a shake data object with no scratch arrays yet and default SOR settings; returns it */
 +{
 +    gmx_shakedata_t d;
 +
 +    snew(d, 1);
 +
 +    d->nalloc = 0; /* arrays are (re)allocated on demand in vec_shakef() */
 +    d->rij    = NULL;
 +    d->M2     = NULL;
 +    d->tt     = NULL;
 +    d->dist2  = NULL;
 +
 +    /* SOR initialization: omega starts at 1.0 and is changed in steps of delta; gamma starts at a large iteration count */
 +    d->delta = 0.1;
 +    d->omega = 1.0;
 +    d->gamma = 1000000;
 +
 +    return d;
 +}
 +
 +static void pv(FILE *log, char *s, rvec x) /* Print label s followed by the DIM components of x on one line of log */
 +{
 +    int m;
 +
 +    fprintf(log, "%5s:", s);
 +    for (m = 0; (m < DIM); m++)
 +    {
 +        fprintf(log, "  %10.3f", x[m]);
 +    }
 +    fprintf(log, "\n");
 +    fflush(log); /* flush right away so the line is not left sitting in the stream buffer */
 +}
 +
 +void cshake(atom_id iatom[], int ncon, int *nnit, int maxnit,
 +            real dist2[], real xp[], real rij[], real m2[], real omega,
 +            real invmass[], real tt[], real lagr[], int *nerror)
 +{
 +    /* Position solver: sweeps over the ncon constraints (at most maxnit sweeps), shifting xp along rij until no constraint needs correcting.
 +     *     r.c. van schaik and w.f. van gunsteren
 +     *     eth zuerich
 +     *     june 1992
 +     *     Adapted for use with Gromacs by David van der Spoel november 92 and later.
 +     * Outputs: *nnit = number of sweeps done; *nerror = 0, or 1 + index of the constraint that failed; lagr[] accumulates acor. */
 +    /* Relative lower bound on rrpr used in the error check below; default should be increased! MRS 8/4/2009 */
 +    const   real mytol = 1e-10;
 +
 +    int          ll, i, j, i3, j3, l3;
 +    int          ix, iy, iz, jx, jy, jz;
 +    real         toler, rpij2, rrpr, tx, ty, tz, diff, acor, im, jm;
 +    real         xh, yh, zh, rijx, rijy, rijz;
 +    real         tix, tiy, tiz;
 +    real         tjx, tjy, tjz;
 +    int          nit, error, nconv;
 +    real         iconvf;
 +
 +    error = 0;
 +    nconv = 1;
 +    for (nit = 0; (nit < maxnit) && (nconv != 0) && (error == 0); nit++)
 +    {
 +        nconv = 0;
 +        for (ll = 0; (ll < ncon) && (error == 0); ll++)
 +        {
 +            l3    = 3*ll; /* iatom[] and rij[] are both indexed in strides of 3 per constraint */
 +            rijx  = rij[l3+XX];
 +            rijy  = rij[l3+YY];
 +            rijz  = rij[l3+ZZ];
 +            i     = iatom[l3+1]; /* entry l3+0 is not read here; +1 and +2 are the two atom indices */
 +            j     = iatom[l3+2];
 +            i3    = 3*i;
 +            j3    = 3*j;
 +            ix    = i3+XX;
 +            iy    = i3+YY;
 +            iz    = i3+ZZ;
 +            jx    = j3+XX;
 +            jy    = j3+YY;
 +            jz    = j3+ZZ;
 +
 +            tx      = xp[ix]-xp[jx];
 +            ty      = xp[iy]-xp[jy];
 +            tz      = xp[iz]-xp[jz];
 +            rpij2   = tx*tx+ty*ty+tz*tz; /* current squared distance between atoms i and j */
 +            toler   = dist2[ll];
 +            diff    = toler-rpij2;
 +
 +            /* iconvf is less than 1 when the error is smaller than a bound */
 +            /* But if tt is too big, then it will result in looping in iconv */
 +
 +            iconvf = fabs(diff)*tt[ll];
 +
 +            if (iconvf > 1)
 +            {
 +                nconv   = iconvf; /* nonzero here (iconvf > 1), which requests another sweep */
 +                rrpr    = rijx*tx+rijy*ty+rijz*tz; /* inner product of rij with the current i-j vector */
 +
 +                if (rrpr < toler*mytol)
 +                {
 +                    error = ll+1; /* 1-based so that 0 can mean "no error"; also ends both loops */
 +                }
 +                else
 +                {
 +                    acor      = omega*diff*m2[ll]/rrpr;
 +                    lagr[ll] += acor;
 +                    xh        = rijx*acor;
 +                    yh        = rijy*acor;
 +                    zh        = rijz*acor;
 +                    im        = invmass[i];
 +                    jm        = invmass[j];
 +                    xp[ix]   += xh*im; /* the two atoms are moved in opposite directions, weighted by inverse mass */
 +                    xp[iy]   += yh*im;
 +                    xp[iz]   += zh*im;
 +                    xp[jx]   -= xh*jm;
 +                    xp[jy]   -= yh*jm;
 +                    xp[jz]   -= zh*jm;
 +                }
 +            }
 +        }
 +    }
 +    *nnit   = nit;
 +    *nerror = error;
 +}
 +
 +int vec_shakef(FILE *fplog, gmx_shakedata_t shaked,
 +               int natoms, real invmass[], int ncon,
 +               t_iparams ip[], t_iatom *iatom,
 +               real tol, rvec x[], rvec prime[], real omega,
 +               gmx_bool bFEP, real lambda, real lagr[],
 +               real invdt, rvec *v,
 +               gmx_bool bCalcVir, tensor vir_r_m_dr, int econq,
 +               t_vetavars *vetavar)
 +{   /* Handle one block of ncon constraints: fill the per-constraint arrays, run cshake() or crattle() on prime, then update v, virial and lagr. */
 +    rvec    *rij;
 +    real    *M2, *tt, *dist2;
 +    int      maxnit = 1000;
 +    int      nit    = 0, ll, i, j, type;
 +    t_iatom *ia;
 +    real     L1, tol2, toler;
 +    real     mm    = 0., tmp;
 +    int      error = 0;
 +    real     g, vscale, rscale, rvscale;
 +
 +    if (ncon > shaked->nalloc) /* grow the scratch arrays only when this block needs more room than is allocated */
 +    {
 +        shaked->nalloc = over_alloc_dd(ncon);
 +        srenew(shaked->rij, shaked->nalloc);
 +        srenew(shaked->M2, shaked->nalloc);
 +        srenew(shaked->tt, shaked->nalloc);
 +        srenew(shaked->dist2, shaked->nalloc);
 +    }
 +    rij   = shaked->rij;
 +    M2    = shaked->M2;
 +    tt    = shaked->tt;
 +    dist2 = shaked->dist2;
 +
 +    L1   = 1.0-lambda;
 +    tol2 = 2.0*tol;
 +    ia   = iatom;
 +    for (ll = 0; (ll < ncon); ll++, ia += 3) /* each constraint is a triple: parameter type, atom i, atom j */
 +    {
 +        type  = ia[0];
 +        i     = ia[1];
 +        j     = ia[2];
 +
 +        mm          = 2*(invmass[i]+invmass[j]);
 +        rij[ll][XX] = x[i][XX]-x[j][XX];
 +        rij[ll][YY] = x[i][YY]-x[j][YY];
 +        rij[ll][ZZ] = x[i][ZZ]-x[j][ZZ];
 +        M2[ll]      = 1.0/mm;
 +        if (bFEP)
 +        {
 +            toler = sqr(L1*ip[type].constr.dA + lambda*ip[type].constr.dB); /* length interpolated linearly between dA and dB, squared */
 +        }
 +        else
 +        {
 +            toler = sqr(ip[type].constr.dA);
 +        }
 +        dist2[ll] = toler;
 +        tt[ll]    = 1.0/(toler*tol2); /* the solvers treat a constraint as unconverged when |error|*tt > 1 */
 +    }
 +
 +    switch (econq) /* any other econq value runs no solver and leaves nit at 0 */
 +    {
 +        case econqCoord:
 +            cshake(iatom, ncon, &nit, maxnit, dist2, prime[0], rij[0], M2, omega, invmass, tt, lagr, &error);
 +            break;
 +        case econqVeloc:
 +            crattle(iatom, ncon, &nit, maxnit, dist2, prime[0], rij[0], M2, omega, invmass, tt, lagr, &error, invdt, vetavar);
 +            break;
 +    }
 +
 +    if (nit >= maxnit)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog, "Shake did not converge in %d steps\n", maxnit);
 +        }
 +        fprintf(stderr, "Shake did not converge in %d steps\n", maxnit);
 +        nit = 0; /* a return value of 0 is what bshakef() treats as failure */
 +    }
 +    else if (error != 0) /* error is 1 + the index of the failing constraint, hence error-1 below */
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog, "Inner product between old and new vector <= 0.0!\n"
 +                    "constraint #%d atoms %u and %u\n",
 +                    error-1, iatom[3*(error-1)+1]+1, iatom[3*(error-1)+2]+1);
 +        }
 +        fprintf(stderr, "Inner product between old and new vector <= 0.0!\n"
 +                "constraint #%d atoms %u and %u\n",
 +                error-1, iatom[3*(error-1)+1]+1, iatom[3*(error-1)+2]+1);
 +        nit = 0;
 +    }
 +
 +    /* Constraint virial and correct the lagrange multipliers for the length (this loop also runs after a failure above) */
 +
 +    ia = iatom;
 +
 +    for (ll = 0; (ll < ncon); ll++, ia += 3)
 +    {
 +
 +        if ((econq == econqCoord) && v != NULL)
 +        {
 +            /* Correct the velocities: opposite-signed updates along rij for the two atoms */
 +            mm = lagr[ll]*invmass[ia[1]]*invdt/vetavar->rscale;
 +            for (i = 0; i < DIM; i++)
 +            {
 +                v[ia[1]][i] += mm*rij[ll][i];
 +            }
 +            mm = lagr[ll]*invmass[ia[2]]*invdt/vetavar->rscale;
 +            for (i = 0; i < DIM; i++)
 +            {
 +                v[ia[2]][i] -= mm*rij[ll][i];
 +            }
 +            /* 16 flops */
 +        }
 +
 +        /* constraint virial: subtract mm * rij (outer product) rij, with mm chosen per econq */
 +        if (bCalcVir)
 +        {
 +            if (econq == econqCoord)
 +            {
 +                mm = lagr[ll]/vetavar->rvscale;
 +            }
 +            if (econq == econqVeloc)
 +            {
 +                mm = lagr[ll]/(vetavar->vscale*vetavar->vscale_nhc[0]);
 +            }
 +            for (i = 0; i < DIM; i++)
 +            {
 +                tmp = mm*rij[ll][i];
 +                for (j = 0; j < DIM; j++)
 +                {
 +                    vir_r_m_dr[i][j] -= tmp*rij[ll][j];
 +                }
 +            }
 +            /* 21 flops */
 +        }
 +
 +        /* Correct the lagrange multipliers for the length  */
 +        /* Note: only the non-FEP branch scales lagr[ll]; with bFEP toler is computed here but lagr[ll] is left unscaled */
 +
 +        type  = ia[0];
 +        if (bFEP)
 +        {
 +            toler = L1*ip[type].constr.dA + lambda*ip[type].constr.dB;
 +        }
 +        else
 +        {
 +            toler     = ip[type].constr.dA;
 +            lagr[ll] *= toler;
 +        }
 +    }
 +
 +    return nit; /* 0 after non-convergence or a solver error, otherwise the number of sweeps done */
 +}
 +
 +static void check_cons(FILE *log, int nc, rvec x[], rvec prime[], rvec v[],
 +                       t_iparams ip[], t_iatom *iatom,
 +                       real invmass[], int econq)
 +{   /* Diagnostic dump: write one table row per constraint to log, comparing values before and after constraining. */
 +    t_iatom *ia;
 +    int      ai, aj;
 +    int      i;
 +    real     d, dp;
 +    rvec     dx, dv;
 +
 +    fprintf(log,
 +            "    i     mi      j     mj      before       after   should be\n");
 +    ia = iatom;
 +    for (i = 0; (i < nc); i++, ia += 3) /* each constraint is a triple: parameter type, atom ai, atom aj */
 +    {
 +        ai = ia[1];
 +        aj = ia[2];
 +        rvec_sub(x[ai], x[aj], dx);
 +        d = norm(dx);
 +
 +        switch (econq) /* other econq values print nothing for this constraint */
 +        {
 +            case econqCoord:
 +                rvec_sub(prime[ai], prime[aj], dx); /* distance in x (before) versus prime (after), target is dA */
 +                dp = norm(dx);
 +                fprintf(log, "%5d  %5.2f  %5d  %5.2f  %10.5f  %10.5f  %10.5f\n",
 +                        ai+1, 1.0/invmass[ai],
 +                        aj+1, 1.0/invmass[aj], d, dp, ip[ia[0]].constr.dA);
 +                break;
 +            case econqVeloc:
 +                rvec_sub(v[ai], v[aj], dv); /* here dx stays the x difference; d and dp are its inner products with the v and prime differences */
 +                d = iprod(dx, dv);
 +                rvec_sub(prime[ai], prime[aj], dv);
 +                dp = iprod(dx, dv);
 +                fprintf(log, "%5d  %5.2f  %5d  %5.2f  %10.5f  %10.5f  %10.5f\n",
 +                        ai+1, 1.0/invmass[ai],
 +                        aj+1, 1.0/invmass[aj], d, dp, 0.);
 +                break;
 +        }
 +    }
 +}
 +
 +gmx_bool bshakef(FILE *log, gmx_shakedata_t shaked,
 +                 int natoms, real invmass[], int nblocks, int sblock[],
 +                 t_idef *idef, t_inputrec *ir, rvec x_s[], rvec prime[],
 +                 t_nrnb *nrnb, real *lagr, real lambda, real *dvdlambda,
 +                 real invdt, rvec *v, gmx_bool bCalcVir, tensor vir_r_m_dr,
 +                 gmx_bool bDumpOnError, int econq, t_vetavars *vetavar)
 +{   /* Run vec_shakef() on each of the nblocks constraint blocks; returns FALSE as soon as one block fails, TRUE otherwise. */
 +    t_iatom *iatoms;
 +    real    *lam, dt_2, dvdl;
 +    int      i, n0, ncons, blen, type;
 +    int      tnit = 0, trij = 0;
 +
 +#ifdef DEBUG
 +    fprintf(log, "nblocks=%d, sblock[0]=%d\n", nblocks, sblock[0]);
 +#endif
 +
 +    ncons = idef->il[F_CONSTR].nr/3; /* three iatoms entries per constraint */
 +
 +    for (i = 0; i < ncons; i++)
 +    {
 +        lagr[i] = 0;
 +    }
 +
 +    iatoms = &(idef->il[F_CONSTR].iatoms[sblock[0]]);
 +    lam    = lagr;
 +    for (i = 0; (i < nblocks); )
 +    {
 +        blen  = (sblock[i+1]-sblock[i]); /* sblock[] holds iatoms offsets, so divide by 3 for the constraint count */
 +        blen /= 3;
 +        n0    = vec_shakef(log, shaked, natoms, invmass, blen, idef->iparams,
 +                           iatoms, ir->shake_tol, x_s, prime, shaked->omega,
 +                           ir->efep != efepNO, lambda, lam, invdt, v, bCalcVir, vir_r_m_dr,
 +                           econq, vetavar);
 +
 +#ifdef DEBUGSHAKE
 +        check_cons(log, blen, x_s, prime, v, idef->iparams, iatoms, invmass, econq);
 +#endif
 +
 +        if (n0 == 0) /* vec_shakef() returns 0 on non-convergence or a solver error */
 +        {
 +            if (bDumpOnError && log)
 +            {
 +                {
 +                    check_cons(log, blen, x_s, prime, v, idef->iparams, iatoms, invmass, econq);
 +                }
 +            }
 +            return FALSE;
 +        }
 +        tnit   += n0*blen;
 +        trij   += blen;
 +        iatoms += 3*blen; /* Increment pointer! */
 +        lam    += blen;
 +        i++;
 +    }
 +    /* only for position part? */
 +    if (econq == econqCoord)
 +    {
 +        if (ir->efep != efepNO)
 +        {
++            real bondA,bondB;
 +            dt_2 = 1/sqr(ir->delta_t);
 +            dvdl = 0;
 +            for (i = 0; i < ncons; i++)
 +            {
 +                type  = idef->il[F_CONSTR].iatoms[3*i];
-                 dvdl += lagr[i]*dt_2*
-                     (idef->iparams[type].constr.dB-idef->iparams[type].constr.dA);
++
++                /* dh/dl contribution from constraint force is  dh/dr (constraint force) dot dr/dl */
++                /* constraint force is -\sum_i lagr_i* d(constraint)/dr, with constraint = r^2-d^2  */
++                /* constraint force is -\sum_i lagr_i* 2 r  */
++                /* so dh/dl = -\sum_i lagr_i* 2 r * dr/dl */
++                /* However, by comparison with lincs and with
++                   comparison with a full thermodynamics cycle (see
++                   redmine issue #1255), this is off by a factor of
++                   two -- the 2r should apparently just be r.  Further
++                   investigation should be done at some point to
++                   understand why and see if there is something deeper
++                   we are missing */
++
++                bondA = idef->iparams[type].constr.dA;
++                bondB = idef->iparams[type].constr.dB;
++                dvdl += lagr[i] * dt_2 * ((1.0-lambda)*bondA + lambda*bondB) * (bondB-bondA);
 +            }
 +            *dvdlambda += dvdl;
 +        }
 +    }
 +#ifdef DEBUG
 +    fprintf(log, "tnit: %5d  omega: %10.5f\n", tnit, shaked->omega);
 +#endif
 +    if (ir->bShakeSOR)
 +    {
 +        if (tnit > shaked->gamma) /* more iterations than in the previous call: halve the omega step and reverse its sign */
 +        {
 +            shaked->delta *= -0.5;
 +        }
 +        shaked->omega += shaked->delta;
 +        shaked->gamma  = tnit;
 +    }
 +    inc_nrnb(nrnb, eNR_SHAKE, tnit);
 +    inc_nrnb(nrnb, eNR_SHAKE_RIJ, trij);
 +    if (v)
 +    {
 +        inc_nrnb(nrnb, eNR_CONSTR_V, trij*2);
 +    }
 +    if (bCalcVir)
 +    {
 +        inc_nrnb(nrnb, eNR_CONSTR_VIR, trij);
 +    }
 +
 +    return TRUE;
 +}
 +
 +void crattle(atom_id iatom[], int ncon, int *nnit, int maxnit,
 +             real dist2[], real vp[], real rij[], real m2[], real omega,
 +             real invmass[], real tt[], real lagr[], int *nerror, real invdt, t_vetavars *vetavar)
 +{
 +    /* Velocity solver: sweeps over the ncon constraints (at most maxnit sweeps), correcting vp along rij while |xdotd|*tt/invdt > 1.
 +     *     r.c. van schaik and w.f. van gunsteren
 +     *     eth zuerich
 +     *     june 1992
 +     *     Adapted for use with Gromacs by David van der Spoel november 92 and later.
 +     *     rattle added by M.R. Shirts, April 2004, from code written by Jay Ponder in TINKER
 +     *     second part of rattle algorithm
 +     * Outputs: *nnit = number of sweeps done; *nerror is always 0 because error is never set in this routine. */
 +
 +    const   real mytol = 1e-10;
 +
 +    int          ll, i, j, i3, j3, l3, ii;
 +    int          ix, iy, iz, jx, jy, jz;
 +    real         toler, rijd, vpijd, vx, vy, vz, diff, acor, xdotd, fac, im, jm, imdt, jmdt;
 +    real         xh, yh, zh, rijx, rijy, rijz;
 +    real         tix, tiy, tiz;
 +    real         tjx, tjy, tjz;
 +    int          nit, error, nconv;
 +    real         veta, vscale_nhc, iconvf;
 +
 +    veta       = vetavar->veta;
 +    vscale_nhc = vetavar->vscale_nhc[0];  /* for now, just use the first state */
 +
 +    error = 0;
 +    nconv = 1;
 +    for (nit = 0; (nit < maxnit) && (nconv != 0) && (error == 0); nit++)
 +    {
 +        nconv = 0;
 +        for (ll = 0; (ll < ncon) && (error == 0); ll++)
 +        {
 +            l3      = 3*ll; /* iatom[] and rij[] are both indexed in strides of 3 per constraint */
 +            rijx    = rij[l3+XX];
 +            rijy    = rij[l3+YY];
 +            rijz    = rij[l3+ZZ];
 +            i       = iatom[l3+1];
 +            j       = iatom[l3+2];
 +            i3      = 3*i;
 +            j3      = 3*j;
 +            ix      = i3+XX;
 +            iy      = i3+YY;
 +            iz      = i3+ZZ;
 +            jx      = j3+XX;
 +            jy      = j3+YY;
 +            jz      = j3+ZZ;
 +            vx      = vp[ix]-vp[jx];
 +            vy      = vp[iy]-vp[jy];
 +            vz      = vp[iz]-vp[jz];
 +
 +            vpijd   = vx*rijx+vy*rijy+vz*rijz; /* inner product of the i-j velocity difference with rij */
 +            toler   = dist2[ll];
 +            /* this is r(t+dt) \dotproduct \dot{r}(t+dt) */
 +            xdotd   = vpijd*vscale_nhc + veta*toler;
 +
 +            /* iconvf is less than 1 when the error is smaller than a bound */
 +            iconvf   = fabs(xdotd)*(tt[ll]/invdt);
 +
 +            if (iconvf > 1)
 +            {
 +                nconv     = iconvf; /* nonzero here (iconvf > 1), which requests another sweep */
 +                fac       = omega*2.0*m2[ll]/toler;
 +                acor      = -fac*xdotd;
 +                lagr[ll] += acor;
 +
 +                xh        = rijx*acor;
 +                yh        = rijy*acor;
 +                zh        = rijz*acor;
 +
 +                im        = invmass[i]/vscale_nhc;
 +                jm        = invmass[j]/vscale_nhc;
 +
 +                vp[ix] += xh*im; /* the two atoms are corrected in opposite directions */
 +                vp[iy] += yh*im;
 +                vp[iz] += zh*im;
 +                vp[jx] -= xh*jm;
 +                vp[jy] -= yh*jm;
 +                vp[jz] -= zh*jm;
 +            }
 +        }
 +    }
 +    *nnit   = nit;
 +    *nerror = error;
 +}
diff --cc src/gromacs/selection/selhelp.cpp
index 5cd674a639,0000000000..8d8defbff6
mode 100644,000000..100644
--- a/src/gromacs/selection/selhelp.cpp
+++ b/src/gromacs/selection/selhelp.cpp
@@@ -1,634 -1,0 +1,634 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 2009,2010,2011,2012, by the GROMACS development team, led by
 + * David van der Spoel, Berk Hess, Erik Lindahl, and including many
 + * others, as listed in the AUTHORS file in the top-level source
 + * directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +/*! \internal \file
 + * \brief
 + * Implements functions in selhelp.h.
 + *
 + * \author Teemu Murtola <teemu.murtola@gmail.com>
 + * \ingroup module_selection
 + */
 +#include <string>
 +#include <vector>
 +#include <utility>
 +
 +#include <boost/scoped_ptr.hpp>
 +
 +#include "gromacs/onlinehelp/helptopic.h"
 +#include "gromacs/onlinehelp/helpwritercontext.h"
 +#include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/file.h"
 +#include "gromacs/utility/stringutil.h"
 +
 +#include "selhelp.h"
 +#include "selmethod.h"
 +#include "symrec.h"
 +
 +namespace
 +{
 +
 +struct CommonHelpText  // Name, title and body lines of the "selections" help text.
 +{
 +    static const char        name[];
 +    static const char        title[];
 +    static const char *const text[];
 +};
 +
 +const char        CommonHelpText::name[]  = "selections";
 +const char        CommonHelpText::title[] =
 +    "Selection syntax and usage";
 +const char *const CommonHelpText::text[] = {  // Bracketed tags such as [PAR] and [TT]..[tt] are kept verbatim as markup.
 +    "Selections are used to select atoms/molecules/residues for analysis.",
 +    "In contrast to traditional index files, selections can be dynamic, i.e.,",
 +    "select different atoms for different trajectory frames.[PAR]",
 +
 +    "Each analysis tool requires a different number of selections and the",
 +    "selections are interpreted differently. The general idea is still the",
 +    "same: each selection evaluates to a set of positions, where a position",
 +    "can be an atom position or center-of-mass or center-of-geometry of",
 +    "a set of atoms. The tool then uses these positions for its analysis to",
 +    "allow very flexible processing. Some analysis tools may have limitations",
 +    "on the types of selections allowed.[PAR]",
 +
 +    "To get started with selections, run, e.g., [TT][PROGRAM] select[tt]",
 +    "without specifying selections on the command-line and use the interactive",
 +    "prompt to try out different selections.",
 +    "This tool provides output options that allow one to see what is actually",
 +    "selected by the given selections, and the interactive prompt reports",
 +    "syntax errors immediately, allowing one to try again.",
 +    "The subtopics listed below give more details on different aspects of",
 +    "selections.",
 +};
 +
 +struct ArithmeticHelpText  // Name, title and body lines of the "arithmetic" help text.
 +{
 +    static const char        name[];
 +    static const char        title[];
 +    static const char *const text[];
 +};
 +
 +const char        ArithmeticHelpText::name[]  = "arithmetic";
 +const char        ArithmeticHelpText::title[] =
 +    "Arithmetic expressions in selections";
 +const char *const ArithmeticHelpText::text[] = {  // One string per source line of help text.
 +    "Basic arithmetic evaluation is supported for numeric expressions.",
 +    "Supported operations are addition, subtraction, negation, multiplication,",
 +    "division, and exponentiation (using ^).",
 +    "Result of a division by zero or other illegal operations is undefined.",
 +};
 +
 +struct CmdLineHelpText  // Name, title and body lines of the "cmdline" help text.
 +{
 +    static const char        name[];
 +    static const char        title[];
 +    static const char *const text[];
 +};
 +
 +const char        CmdLineHelpText::name[]  = "cmdline";
 +const char        CmdLineHelpText::title[] =
 +    "Specifying selections from command line";
 +const char *const CmdLineHelpText::text[] = {  // Bracketed tags such as [PAR], [BR] and [TT]..[tt] are kept verbatim as markup.
 +    "If no selections are provided on the command line, you are prompted to",
 +    "type the selections interactively (a pipe can also be used to provide",
 +    "the selections in this case for most tools). While this works well for",
 +    "testing, it is easier to provide the selections from the command line",
 +    "if they are complex or for scripting.[PAR]",
 +
 +    "Each tool has different command-line arguments for specifying selections",
 +    "(listed by [TT][PROGRAM] help <tool>[tt]).",
 +    "You can either pass a single string containing all selections (separated",
 +    "by semicolons), or multiple strings, each containing one selection.",
 +    "Note that you need to quote the selections to protect them from the",
 +    "shell.[PAR]",
 +
 +    "If you set a selection command-line argument, but do not provide any",
 +    "selections, you are prompted to type the selections for that argument",
 +    "interactively. This is useful if that selection argument is optional,",
 +    "in which case it is not normally prompted for.[PAR]",
 +
 +    "To provide selections from a file, use [TT]-sf file.dat[tt] in the place",
 +    "of the selection for a selection argument (e.g.,",
 +    "[TT]-select -sf file.dat[tt]). In general, the [TT]-sf[tt] argument reads",
 +    "selections from the provided file and assigns them to selection arguments",
 +    "that have been specified up to that point, but for which no selections",
 +    "have been provided.",
 +    "As a special case, [TT]-sf[tt] provided on its own, without preceding",
 +    "selection arguments, assigns the selections to all (yet unset) required",
 +    "selections (i.e., those that would be prompted interactively if no",
 +    "selections are provided on the command line).[PAR]",
 +
 +    "To use groups from a traditional index file, use argument [TT]-n[tt]",
 +    "to provide a file. See the \"syntax\" subtopic for how to use them.",
 +    "If this option is not provided, default groups are generated.",
 +    "The default groups are generated by reading selections from a file",
 +    "[TT]defselection.dat[tt]. If such a file is found in the current",
 +    "directory, it is used instead of the one provided by default.[PAR]",
 +
 +    "Depending on the tool, two additional command-line arguments may be",
 +    "available to control the behavior:[BR]",
 +    "1. [TT]-seltype[tt] can be used to specify the default type of",
 +    "positions to calculate for each selection.[BR]",
 +    "2. [TT]-selrpos[tt] can be used to specify the default type of",
 +    "positions used in selecting atoms by coordinates.[BR]",
 +    "See the \"positions\" subtopic for more information on these options.",
 +};
 +
 +struct EvaluationHelpText  // Name, title and body lines of the "evaluation" help text.
 +{
 +    static const char        name[];
 +    static const char        title[];
 +    static const char *const text[];
 +};
 +
 +const char        EvaluationHelpText::name[]  = "evaluation";
 +const char        EvaluationHelpText::title[] =
 +    "Selection evaluation and optimization";
 +const char *const EvaluationHelpText::text[] = {  // Bracketed tags such as [PAR], [BR] and [TT]..[tt] are kept verbatim as markup.
 +    "Boolean evaluation proceeds from left to right and is short-circuiting",
 +    "i.e., as soon as it is known whether an atom will be selected, the",
 +    "remaining expressions are not evaluated at all.",
 +    "This can be used to optimize the selections: you should write the",
 +    "most restrictive and/or the most inexpensive expressions first in",
 +    "boolean expressions.",
 +    "The relative ordering between dynamic and static expressions does not",
 +    "matter: all static expressions are evaluated only once, before the first",
 +    "frame, and the result becomes the leftmost expression.[PAR]",
 +
 +    "Another point for optimization is in common subexpressions: they are not",
 +    "automatically recognized, but can be manually optimized by the use of",
 +    "variables. This can have a big impact on the performance of complex",
 +    "selections, in particular if you define several index groups like this:",
 +    "  [TT]rdist = distance from com of resnr 1 to 5;[tt][BR]",
 +    "  [TT]resname RES and rdist < 2;[tt][BR]",
 +    "  [TT]resname RES and rdist < 4;[tt][BR]",
 +    "  [TT]resname RES and rdist < 6;[tt][BR]",
 +    "Without the variable assignment, the distances would be evaluated three",
 +    "times, although they are exactly the same within each selection.",
 +    "Anything assigned into a variable becomes a common subexpression that",
 +    "is evaluated only once during a frame.",
 +    "Currently, in some cases the use of variables can actually lead to a small",
 +    "performance loss because of the checks necessary to determine for which",
 +    "atoms the expression has already been evaluated, but this should not be",
 +    "a major problem.",
 +};
 +
 +struct ExamplesHelpText  // Name, title and body lines of the "examples" help text.
 +{
 +    static const char        name[];
 +    static const char        title[];
 +    static const char *const text[];
 +};
 +
 +const char        ExamplesHelpText::name[]  = "examples";
 +const char        ExamplesHelpText::title[] =
 +    "Selection examples";
 +const char *const ExamplesHelpText::text[] = {  // Each example is a description line, the selection text, then a [PAR] separator.
 +    // TODO: Once there are more tools available, use examples that invoke
 +    // tools and explain what the selections do in those tools.
 +    "Below, examples of increasingly complex selections are given.[PAR]",
 +
 +    "Selection of all water oxygens:[BR]",
 +    "  resname SOL and name OW",
 +    "[PAR]",
 +
 +    "Centers of mass of residues 1 to 5 and 10:[BR]",
 +    "  res_com of resnr 1 to 5 10",
 +    "[PAR]",
 +
 +    "All atoms farther than 1 nm of a fixed position:[BR]",
-     "  not within 1 of (1.2, 3.1, 2.4)",
++    "  not within 1 of [1.2, 3.1, 2.4]",
 +    "[PAR]",
 +
 +    "All atoms of a residue LIG within 0.5 nm of a protein (with a custom name):[BR]",
 +    "  \"Close to protein\" resname LIG and within 0.5 of group \"Protein\"",
 +    "[PAR]",
 +
 +    "All protein residues that have at least one atom within 0.5 nm of a residue LIG:[BR]",
 +    "  group \"Protein\" and same residue as within 0.5 of resname LIG",
 +    "[PAR]",
 +
 +    "All RES residues whose COM is between 2 and 4 nm from the COM of all of them:[BR]",
 +    "  rdist = res_com distance from com of resname RES[BR]",
 +    "  resname RES and rdist >= 2 and rdist <= 4",
 +    "[PAR]",
 +
 +    "Selection like C1 C2 C2 C3 C3 C4 ... C8 C9 (e.g., for g_bond):[BR]",
 +    "  name \"C[1-8]\" merge name \"C[2-9]\"",
 +};
 +
 +struct KeywordsHelpText  // Name, title and body lines of the "keywords" help text.
 +{
 +    static const char        name[];
 +    static const char        title[];
 +    static const char *const text[];
 +};
 +
 +const char        KeywordsHelpText::name[]  = "keywords";
 +const char        KeywordsHelpText::title[] =
 +    "Selection keywords";
 +const char *const KeywordsHelpText::text[] = {  // Introductory sentences only; the keyword list itself is not part of this table.
 +    "The following selection keywords are currently available.",
 +    "For keywords marked with a star, additional help is available through",
 +    "a subtopic KEYWORD, where KEYWORD is the name of the keyword.",
 +};
 +
 +struct LimitationsHelpText  // Name, title and body lines of the "limitations" help text.
 +{
 +    static const char        name[];
 +    static const char        title[];
 +    static const char *const text[];
 +};
 +
 +const char        LimitationsHelpText::name[]  = "limitations";
 +const char        LimitationsHelpText::title[] =
 +    "Selection limitations";
 +const char *const LimitationsHelpText::text[] = {  // Bracketed tags such as [PAR], [BR] and [TT]..[tt] are kept verbatim as markup.
 +    "Some analysis programs may require a special structure for the input",
 +    "selections (e.g., [TT]g_angle[tt] requires the index group to be made",
 +    "of groups of three or four atoms).",
 +    "For such programs, it is up to the user to provide a proper selection",
 +    "expression that always returns such positions.",
 +    "[PAR]",
 +
 +    "Due to technical reasons, having a negative value as the first value in",
 +    "expressions like[BR]",
 +    "[TT]charge -1 to -0.7[tt][BR]",
 +    "results in a syntax error. A workaround is to write[BR]",
 +    "[TT]charge {-1 to -0.7}[tt][BR]",
 +    "instead.[PAR]",
 +
 +    "When [TT]name[tt] selection keyword is used together with PDB input",
 +    "files, the behavior may be unintuitive. When Gromacs reads in a PDB",
 +    "file, 4 character atom names that start with a digit are transformed",
 +    "such that, e.g., 1HG2 becomes HG21, and the latter is what is matched",
 +    "by the [TT]name[tt] keyword. Use [TT]pdbname[tt] to match the atom name",
 +    "as it appears in the input PDB file.",
 +};
 +
 +//! Static name, title and text of the "positions" help topic (registered through SimpleHelpTopic in createSelectionHelpTopic()).
 +struct PositionsHelpText
 +{
 +    static const char        name[];   //!< Topic name.
 +    static const char        title[];  //!< Topic title.
 +    static const char *const text[];   //!< Lines of the help text.
 +};
 +
 +const char        PositionsHelpText::name[]  = "positions";
 +const char        PositionsHelpText::title[] =
 +    "Specifying positions in selections";
 +const char *const PositionsHelpText::text[] = {
 +    "Possible ways of specifying positions in selections are:[PAR]",
 +
 +    "1. A constant position can be defined as [TT][XX, YY, ZZ][tt], where",
 +    "[TT]XX[tt], [TT]YY[tt] and [TT]ZZ[tt] are real numbers.[PAR]",
 +
 +    "2. [TT]com of ATOM_EXPR [pbc][tt] or [TT]cog of ATOM_EXPR [pbc][tt]",
 +    "calculate the center of mass/geometry of [TT]ATOM_EXPR[tt]. If",
 +    "[TT]pbc[tt] is specified, the center is calculated iteratively to try",
 +    "to deal with cases where [TT]ATOM_EXPR[tt] wraps around periodic",
 +    "boundary conditions.[PAR]",
 +
 +    "3. [TT]POSTYPE of ATOM_EXPR[tt] calculates the specified positions for",
 +    "the atoms in [TT]ATOM_EXPR[tt].",
 +    "[TT]POSTYPE[tt] can be [TT]atom[tt], [TT]res_com[tt], [TT]res_cog[tt],",
 +    "[TT]mol_com[tt] or [TT]mol_cog[tt], with an optional prefix [TT]whole_[tt],",
 +    "[TT]part_[tt] or [TT]dyn_[tt].",
 +    "[TT]whole_[tt] calculates the centers for the whole residue/molecule,",
 +    "even if only part of it is selected.",
 +    "[TT]part_[tt] prefix calculates the centers for the selected atoms, but",
 +    "uses always the same atoms for the same residue/molecule. The used atoms",
 +    "are determined from the largest group allowed by the selection.",
 +    "[TT]dyn_[tt] calculates the centers strictly only for the selected atoms.",
 +    "If no prefix is specified, whole selections default to [TT]part_[tt] and",
 +    "other places default to [TT]whole_[tt].",
 +    "The latter is often desirable to select the same molecules in different",
 +    "tools, while the first is a compromise between speed ([TT]dyn_[tt]",
 +    "positions can be slower to evaluate than [TT]part_[tt]) and intuitive",
 +    "behavior.[PAR]",
 +
 +    "4. [TT]ATOM_EXPR[tt], when given for whole selections, is handled as 3.",
 +    "above, using the position type from the command-line argument",
 +    "[TT]-seltype[tt].[PAR]",
 +
 +    "Selection keywords that select atoms based on their positions, such as",
 +    "[TT]dist from[tt], use by default the positions defined by the",
 +    "[TT]-selrpos[tt] command-line option.",
 +    "This can be overridden by prepending a [TT]POSTYPE[tt] specifier to the",
 +    "keyword. For example, [TT]res_com dist from POS[tt] evaluates the",
 +    "residue center of mass distances. In the example, all atoms of a residue",
 +    "are either selected or not, based on the single distance calculated.",
 +};
 +
 +//! Static name, title and text of the "syntax" help topic (registered through SimpleHelpTopic in createSelectionHelpTopic()).
 +struct SyntaxHelpText
 +{
 +    static const char        name[];   //!< Topic name.
 +    static const char        title[];  //!< Topic title.
 +    static const char *const text[];   //!< Lines of the help text.
 +};
 +
 +const char        SyntaxHelpText::name[]  = "syntax";
 +const char        SyntaxHelpText::title[] =
 +    "Selection syntax";
 +const char *const SyntaxHelpText::text[] = {
 +    "A set of selections consists of one or more selections, separated by",
 +    "semicolons. Each selection defines a set of positions for the analysis.",
 +    "Each selection can also be preceded by a string that gives a name for",
 +    "the selection for use in, e.g., graph legends.",
 +    "If no name is provided, the string used for the selection is used",
 +    "automatically as the name.[PAR]",
 +
 +    "For interactive input, the syntax is slightly altered: line breaks can",
 +    "also be used to separate selections. \\ followed by a line break can",
 +    "be used to continue a line if necessary.",
 +    "Notice that the above only applies to real interactive input,",
 +    "not if you provide the selections, e.g., from a pipe.[PAR]",
 +
 +    "It is possible to use variables to store selection expressions.",
 +    "A variable is defined with the following syntax:[BR]",
 +    "[TT]VARNAME = EXPR ;[tt][BR]",
 +    "where [TT]EXPR[tt] is any valid selection expression.",
 +    "After this, [TT]VARNAME[tt] can be used anywhere where [TT]EXPR[tt]",
 +    "would be valid.[PAR]",
 +
 +    "Selections are composed of three main types of expressions, those that",
 +    "define atoms ([TT]ATOM_EXPR[tt]s), those that define positions",
 +    "([TT]POS_EXPR[tt]s), and those that evaluate to numeric values",
 +    "([TT]NUM_EXPR[tt]s). Each selection should be a [TT]POS_EXPR[tt]",
 +    "or a [TT]ATOM_EXPR[tt] (the latter is automatically converted to",
 +    "positions). The basic rules are as follows:[BR]",
 +    "1. An expression like [TT]NUM_EXPR1 < NUM_EXPR2[tt] evaluates to an",
 +    "[TT]ATOM_EXPR[tt] that selects all the atoms for which the comparison",
 +    "is true.[BR]",
 +    "2. Atom expressions can be combined with boolean operations such as",
 +    "[TT]not ATOM_EXPR[tt], [TT]ATOM_EXPR and ATOM_EXPR[tt], or",
 +    "[TT]ATOM_EXPR or ATOM_EXPR[tt]. Parentheses can be used to alter the",
 +    "evaluation order.[BR]",
 +    "3. [TT]ATOM_EXPR[tt] expressions can be converted into [TT]POS_EXPR[tt]",
 +    "expressions in various ways, see the \"positions\" subtopic for more",
 +    "details.[PAR]",
 +
 +    "Some keywords select atoms based on string values such as the atom name.",
 +    "For these keywords, it is possible to use wildcards ([TT]name \"C*\"[tt])",
 +    "or regular expressions (e.g., [TT]resname \"R[AB]\"[tt]).",
 +    "The match type is automatically guessed from the string: if it contains",
 +    "other characters than letters, numbers, '*', or '?', it is interpreted",
 +    "as a regular expression.",
 +    "To force the matching to use literal string matching, use",
 +    "[TT]name = \"C*\"[tt] to match a literal C*.",
 +    "To force other type of matching, use '?' or '~' in place of '=' to force",
 +    "wildcard or regular expression matching, respectively.[PAR]",
 +
 +    "Strings that contain non-alphanumeric characters should be enclosed in",
 +    "double quotes as in the examples. For other strings, the quotes are",
 +    "optional, but if the value conflicts with a reserved keyword, a syntax",
 +    "error will occur. If your strings contain uppercase letters, this should",
 +    "not happen.[PAR]",
 +
 +    "Index groups provided with the [TT]-n[tt] command-line option or",
 +    "generated by default can be accessed with [TT]group NR[tt] or",
 +    "[TT]group NAME[tt], where [TT]NR[tt] is a zero-based index of the group",
 +    "and [TT]NAME[tt] is part of the name of the desired group.",
 +    "The keyword [TT]group[tt] is optional if the whole selection is",
 +    "provided from an index group.",
 +    "To see a list of available groups in the interactive mode, press enter",
 +    "in the beginning of a line.",
 +};
 +
 +} // namespace
 +
 +namespace gmx
 +{
 +
 +namespace
 +{
 +
 +/*! \internal \brief
 + * Help topic that presents the detailed help of one selection method.
 + *
 + * The topic has no title of its own; its text is built from the help
 + * lines stored in the selection method's static description.
 + *
 + * \ingroup module_selection
 + */
 +class KeywordDetailsHelpTopic : public AbstractSimpleHelpTopic
 +{
 +    public:
 +        /*! \brief
 +         * Creates a help topic called \p name for \p method.
 +         *
 +         * Only a reference to \p method is kept, so the method description
 +         * must outlive the topic.
 +         */
 +        KeywordDetailsHelpTopic(const std::string         &name,
 +                                const gmx_ana_selmethod_t &method)
 +            : topicName_(name), selMethod_(method)
 +        {
 +        }
 +
 +        //! Returns the name given at construction.
 +        virtual const char *name() const { return topicName_.c_str(); }
 +        //! Always returns NULL.
 +        virtual const char *title() const { return NULL; }
 +
 +    protected:
 +        //! Builds the text by passing the method's help array and its length to concatenateStrings().
 +        virtual std::string helpText() const
 +        {
 +            return concatenateStrings(selMethod_.help.help,
 +                                      selMethod_.help.nlhelp);
 +        }
 +
 +    private:
 +        //! Name under which this topic is reachable.
 +        std::string                topicName_;
 +        //! Selection method whose help is shown (not owned).
 +        const gmx_ana_selmethod_t &selMethod_;
 +
 +        GMX_DISALLOW_COPY_AND_ASSIGN(KeywordDetailsHelpTopic);
 +};
 +
 +/*! \internal \brief
 + * Custom help topic for printing a list of selection keywords.
 + *
 + * In addition to the static text from KeywordsHelpText, writeHelp() prints
 + * the known selection methods grouped by the type of value they return.
 + *
 + * \ingroup module_selection
 + */
 +class KeywordsHelpTopic : public CompositeHelpTopic<KeywordsHelpText>
 +{
 +    public:
 +        //! Collects the default selection methods and adds a subtopic for each method that has help lines.
 +        KeywordsHelpTopic();
 +
 +        //! Writes the topic text followed by the keyword lists; throws NotImplementedError for non-console output formats.
 +        virtual void writeHelp(const HelpWriterContext &context) const;
 +
 +    private:
 +        /*! \brief
 +         * Container for known selection methods.
 +         *
 +         * The first item in the pair is the name of the selection method, and
 +         * the second points to the static data structure that describes the
 +         * method.
 +         * The name in the first item may differ from the name of the static
 +         * data structure if an alias is defined for that method.
 +         */
 +        typedef std::vector<std::pair<std::string,
 +                                      const gmx_ana_selmethod_t *> >
 +            MethodList;
 +
 +        /*! \brief
 +         * Prints a brief list of keywords (selection methods) available.
 +         *
 +         * Entries that have help lines are marked with a star.
 +         *
 +         * \param[in] context  Context for printing the help.
 +         * \param[in] type     Only methods that return this type are printed.
 +         * \param[in] bModifiers  If false, \ref SMETH_MODIFIER methods are
 +         *      excluded, otherwise only them are printed.
 +         */
 +        void printKeywordList(const HelpWriterContext &context,
 +                              e_selvalue_t type, bool bModifiers) const;
 +
 +        //! Selection methods found by the constructor, in symbol-table iteration order.
 +        MethodList              methods_;
 +};
 +
 +KeywordsHelpTopic::KeywordsHelpTopic()
 +{
 +    // TODO: This is not a very elegant way of getting the list of selection
 +    // methods, but this needs to be rewritten in any case if/when #652 is
 +    // implemented.
 +    // A temporary symbol table is filled with the default selection methods
 +    // and only used to enumerate them; it is destroyed on return.
 +    boost::scoped_ptr<SelectionParserSymbolTable> defaultSymbols(
 +            new SelectionParserSymbolTable);
 +    gmx_ana_selmethod_register_defaults(defaultSymbols.get());
 +
 +    for (SelectionParserSymbolIterator entry
 +             = defaultSymbols->beginIterator(SelectionParserSymbol::MethodSymbol);
 +         entry != defaultSymbols->endIterator();
 +         ++entry)
 +    {
 +        const std::string         &keyword = entry->name();
 +        const gmx_ana_selmethod_t *method  = entry->methodValue();
 +        // Every method is remembered for the keyword lists ...
 +        methods_.push_back(MethodList::value_type(keyword, method));
 +        // ... but only methods that carry help lines get their own subtopic.
 +        const bool                 bHasHelp
 +            = (method->help.nlhelp > 0 && method->help.help != NULL);
 +        if (bHasHelp)
 +        {
 +            addSubTopic(HelpTopicPointer(
 +                                new KeywordDetailsHelpTopic(keyword, *method)));
 +        }
 +    }
 +}
 +
 +void KeywordsHelpTopic::writeHelp(const HelpWriterContext &context) const
 +{
 +    // Only console output is supported.
 +    if (context.outputFormat() != eHelpOutputFormat_Console)
 +    {
 +        GMX_THROW(NotImplementedError(
 +                          "Selection help is not implemented for this output format"));
 +    }
 +
 +    // Heading and value type of each non-modifier keyword section, in the
 +    // order in which the sections are printed.
 +    struct KeywordSection
 +    {
 +        const char   *heading;
 +        e_selvalue_t  type;
 +    };
 +    static const KeywordSection sections[] = {
 +        { "Keywords that select atoms by an integer property:[BR]"
 +          "(use in expressions or like \"atomnr 1 to 5 7 9\")[BR]",
 +          INT_VALUE },
 +        { "Keywords that select atoms by a numeric property:[BR]"
 +          "(use in expressions or like \"occupancy 0.5 to 1\")[BR]",
 +          REAL_VALUE },
 +        { "Keywords that select atoms by a string property:[BR]"
 +          "(use like \"name PATTERN [PATTERN] ...\")[BR]",
 +          STR_VALUE },
 +        { "Additional keywords that directly select atoms:[BR]",
 +          GROUP_VALUE },
 +        { "Keywords that directly evaluate to positions:[BR]"
 +          "(see also \"positions\" subtopic)[BR]",
 +          POS_VALUE }
 +    };
 +    const KeywordSection *const sectionsEnd
 +        = sections + sizeof(sections) / sizeof(sections[0]);
 +
 +    // TODO: The markup here is not really appropriate, and printKeywordList()
 +    // still prints raw text, but these are waiting for discussion of the
 +    // markup format in #969.
 +    writeBasicHelpTopic(context, *this, helpText());
 +    context.writeTextBlock("[BR]");
 +
 +    // Print the list of keywords, one section per value type.
 +    for (const KeywordSection *section = sections; section != sectionsEnd; ++section)
 +    {
 +        context.writeTextBlock(section->heading);
 +        printKeywordList(context, section->type, false);
 +        context.writeTextBlock("[BR]");
 +    }
 +
 +    // Modifier methods are listed last, under a single heading.
 +    context.writeTextBlock("Additional keywords:[BR]");
 +    printKeywordList(context, POS_VALUE, true);
 +    printKeywordList(context, NO_VALUE, true);
 +}
 +
 +void KeywordsHelpTopic::printKeywordList(const HelpWriterContext &context,
 +                                         e_selvalue_t             type,
 +                                         bool                     bModifiers) const
 +{
 +    File &out = context.outputFile();
 +    for (MethodList::const_iterator entry = methods_.begin();
 +         entry != methods_.end(); ++entry)
 +    {
 +        const gmx_ana_selmethod_t &method      = *entry->second;
 +        const bool                 bIsModifier = (method.flags & SMETH_MODIFIER) != 0;
 +        // Skip methods of another value type or of the wrong modifier kind.
 +        if (method.type != type || bModifiers != bIsModifier)
 +        {
 +            continue;
 +        }
 +
 +        // A star in the margin marks methods that have help lines.
 +        const bool bHasHelp = (method.help.nlhelp > 0 && method.help.help != NULL);
 +        out.writeString(formatString(" %c ", bHasHelp ? '*' : ' '));
 +
 +        // An explicit syntax string, when present, is printed as is.
 +        if (method.help.syntax != NULL)
 +        {
 +            out.writeLine(method.help.syntax);
 +            continue;
 +        }
 +
 +        // Otherwise print the registered name, noting when it is an alias
 +        // of a method with a different name.
 +        std::string label = entry->first;
 +        if (label != method.name)
 +        {
 +            label.append(formatString(" (synonym for %s)", method.name));
 +        }
 +        out.writeLine(label);
 +    }
 +}
 +
 +}   // namespace
 +
 +/*! \cond internal */
 +HelpTopicPointer createSelectionHelpTopic()
 +{
 +    // The top-level topic uses the common help text; the subtopics are
 +    // registered on it one by one in the order below.
 +    CompositeHelpTopicPointer selectionHelp(
 +            new CompositeHelpTopic<CommonHelpText>);
 +
 +    selectionHelp->registerSubTopic<SimpleHelpTopic<ArithmeticHelpText> >();
 +    selectionHelp->registerSubTopic<SimpleHelpTopic<CmdLineHelpText> >();
 +    selectionHelp->registerSubTopic<SimpleHelpTopic<EvaluationHelpText> >();
 +    selectionHelp->registerSubTopic<SimpleHelpTopic<ExamplesHelpText> >();
 +    // The keywords topic is the only one with a custom implementation.
 +    selectionHelp->registerSubTopic<KeywordsHelpTopic>();
 +    selectionHelp->registerSubTopic<SimpleHelpTopic<LimitationsHelpText> >();
 +    selectionHelp->registerSubTopic<SimpleHelpTopic<PositionsHelpText> >();
 +    selectionHelp->registerSubTopic<SimpleHelpTopic<SyntaxHelpText> >();
 +
 +    return move(selectionHelp);
 +}
 +//! \endcond
 +
 +} // namespace gmx
diff --cc src/programs/mdrun/md.c
index f972179326,0000000000..99683e8df2
mode 100644,000000..100644
--- a/src/programs/mdrun/md.c
+++ b/src/programs/mdrun/md.c
@@@ -1,2217 -1,0 +1,2236 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "md_support.h"
 +#include "md_logging.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "pme_loadbal.h"
 +#include "bondf.h"
 +#include "membed.h"
 +#include "types/nlistheuristics.h"
 +#include "types/iteratedconstraints.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +#include "gromacs/utility/gmxmpi.h"
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +/* Resets the performance-related timers and counters in the middle of a run
 + * and rebases the step bookkeeping so that the current step becomes the new
 + * starting point.
 + *
 + * fplog    - log file passed to md_print_warn() and print_date_and_time()
 + * cr       - communication record; used for the warning, the domain
 + *            decomposition check and the node id in the time stamp
 + * step     - current step, only used in the printed message
 + * step_rel - in/out: added to ir->init_step, subtracted from ir->nsteps,
 + *            then set to 0
 + * ir       - input record whose init_step and nsteps are adjusted
 + * wcycle   - wall-cycle counters: stopped, reset and restarted for ewcRUN
 + * nrnb     - operation counters, re-initialised with init_nrnb()
 + * runtime  - run timer, restarted with runtime_start()
 + * cu_nbv   - CUDA non-bonded data; its timings are reset when non-NULL
 + */
 +static void reset_all_counters(FILE *fplog, t_commrec *cr,
 +                               gmx_large_int_t step,
 +                               gmx_large_int_t *step_rel, t_inputrec *ir,
 +                               gmx_wallcycle_t wcycle, t_nrnb *nrnb,
 +                               gmx_runtime_t *runtime,
 +                               nbnxn_cuda_ptr_t cu_nbv)
 +{
 +    char sbuf[STEPSTRSIZE];
 +
 +    /* Reset all the counters related to performance over the run */
 +    md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
 +                  gmx_step_str(step, sbuf));
 +
 +    /* CUDA timings are only reset when a CUDA non-bonded structure was passed */
 +    if (cu_nbv)
 +    {
 +        nbnxn_cuda_reset_timings(cu_nbv);
 +    }
 +
 +    /* Stop the run counter before resetting; it is started again below */
 +    wallcycle_stop(wcycle, ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        reset_dd_statistics_counters(cr->dd);
 +    }
 +    init_nrnb(nrnb);
 +    /* Fold the steps done so far into the start step and restart the
 +     * relative step count from zero.
 +     */
 +    ir->init_step += *step_rel;
 +    ir->nsteps    -= *step_rel;
 +    *step_rel      = 0;
 +    wallcycle_start(wcycle, ewcRUN);
 +    runtime_start(runtime);
 +    print_date_and_time(fplog, cr->nodeid, "Restarted time", runtime);
 +}
 +
 +double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite, gmx_constr_t constr,
 +             int stepout, t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed, t_forcerec *fr,
 +             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed,
 +             real cpt_period, real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t   *outf;
 +    gmx_large_int_t step, step_rel;
 +    double          run_time;
 +    double          t, t0, lam0[efptNR];
 +    gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEner;
 +    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
 +                    bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep,
 +                    bBornRadii, bStartingFromCpt;
 +    gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
 +    gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
 +                      bForceUpdate = FALSE, bCPT;
 +    int               mdof_flags;
 +    gmx_bool          bMasterState;
 +    int               force_flags, cglo_flags;
 +    tensor            force_vir, shake_vir, total_vir, tmp_vir, pres;
 +    int               i, m;
 +    t_trxstatus      *status;
 +    rvec              mu_tot;
 +    t_vcm            *vcm;
 +    t_state          *bufstate = NULL;
 +    matrix           *scale_tot, pcoupl_mu, M, ebox;
 +    gmx_nlheur_t      nlh;
 +    t_trxframe        rerun_fr;
 +    gmx_repl_ex_t     repl_ex = NULL;
 +    int               nchkpt  = 1;
 +    gmx_localtop_t   *top;
 +    t_mdebin         *mdebin = NULL;
 +    df_history_t      df_history;
 +    t_state          *state    = NULL;
 +    rvec             *f_global = NULL;
 +    int               n_xtc    = -1;
 +    rvec             *x_xtc    = NULL;
 +    gmx_enerdata_t   *enerd;
 +    rvec             *f = NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t      upd   = NULL;
 +    t_graph          *graph = NULL;
 +    globsig_t         gs;
 +    gmx_rng_t         mcrng = NULL;
 +    gmx_bool          bFFscan;
 +    gmx_groups_t     *groups;
 +    gmx_ekindata_t   *ekind, *ekind_save;
 +    gmx_shellfc_t     shellfc;
 +    int               count, nconverged = 0;
 +    real              timestep = 0;
 +    double            tcount   = 0;
 +    gmx_bool          bIonize  = FALSE;
 +    gmx_bool          bTCR     = FALSE, bConverged = TRUE, bOK, bSumEkinhOld, bExchanged;
 +    gmx_bool          bAppend;
 +    gmx_bool          bResetCountersHalfMaxH = FALSE;
 +    gmx_bool          bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter;
 +    gmx_bool          bUpdateDoLR;
 +    real              mu_aver = 0, dvdl_constr;
 +    int               a0, a1, gnx = 0, ii;
 +    atom_id          *grpindex = NULL;
 +    char             *grpname;
 +    t_coupl_rec      *tcr     = NULL;
 +    rvec             *xcopy   = NULL, *vcopy = NULL, *cbuf = NULL;
 +    matrix            boxcopy = {{0}}, lastbox;
 +    tensor            tmpvir;
 +    real              fom, oldfom, veta_save, pcurr, scalevir, tracevir;
 +    real              vetanew = 0;
 +    int               lamnew  = 0;
 +    /* for FEP */
 +    int               nstfep;
 +    real              rate;
 +    double            cycles;
 +    real              saved_conserved_quantity = 0;
 +    real              last_ekin                = 0;
 +    int               iter_i;
 +    t_extmass         MassQ;
 +    int             **trotter_seq;
 +    char              sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
 +    int               handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t     iterate;
 +    gmx_large_int_t   multisim_nsteps = -1;                        /* number of steps to do  before first multisim
 +                                                                      simulation stops. If equal to zero, don't
 +                                                                      communicate any more between multisims.*/
 +    /* PME load balancing data for GPU kernels */
 +    pme_load_balancing_t pme_loadbal = NULL;
 +    double               cycles_pmes;
 +    gmx_bool             bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
 +
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle, ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf, top_global->natoms);
 +    }
 +    /* all the iterative cases - only if there are constraints */
 +    bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to
 +                                          false in this step.  The correct value, true or false,
 +                                          is set at each step, as it depends on the frequency of temperature
 +                                          and pressure control.*/
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr, fplog, ir, top_global);
 +
 +    nstglobalcomm   = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda,
 +            &(state_global->fep_state), lam0,
 +            nrnb, top_global, &upd,
 +            nfile, fnm, &outf, &mdebin,
 +            force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, state_global, Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd, 1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
 +                  enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f, top_global->natoms);
 +    }
 +
 +    /* lambda Monte carlo random number generator  */
 +    if (ir->bExpanded)
 +    {
 +        mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
 +    }
 +    /* copy the state into df_history */
 +    copy_df_history(&df_history, &state_global->dfhist);
 +
 +    /* Kinetic energy data */
 +    snew(ekind, 1);
 +    init_ekindata(fplog, top_global, &(ir->opts), ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save, 1);
 +    init_ekindata(fplog, top_global, &(ir->opts), ekind_save);
 +    /* Copy the cos acceleration to the groups struct */
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global, n_flexible_constraints(constr),
 +                                 (ir->bContinuation ||
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1);
 +        if ((io > 2000) && MASTER(cr))
 +        {
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state, 1);
 +        dd_init_local_state(cr->dd, state_global, state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout)
 +        {
 +            snew(f_global, state_global->natoms);
 +        }
 +    }
 +    else
 +    {
 +        if (PAR(cr))
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog, top_global, ir, cr);
 +
 +            pd_cg_range(cr, &fr->cg0, &fr->hcg);
 +            pd_at_range(cr, &a0, &a1);
 +        }
 +        else
 +        {
 +            top = gmx_mtop_generate_local_top(top_global, ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        forcerec_set_excl_load(fr, top, cr);
 +
 +        state    = partdec_init_local_state(cr, state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global, ir, 0, NULL, a0, a1-a0, mdatoms);
 +
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite, top, mdatoms, cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 +        {
 +            graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
 +        }
 +
 +        if (shellfc)
 +        {
 +            make_local_shells(cr, mdatoms, shellfc);
 +        }
 +
 +        init_bonded_thread_force_reduction(fr, &top->idef);
 +
 +        if (ir->pull && PAR(cr))
 +        {
 +            dd_make_local_pull_groups(NULL, ir->pull, mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
 +                            state_global, top_global, ir,
 +                            state, &f, mdatoms, top, fr,
 +                            vsite, shellfc, constr,
 +                            nrnb, wcycle, FALSE);
 +
 +    }
 +
 +    update_mdatoms(mdatoms, state->lambda[efptMASS]);
 +
 +    if (opt2bSet("-cpi", nfile, fnm))
 +    {
 +        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr);
 +    }
 +    else
 +    {
 +        bStateFromCP = FALSE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (bStateFromCP)
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if (Flags & MD_APPENDFILES)
 +            {
 +                restore_energyhistory_from_state(mdebin, &state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist, mdebin);
 +    }
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG))
 +    {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd, state);
 +    }
 +
 +    if (state->flags & (1<<estMC_RNG))
 +    {
 +        set_mc_state(mcrng, state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr)
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr, top, ir, mdatoms, cr);
 +        }
 +    }
 +
 +    /* Check whether we have to do GCT stuff */
 +    bTCR = ftp2bSet(efGCT, nfile, fnm);
 +    if (bTCR)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex, gnx);
 +        for (i = 0; (i < gnx); i++)
 +        {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog, cr, "nstcalcenergy", ir->nstcalcenergy,
 +                        "repl_ex_nst", &repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir,
 +                                        repl_ex_nst, repl_ex_nex, repl_ex_seed);
 +    }
 +
 +    /* PME tuning is only supported with GPUs or PME nodes and not with rerun.
 +     * With perturbed charges with soft-core we should not change the cut-off.
 +     */
 +    if ((Flags & MD_TUNEPME) &&
 +        EEL_PME(fr->eeltype) &&
 +        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
 +        !(ir->efep != efepNO && mdatoms->nChargePerturbed > 0 && ir->fepvals->bScCoul) &&
 +        !bRerunMD)
 +    {
 +        pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata);
 +        cycles_pmes = 0;
 +        if (cr->duty & DUTY_PME)
 +        {
 +            /* Start tuning right away, as we can't measure the load */
 +            bPMETuneRunning = TRUE;
 +        }
 +        else
 +        {
 +            /* Separate PME nodes, we can measure the PP/PME load balance */
 +            bPMETuneTry = TRUE;
 +        }
 +    }
 +
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for (m = 0; m < DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog, constr, ir, mdatoms, state, f,
 +                               graph, cr, nrnb, fr, top, shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, NULL,
 +                             top->idef.iparams, top->idef.il,
 +                             fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +
 +    /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and repl_ex_nst */
 +    nstfep = ir->fepvals->nstdhdl;
 +    if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
 +    {
 +        nstfep = ir->expandedvals->nstexpanded;
 +    }
 +    if (repl_ex_nst > 0 && nstfep > repl_ex_nst)
 +    {
 +        nstfep = repl_ex_nst;
 +    }
 +
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0)
 +                  | (bVV ? CGLO_PRESSURE : 0)
 +                  | (bVV ? CGLO_CONSTRAINT : 0)
 +                  | (bRerunMD ? CGLO_RERUNMD : 0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
 +
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                    NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                    constr, NULL, FALSE, state->box,
 +                    top_global, &pcurr, top_global->natoms, &bSumEkinhOld, cglo_flags);
 +    if (ir->eI == eiVVAK)
 +    {
 +        /* a second call to get the half step temperature initialized as well */
 +        /* we do the same call as above, but turn the pressure off -- internally to
 +           compute_globals, this is recognized as a velocity verlet half-step
 +           kinetic energy calculation.  This minimizes excess variables, but
 +           perhaps loses some logic?*/
 +
 +        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                        NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                        constr, NULL, FALSE, state->box,
 +                        top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                        cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT))
 +    {
 +        for (i = 0; (i < ir->opts.ngtc); i++)
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
 +        }
 +    }
 +    if (ir->eI != eiVV)
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +
 +    /* if using an iterative algorithm, we need to create a working buffer for the state. */
 +    if (bIterativeCase)
 +    {
 +        bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan)
 +    {
 +        snew(xcopy, state->natoms);
 +        snew(vcopy, state->natoms);
 +        copy_rvecn(state->x, xcopy, 0, state->natoms);
 +        copy_rvecn(state->v, vcopy, 0, state->natoms);
 +        copy_mat(state->box, boxcopy);
 +    }
 +
 +    /* need to make an initialization call to get the Trotter variables set, as well as other constants for non-trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
 +
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr, FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr, "starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name), opt2fn("-rerun", nfile, fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr, "Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr, "starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf, "%s", "infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
 +                        gmx_step_str(ir->init_step, sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr, "%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps, sbuf), tbuf);
 +            }
 +        }
 +        fprintf(fplog, "\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog, cr->nodeid, "Started mdrun", runtime);
 +    wallcycle_start(wcycle, ewcRUN);
 +    if (fplog)
 +    {
 +        fprintf(fplog, "\n");
 +    }
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret = fcCheckPointParallel( cr->nodeid,
 +                                      NULL, 0);
 +    if (chkpt_ret == 0)
 +    {
 +        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
 +    }
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv, &status,
 +                                             opt2fn("-rerun", nfile, fnm),
 +                                             &rerun_fr, TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms, top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box, fr->shift_vec);
 +        }
 +    }
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX    = !bStateFromCP;
 +    bInitStep        = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep        = FALSE;
 +    bSumEkinhOld     = FALSE;
 +    bExchanged       = FALSE;
 +
 +    init_global_signals(&gs, cr, ir, repl_ex_nst);
 +
 +    step     = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh, bGStatEveryStep, step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame))
 +    {
 +
 +        wallcycle_start(wcycle, ewcSTEP);
 +
 +        if (bRerunMD)
 +        {
 +            if (rerun_fr.bStep)
 +            {
 +                step     = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime)
 +            {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        }
 +        else
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t         = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO || ir->bSimTemp)
 +        {
 +            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
 +               requiring different logic. */
 +
 +            set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0);
 +            bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
 +            bDoFEP       = (do_per_step(step, nstfep) && (ir->efep != efepNO));
 +            bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
 +        }
 +
 +        if (bSimAnn)
 +        {
 +            update_annealing_target_temp(&(ir->opts), t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for (i = 0; i < state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i], state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for (i = 0; i < state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i], state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for (i = 0; i < state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box, state_global->box);
 +            copy_mat(state_global->box, state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* Following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
 +                    shift_self(graph, state->box, state->x);
 +                }
 +                construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v,
 +                                 top->idef.iparams, top->idef.il,
 +                                 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for (ii = 0; (ii < state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii], state->x[ii]);
 +                copy_rvec(vcopy[ii], state->v[ii]);
 +            }
 +            copy_mat(boxcopy, state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS      = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +
 +            bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh, bFirstStep || bExchanged || bDoFEP, step);
 +            }
 +        }
 +
 +        /* check whether we should stop because another simulation has
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&
 +                 (multisim_nsteps != ir->nsteps) )
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr,
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep         = TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist == 0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii = bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii == 0))
 +        {
 +            bBornRadii = TRUE;
 +        }
 +
 +        do_log     = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +            (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog, step, state->box, graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd, state, state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle, ewcDOMDEC);
 +                dd_partition_system(fplog, step, cr,
 +                                    bMasterState, nstglobalcomm,
 +                                    state_global, top_global, ir,
 +                                    state, &f, mdatoms, top, fr,
 +                                    vsite, shellfc, constr,
 +                                    nrnb, wcycle,
 +                                    do_verbose && !bPMETuneRunning);
 +                wallcycle_stop(wcycle, ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
 +            print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms, state->lambda[efptMASS]);
 +        }
 +
 +        if ((bRerunMD && rerun_fr.bV) || bExchanged)
 +        {
 +
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 +                            constr, NULL, FALSE, state->box,
 +                            top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog, oenv, mdatoms, top_global, t, ir, state->x, state->v,
 +                   mdatoms->start, mdatoms->start+mdatoms->homenr, state->box, cr);
 +        }
 +
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile, fnm, fr,
 +                                  mdatoms->nr, state->x, state->box))
 +            {
 +                gmx_finalize_par();
 +
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
 +        if (EI_VV(ir->eI) && (!bInitStep))
 +        {
 +            /* for vv, the first half of the integration actually corresponds
 +               to the previous step.  bCalcEner is only required to be evaluated on the 'next' step,
 +               but the virial needs to be calculated on both the current step and the 'next' step. Future
 +               reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */
 +
 +            bCalcEner = do_per_step(step-1, ir->nstcalcenergy);
 +            bCalcVir  = bCalcEner ||
 +                (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
 +        }
 +        else
 +        {
 +            bCalcEner = do_per_step(step, ir->nstcalcenergy);
 +            bCalcVir  = bCalcEner ||
 +                (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
 +        }
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcVir || bCalcEner || bStopCM ||
 +                  do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcVir  = TRUE;
 +            bCalcEner = TRUE;
 +            bGStat    = TRUE;
 +        }
 +
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +                      );
 +
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
 +                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
 +                       (bDoFEP ? GMX_FORCE_DHDL : 0)
 +                       );
 +
 +        if (fr->bTwinRange)
 +        {
 +            if (do_per_step(step, ir->nstcalclr))
 +            {
 +                force_flags |= GMX_FORCE_DO_LR;
 +            }
 +        }
 +
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count = relax_shell_flexcon(fplog, cr, bVerbose, bFFscan ? step+1 : step,
 +                                        ir, bNS, force_flags,
 +                                        bStopCM, top, top_global,
 +                                        constr, enerd, fcd,
 +                                        state, f, force_vir, mdatoms,
 +                                        nrnb, wcycle, graph, groups,
 +                                        shellfc, fr, bBornRadii, t, mu_tot,
 +                                        state->natoms, &bConverged, vsite,
 +                                        outf->fp_field);
 +            tcount += count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallelized as well, and does communication too.
 +             * Check comments in sim_util.c
 +             */
 +            do_force(fplog, cr, ir, step, nrnb, wcycle, top, top_global, groups,
 +                     state->box, state->x, &state->hist,
 +                     f, force_vir, mdatoms, enerd, fcd,
 +                     state->lambda, graph,
 +                     fr, vsite, mu_tot, t, outf->fp_field, ed, bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr, state->x, mdatoms->chargeA,
 +                                   mu_tot, &top_global->mols, mdatoms, gnx, grpindex);
 +        }
 +
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr = init_coupling(fplog, nfile, fnm, cr, fr, mdatoms, &(top->idef));
 +            fprintf(fplog, "Done init_coupling\n");
 +            fflush(fplog);
 +        }
 +
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI == eiVV && bInitStep)
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */
 +            }
 +            else
 +            {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
 +            }
 +
 +            /* If we are using twin-range interactions where the long-range component
 +             * is only evaluated every nstcalclr>1 steps, we should do a special update
 +             * step to combine the long-range forces on these steps.
 +             * For nstcalclr=1 this is not done, since the forces would have been added
 +             * directly to the short-range forces already.
 +             */
 +            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC,
 +                          f, bUpdateDoLR, fr->f_twin, fcd,
 +                          ekind, M, wcycle, upd, bInitStep, etrtVELOCITY1,
 +                          cr, nrnb, constr, &top->idef);
 +
 +            if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep)
 +            {
 +                gmx_iterate_init(&iterate, TRUE);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +
 +            /* save the state */
 +            if (iterate.bIterationActive)
 +            {
 +                copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
 +            }
 +
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || iterate.bIterationActive)
 +            {
 +                if (iterate.bIterationActive)
 +                {
 +                    copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
 +                    if (bFirstIterate && bTrotter)
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +
 +                        veta_save = state->veta;
 +                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0);
 +                        vetanew     = state->veta;
 +                        state->veta = veta_save;
 +                    }
 +                }
 +
 +                bOK = TRUE;
 +                if (!bRerunMD || rerun_fr.bV || bForceUpdate)     /* Why is rerun_fr.bV here?  Unclear. */
 +                {
 +                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, shake_vir, NULL,
 +                                       cr, nrnb, wcycle, upd, constr,
 +                                       bInitStep, TRUE, bCalcVir, vetanew);
 +
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                    }
 +
 +                }
 +                else if (graph)
 +                {
 +                    /* Need to unshift here if a do_force has been
 +                       called in the previous step */
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
 +                if (bCalcEner && ir->eI == eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
 +                {
 +                    bSumEkinhOld = TRUE;
 +                }
 +                /* for vv, the first half of the integration actually corresponds to the previous step.
 +                   So we need information from the last step in the first half of the integration */
 +                if (bGStat || do_per_step(step-1, nstglobalcomm))
 +                {
 +                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                    constr, NULL, FALSE, state->box,
 +                                    top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                    cglo_flags
 +                                    | CGLO_ENERGY
 +                                    | (bTemp ? CGLO_TEMPERATURE : 0)
 +                                    | (bPres ? CGLO_PRESSURE : 0)
 +                                    | (bPres ? CGLO_CONSTRAINT : 0)
 +                                    | ((iterate.bIterationActive) ? CGLO_ITERATE : 0)
 +                                    | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                    | CGLO_SCALEEKIN
 +                                    );
 +                    /* explanation of above:
 +                       a) We compute Ekin at the full time step
 +                       if 1) we are using the AveVel Ekin, and it's not the
 +                       initial step, or 2) if we are using AveEkin, but need the full
 +                       time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                       b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
 +                       EkinAveVel because it's needed for the pressure */
 +                }
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep)
 +                {
 +                    if (bTrotter)
 +                    {
 +                        m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */
 +                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
 +                    }
 +                    else
 +                    {
 +                        if (bExchanged)
 +                        {
 +
 +                            /* We need the kinetic energy at minus the half step for determining
 +                             * the full step kinetic energy and possibly for T-coupling.*/
 +                            /* This may not be quite working correctly yet . . . . */
 +                            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 +                                            constr, NULL, FALSE, state->box,
 +                                            top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +                        }
 +                    }
 +                }
 +
 +                if (iterate.bIterationActive &&
 +                    done_iterating(cr, fplog, step, &iterate, bFirstIterate,
 +                                   state->veta, &vetanew))
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep)
 +            {
 +                copy_mat(shake_vir, state->svir_prev);
 +                copy_mat(force_vir, state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI == eiVV)
 +                {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE, FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI == eiVV)
 +            {
 +                copy_rvecn(cbuf, state->v, 0, state->natoms);
 +            }
 +        }
 +
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV)
 +        {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ);
 +            if (ir->eI == eiVV)
 +            {
 +                last_ekin = enerd->term[F_EKIN];
 +            }
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
 +            if (!bRerunMD)
 +            {
 +                sum_dhdl(enerd, state->lambda, ir->fepvals);
 +            }
 +        }
 +
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        if (bDoExpanded)
 +        {
 +            /* perform extended ensemble sampling in lambda - we don't
 +               actually move to the new state before outputting
 +               statistics, but if performing simulated tempering, we
 +               do update the velocities and the tau_t. */
 +
 +            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, &df_history, step, mcrng, state->v, mdatoms);
 +        }
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +
 +        /* Now we have the energies and forces corresponding to the
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step, ir->nstxout))
 +        {
 +            mdof_flags |= MDOF_X;
 +        }
 +        if (do_per_step(step, ir->nstvout))
 +        {
 +            mdof_flags |= MDOF_V;
 +        }
 +        if (do_per_step(step, ir->nstfout))
 +        {
 +            mdof_flags |= MDOF_F;
 +        }
 +        if (do_per_step(step, ir->nstxtcout))
 +        {
 +            mdof_flags |= MDOF_XTC;
 +        }
 +        if (bCPT)
 +        {
 +            mdof_flags |= MDOF_CPT;
 +        }
 +        ;
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +        {
 +            fcReportProgress( ir->nsteps, step );
 +        }
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +        {
 +            fcRequestCheckPoint();
 +        }
 +#endif
 +
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle, ewcTRAJ);
 +            if (bCPT)
 +            {
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd, state);
 +                }
 +                if (state->flags  & (1<<estMC_RNG))
 +                {
 +                    get_mc_state(mcrng, state);
 +                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate, ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist, mdebin);
 +                    if (ir->efep != efepNO || ir->bSimTemp)
 +                    {
 +                        state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
 +                                                                       structured so this isn't necessary.
 +                                                                       Note this reassignment is only necessary
 +                                                                       for single threads.*/
 +                        copy_df_history(&state_global->dfhist, &df_history);
 +                    }
 +                }
 +            }
 +            write_traj(fplog, cr, outf, mdof_flags, top_global,
 +                       step, t, state, state_global, f, f_global, &n_xtc, &x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr, "\nWriting final coordinates.\n");
 +                if (fr->bMolPBC)
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm),
 +                                    *top_global->name, top_global,
 +                                    state_global->x, state_global->v,
 +                                    ir->ePBC, state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle, ewcTRAJ);
 +        }
 +
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV)
 +        {
 +            copy_mat(state->svir_prev, shake_vir);
 +            copy_mat(state->fvir_prev, force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this just makes gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
 +            {
 +                gs.sig[eglsSTOPCOND] = 1;
 +            }
 +            if (gmx_get_stop_condition() == gmx_stop_cond_next)
 +            {
 +                gs.sig[eglsSTOPCOND] = -1;
 +            }
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition = (int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
 +        }
 +
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that also the bonded interaction count check is not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs,
 +                                                     nlh.scale_tot, state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 ||
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
 +
 +        /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */
 +        if (EI_VV(ir->eI))
 +        {
 +            if (!bInitStep)
 +            {
 +                update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms);
 +            }
 +            if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
 +            {
 +                gmx_bool bIfRandomize;
 +                bIfRandomize = update_randomize_velocities(ir, step, mdatoms, state, upd, &top->idef, constr);
 +                /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
 +                if (constr && bIfRandomize)
 +                {
 +                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, tmp_vir, NULL,
 +                                       cr, nrnb, wcycle, upd, constr,
 +                                       bInitStep, TRUE, bCalcVir, vetanew);
 +                }
 +            }
 +        }
 +
 +        if (bIterativeCase && do_per_step(step, ir->nstpcouple))
 +        {
 +            gmx_iterate_init(&iterate, TRUE);
 +            /* for iterations, we save these vectors, as we will be redoing the calculations */
 +            copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
 +        }
 +
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || iterate.bIterationActive)
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */
 +            if (iterate.bIterationActive)
 +            {
 +                copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box, lastbox);
 +
 +            bOK = TRUE;
 +            dvdl_constr = 0;
 +
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle, ewcUPDATE);
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter)
 +                {
 +                    if (iterate.bIterationActive)
 +                    {
 +                        if (bFirstIterate)
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir, scalevir, shake_vir);
 +                        m_add(force_vir, shake_vir, total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
 +                    /* We can only do Berendsen coupling after we have summed
 +                     * the kinetic energy or virial. Since this happens
 +                     * in global_state after update, we should only do it at
 +                     * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                     */
 +                }
 +                else
 +                {
 +                    update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms);
 +                    update_pcouple(fplog, step, ir, state, pcoupl_mu, M, wcycle,
 +                                   upd, bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                    /* velocity half-step update */
 +                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                                  bUpdateDoLR, fr->f_twin, fcd,
 +                                  ekind, M, wcycle, upd, FALSE, etrtVELOCITY2,
 +                                  cr, nrnb, constr, &top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +
 +                if (ir->eI == eiVVAK)
 +                {
 +                    copy_rvecn(state->x, cbuf, 0, state->natoms);
 +                }
 +                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                              bUpdateDoLR, fr->f_twin, fcd,
 +                              ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +                wallcycle_stop(wcycle, ewcUPDATE);
 +
 +                update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state,
 +                                   fr->bMolPBC, graph, f,
 +                                   &top->idef, shake_vir, force_vir,
 +                                   cr, nrnb, wcycle, upd, constr,
 +                                   bInitStep, FALSE, bCalcVir, state->veta);
 +
 +                if (ir->eI == eiVVAK)
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                    constr, NULL, FALSE, lastbox,
 +                                    top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE
 +                                    );
 +                    wallcycle_start(wcycle, ewcUPDATE);
 +                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf, state->x, 0, state->natoms);
 +
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                                  bUpdateDoLR, fr->f_twin, fcd,
 +                                  ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +                    wallcycle_stop(wcycle, ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure.
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, tmp_vir, force_vir,
 +                                       cr, nrnb, wcycle, upd, NULL,
 +                                       bInitStep, FALSE, bCalcVir,
 +                                       state->veta);
 +                }
 +                if (!bOK && !bFFscan)
 +                {
 +                    gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                }
 +
 +                if (fr->bSepDVDL && fplog && do_log)
 +                {
 +                    fprintf(fplog, sepdvdlformat, "Constraint dV/dl", 0.0, dvdl_constr);
 +                }
-                 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
++                if (bVV)
++                {
++                    /* this factor of 2 correction is necessary
++                       because half of the constraint force is removed
++                       in the vv step, so we have to double it.  See
++                       the Redmine issue #1255.  It is not yet clear
++                       if the factor of 2 is exact, or just a very
++                       good approximation, and this will be
++                       investigated.  The next step is to see if this
++                       can be done adding a dhdl contribution from the
++                       rattle step, but this is somewhat more
++                       complicated with the current code. Will be
++                       investigated, hopefully for 4.6.3. However,
++                       this current solution is much better than
++                       having it completely wrong.
++                    */
++                    enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
++                }
++                else
++                {
++                    enerd->term[F_DVDL_CONSTR] += dvdl_constr;
++                }
 +            }
 +            else if (graph)
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph, state->box, state->x);
 +            }
 +
 +            if (vsite != NULL)
 +            {
 +                wallcycle_start(wcycle, ewcVSITECONSTR);
 +                if (graph != NULL)
 +                {
 +                    shift_self(graph, state->box, state->x);
 +                }
 +                construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v,
 +                                 top->idef.iparams, top->idef.il,
 +                                 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +
 +                if (graph != NULL)
 +                {
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +                wallcycle_stop(wcycle, ewcVSITECONSTR);
 +            }
 +
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints  ############ */
 +            /* With Leap-Frog we can skip compute_globals at
 +             * non-communication steps, but we need to calculate
 +             * the kinetic energy one step before communication.
 +             */
 +            if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
 +            {
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    gs.sig[eglsNABNSB] = nlh.nabnsb;
 +                }
 +                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                constr,
 +                                bFirstIterate ? &gs : NULL,
 +                                (step_rel % gs.nstms == 0) &&
 +                                (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
 +                                lastbox,
 +                                top_global, &pcurr, top_global->natoms, &bSumEkinhOld,
 +                                cglo_flags
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
 +                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
 +                                | (iterate.bIterationActive ? CGLO_ITERATE : 0)
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_CONSTRAINT
 +                                );
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    nlh.nabnsb         = gs.set[eglsNABNSB];
 +                    gs.set[eglsNABNSB] = 0;
 +                }
 +            }
 +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properies,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (iterate.bIterationActive &&
 +                done_iterating(cr, fplog, step, &iterate, bFirstIterate,
 +                               trace(shake_vir), &tracevir))
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
-         /* only add constraint dvdl after constraints */
-         enerd->term[F_DVDL_CONSTR] += dvdl_constr;
 +        if (!bVV || bRerunMD)
 +        {
 +            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd, state->lambda, ir->fepvals);
 +        }
 +        update_box(fplog, step, ir, mdatoms, state, graph, f,
 +                   ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, wcycle, upd, bInitStep, FALSE);
 +
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc == NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog, enerd->term, mdatoms->homenr,
 +                                 f, NULL, xcopy,
 +                                 &(top_global->mols), mdatoms->massT, pres))
 +            {
 +                gmx_finalize_par();
 +
 +                fprintf(stderr, "\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,
 +             * so signal that we still have to do it.
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies.
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog, oenv, nfile, fnm, tcr, t, step, enerd->term, fr,
 +                        ir, MASTER(cr),
 +                        mdatoms, &(top->idef), mu_aver,
 +                        top_global->mols.nr, cr,
 +                        state->box, total_vir, pres,
 +                        mu_tot, state->x, f, bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI == eiVV)
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize)
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max)
 +            {
 +                fprintf(stderr, "Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep)
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr, do_or;
 +
 +            if (fplog && do_log && bDoExpanded)
 +            {
 +                /* only needed if doing expanded ensemble */
 +                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL,
 +                                          &df_history, state->fep_state, ir->nstlog, step);
 +            }
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI))))
 +            {
 +                if (bCalcEner)
 +                {
 +                    upd_mdebin(mdebin, bDoDHDL, TRUE,
 +                               t, mdatoms->tmass, enerd, state,
 +                               ir->fepvals, ir->expandedvals, lastbox,
 +                               shake_vir, force_vir, total_vir, pres,
 +                               ekind, mu_tot, constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +
 +                do_dr  = do_per_step(step, ir->nstdisreout);
 +                do_or  = do_per_step(step, ir->nstorireout);
 +
 +                print_ebin(outf->fp_ene, do_ene, do_dr, do_or, do_log ? fplog : NULL,
 +                           step, t,
 +                           eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull, step, t);
 +            }
 +
 +            if (do_per_step(step, ir->nstlog))
 +            {
 +                if (fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +        if (bDoExpanded)
 +        {
 +            /* Have to do this part after outputting the logfile and the edr file */
 +            state->fep_state = lamnew;
 +            for (i = 0; i < efptNR; i++)
 +            {
 +                state_global->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
 +            }
 +        }
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
 +        {
 +            if (shellfc)
 +            {
 +                fprintf(stderr, "\n");
 +            }
 +            print_time(stderr, runtime, step, ir, cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step, repl_ex_nst))
 +        {
 +            bExchanged = replica_exchange(fplog, cr, repl_ex,
 +                                          state_global, enerd,
 +                                          state, step, t);
 +
 +            if (bExchanged && DOMAINDECOMP(cr))
 +            {
 +                dd_partition_system(fplog, step, cr, TRUE, 1,
 +                                    state_global, top_global, ir,
 +                                    state, &f, mdatoms, top, fr,
 +                                    vsite, shellfc, constr,
 +                                    nrnb, wcycle, FALSE);
 +            }
 +        }
 +
 +        bFirstStep       = FALSE;
 +        bInitStep        = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres, state->pres_prev);
 +        }
 +
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed != NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel, membed, state_global->x);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +            }
 +        }
 +
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +
 +        cycles = wallcycle_stop(wcycle, ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd, cycles, ddCyclStep);
 +        }
 +
 +        if (bPMETuneRunning || bPMETuneTry)
 +        {
 +            /* PME grid + cut-off optimization with GPUs or PME nodes */
 +
 +            /* Count the total cycles over the last steps */
 +            cycles_pmes += cycles;
 +
 +            /* We can only switch cut-off at NS steps */
 +            if (step % ir->nstlist == 0)
 +            {
 +                /* PME grid + cut-off optimization with GPUs or PME nodes */
 +                if (bPMETuneTry)
 +                {
 +                    if (DDMASTER(cr->dd))
 +                    {
 +                        /* PME node load is too high, start tuning */
 +                        bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
 +                    }
 +                    dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
 +
 +                    if (bPMETuneRunning || step_rel > ir->nstlist*50)
 +                    {
 +                        bPMETuneTry     = FALSE;
 +                    }
 +                }
 +                if (bPMETuneRunning)
 +                {
 +                    /* init_step might not be a multiple of nstlist,
 +                     * but the first cycle is always skipped anyhow.
 +                     */
 +                    bPMETuneRunning =
 +                        pme_load_balance(pme_loadbal, cr,
 +                                         (bVerbose && MASTER(cr)) ? stderr : NULL,
 +                                         fplog,
 +                                         ir, state, cycles_pmes,
 +                                         fr->ic, fr->nbv, &fr->pmedata,
 +                                         step);
 +
 +                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
 +                    fr->ewaldcoeff = fr->ic->ewaldcoeff;
 +                    fr->rlist      = fr->ic->rlist;
 +                    fr->rlistlong  = fr->ic->rlistlong;
 +                    fr->rcoulomb   = fr->ic->rcoulomb;
 +                    fr->rvdw       = fr->ic->rvdw;
 +                }
 +                cycles_pmes = 0;
 +            }
 +        }
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, runtime,
 +                               fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
 +            wcycle_set_reset_counters(wcycle, -1);
 +            if (!(cr->duty & DUTY_PME))
 +            {
 +                /* Tell our PME node to reset its counters */
 +                gmx_pme_send_resetcounters(cr, step);
 +            }
 +            /* Correct max_hours for the elapsed time */
 +            max_hours                -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH    = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +
 +    /* Stop the time */
 +    runtime_end(runtime);
 +
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_send_finish(cr);
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD)
 +        {
 +            print_ebin(outf->fp_ene, FALSE, FALSE, FALSE, fplog, step, t,
 +                       eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns);
 +    }
 +
 +    if (pme_loadbal != NULL)
 +    {
 +        pme_loadbal_done(pme_loadbal, cr, fplog,
 +                         fr->nbv != NULL && fr->nbv->bUseGPU);
 +    }
 +
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog, "Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog, repl_ex);
 +    }
 +
 +    runtime->nsteps_done = step_rel;
 +
 +    return 0;
 +}
diff --cc src/programs/mdrun/runner.c
index 8ff74dbdd5,0000000000..c820938cf6
mode 100644,000000..100644
--- a/src/programs/mdrun/runner.c
+++ b/src/programs/mdrun/runner.c
@@@ -1,1689 -1,0 +1,1689 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include <signal.h>
 +#include <stdlib.h>
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +#include <string.h>
 +#include <assert.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "statutil.h"
 +#include "mdrun.h"
 +#include "md_logging.h"
 +#include "md_support.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "names.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "gmx_detect_hardware.h"
 +#include "gmx_omp_nthreads.h"
 +#include "pull_rotation.h"
 +#include "calc_verletbuf.h"
 +#include "../mdlib/nbnxn_search.h"
 +#include "../mdlib/nbnxn_consts.h"
 +#include "gmx_fatal_collective.h"
 +#include "membed.h"
 +#include "macros.h"
 +#include "gmx_omp.h"
 +#include "gmx_thread_affinity.h"
 +
 +#include "gromacs/utility/gmxmpi.h"
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +typedef struct {
 +    gmx_integrator_t *func; /* integrator driver routine, e.g. do_md */
 +} gmx_intp_t;
 +
 +/* Driver routine for each integrator, one entry per eI value (eiNR in total); the order must match the eI array in include/types/enums.h */
 +const gmx_intp_t    integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md}};
 +
 +gmx_large_int_t     deform_init_init_step_tpx; /* NOTE(review): presumably the initial step from the tpx file, kept for box deformation - confirm */
 +matrix              deform_init_box_tpx;       /* NOTE(review): presumably the box from the tpx file, kept for box deformation - confirm */
 +#ifdef GMX_THREAD_MPI
 +tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER; /* NOTE(review): presumably guards the two deform_init_* variables - confirm */
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +struct mdrunner_arglist /* mdrunner() arguments bundled so they can be passed as one void pointer to the thread start function */
 +{
 +    gmx_hw_opt_t   *hw_opt;
 +    FILE           *fplog;
 +    t_commrec      *cr;
 +    int             nfile;
 +    const t_filenm *fnm;
 +    output_env_t    oenv;
 +    gmx_bool        bVerbose;
 +    gmx_bool        bCompact;
 +    int             nstglobalcomm;
 +    ivec            ddxyz; /* stored by value: filled element-wise (XX, YY, ZZ) from the caller's ivec */
 +    int             dd_node_order;
 +    real            rdd;
 +    real            rconstr;
 +    const char     *dddlb_opt;
 +    real            dlb_scale;
 +    const char     *ddcsx;
 +    const char     *ddcsy;
 +    const char     *ddcsz;
 +    const char     *nbpu_opt;
 +    gmx_large_int_t nsteps_cmdline;
 +    int             nstepout;
 +    int             resetstep;
 +    int             nmultisim;
 +    int             repl_ex_nst;
 +    int             repl_ex_nex;
 +    int             repl_ex_seed;
 +    real            pforce;
 +    real            cpt_period;
 +    real            max_hours;
 +    const char     *deviceOptions;
 +    unsigned long   Flags;
 +    int             ret; /* return value of mdrunner(), written by mdrunner_start_fn() */
 +};
 +
 +
 +/* Thread start function passed to tMPI_Init_fn(). Takes a private copy of the
 +   struct mdrunner_arglist behind arg, makes a commrec for this thread and calls
 +   mdrunner() with the copied arguments; the result is stored in mda->ret. */
 +static void mdrunner_start_fn(void *arg)
 +{
 +    struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg;
 +    struct mdrunner_arglist  mc  = *mda; /* copy the arg list to make sure
 +                                            that it's thread-local. This doesn't
 +                                            copy pointed-to items, of course,
 +                                            but those are all const. */
 +    t_commrec *cr;                       /* we need a local version of this */
 +    FILE      *fplog = NULL;
 +    t_filenm  *fnm;
 +
 +    fnm = dup_tfn(mc.nfile, mc.fnm); /* NOTE(review): presumably a per-thread copy of the file name table; it is not released in this function */
 +
 +    cr = init_par_threads(mc.cr);
 +
 +    if (MASTER(cr)) /* only the master keeps the log file handle; all others call mdrunner() with fplog == NULL */
 +    {
 +        fplog = mc.fplog;
 +    }
 +
 +    mda->ret = mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
 +                        mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
 +                        mc.ddxyz, mc.dd_node_order, mc.rdd,
 +                        mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
 +                        mc.ddcsx, mc.ddcsy, mc.ddcsz,
 +                        mc.nbpu_opt,
 +                        mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
 +                        mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
 +                        mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
 +}
 +
 +/* Called by mdrunner() to start hw_opt->nthreads_tmpi thread-MPI threads (including
 +   the main thread); the spawned threads run mdrunner() through mdrunner_start_fn().
 +   Returns cr unchanged when fewer than 2 threads are requested, NULL when
 +   tMPI_Init_fn() fails, and otherwise a new commrec from init_par_threads(). */
 +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
 +                                         FILE *fplog, t_commrec *cr, int nfile,
 +                                         const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +                                         gmx_bool bCompact, int nstglobalcomm,
 +                                         ivec ddxyz, int dd_node_order, real rdd, real rconstr,
 +                                         const char *dddlb_opt, real dlb_scale,
 +                                         const char *ddcsx, const char *ddcsy, const char *ddcsz,
 +                                         const char *nbpu_opt,
 +                                         gmx_large_int_t nsteps_cmdline,
 +                                         int nstepout, int resetstep,
 +                                         int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                                         real pforce, real cpt_period, real max_hours,
 +                                         const char *deviceOptions, unsigned long Flags)
 +{
 +    int                      ret;
 +    struct mdrunner_arglist *mda;
 +    t_commrec               *crn; /* the new commrec */
 +    t_filenm                *fnmn;
 +
 +    /* first check whether we even need to start tMPI */
 +    if (hw_opt->nthreads_tmpi < 2)
 +    {
 +        return cr;
 +    }
 +
 +    /* a few small, one-time, almost unavoidable memory leaks: */
 +    snew(mda, 1);
 +    fnmn = dup_tfn(nfile, fnm);
 +
 +    /* fill the data structure to pass as void pointer to thread start fn */
 +    mda->hw_opt         = hw_opt;
 +    mda->fplog          = fplog;
 +    mda->cr             = cr;
 +    mda->nfile          = nfile;
 +    mda->fnm            = fnmn;
 +    mda->oenv           = oenv;
 +    mda->bVerbose       = bVerbose;
 +    mda->bCompact       = bCompact;
 +    mda->nstglobalcomm  = nstglobalcomm;
 +    mda->ddxyz[XX]      = ddxyz[XX];
 +    mda->ddxyz[YY]      = ddxyz[YY];
 +    mda->ddxyz[ZZ]      = ddxyz[ZZ];
 +    mda->dd_node_order  = dd_node_order;
 +    mda->rdd            = rdd;
 +    mda->rconstr        = rconstr;
 +    mda->dddlb_opt      = dddlb_opt;
 +    mda->dlb_scale      = dlb_scale;
 +    mda->ddcsx          = ddcsx;
 +    mda->ddcsy          = ddcsy;
 +    mda->ddcsz          = ddcsz;
 +    mda->nbpu_opt       = nbpu_opt;
 +    mda->nsteps_cmdline = nsteps_cmdline;
 +    mda->nstepout       = nstepout;
 +    mda->resetstep      = resetstep;
 +    mda->nmultisim      = nmultisim;
 +    mda->repl_ex_nst    = repl_ex_nst;
 +    mda->repl_ex_nex    = repl_ex_nex;
 +    mda->repl_ex_seed   = repl_ex_seed;
 +    mda->pforce         = pforce;
 +    mda->cpt_period     = cpt_period;
 +    mda->max_hours      = max_hours;
 +    mda->deviceOptions  = deviceOptions;
 +    mda->Flags          = Flags;
 +
 +    /* now spawn new threads that start mdrunner_start_fn(), while
 +       the main thread returns, we set thread affinity later */
 +    ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE,
 +                       mdrunner_start_fn, (void*)(mda) );
 +    if (ret != TMPI_SUCCESS)
 +    {
 +        return NULL; /* mda and fnmn are not freed on this path */
 +    }
 +
 +    /* make a new comm_rec to reflect the new situation */
 +    crn = init_par_threads(cr);
 +    return crn;
 +}
 +
 +/* Return the number of thread-MPI threads for automatic setup: ngpu (capped at nthreads_tot) with GPUs, max(1, nthreads_tot/nthreads_omp) when the OpenMP count is set, otherwise 1 (pure OpenMP) or nthreads_tot (no OpenMP). */
 +static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
 +                                        const gmx_hw_opt_t  *hw_opt,
 +                                        int                  nthreads_tot,
 +                                        int                  ngpu)
 +{
 +    int nthreads_tmpi;
 +
 +    /* There are no separate PME nodes here, as we ensured in
 +     * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
 +     * and a conditional ensures we would not have ended up here.
 +     * Note that separate PME nodes might be switched on later.
 +     */
 +    if (ngpu > 0)
 +    {
 +        nthreads_tmpi = ngpu;
 +        if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
 +        {
 +            nthreads_tmpi = nthreads_tot;
 +        }
 +    }
 +    else if (hw_opt->nthreads_omp > 0)
 +    {
 +        /* Here we could oversubscribe, when we do, we issue a warning later */
 +        nthreads_tmpi = max(1, nthreads_tot/hw_opt->nthreads_omp);
 +    }
 +    else
 +    {
 +        /* TODO choose nthreads_omp based on hardware topology
 +           when we have a hardware topology detection library */
 +        /* In general, when running up to 4 threads, OpenMP should be faster.
 +         * Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
 +         * On Intel>=Nehalem running OpenMP on a single CPU is always faster,
 +         * even on two CPUs it's usually faster (but with many OpenMP threads
 +         * it could be faster not to use HT, currently we always use HT).
 +         * On Nehalem/Westmere we want to avoid running 16 threads over
 +         * two CPUs with HT, so we need a limit<16; thus we use 12.
 +         * A reasonable limit for Intel Sandy and Ivy bridge,
 +         * not knowing the topology, is 16 threads.
 +         */
 +        const int nthreads_omp_always_faster             =  4;
 +        const int nthreads_omp_always_faster_Nehalem     = 12;
 +        const int nthreads_omp_always_faster_SandyBridge = 16;
 +        const int first_model_Nehalem                    = 0x1A; /* lowest CPUID model number treated as Nehalem or newer */
 +        const int first_model_SandyBridge                = 0x2A; /* lowest CPUID model number treated as Sandy Bridge or newer */
 +        gmx_bool  bIntel_Family6;
 +
 +        bIntel_Family6 =
 +            (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
 +             gmx_cpuid_family(hwinfo->cpuid_info) == 6);
 +
 +        if (nthreads_tot <= nthreads_omp_always_faster || /* the model number is compared with the first_model_* thresholds, the thread count with the per-architecture limits */
 +            (bIntel_Family6 &&
 +             ((gmx_cpuid_model(hwinfo->cpuid_info) >= first_model_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
 +              (gmx_cpuid_model(hwinfo->cpuid_info) >= first_model_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
 +        {
 +            /* Use pure OpenMP parallelization */
 +            nthreads_tmpi = 1;
 +        }
 +        else
 +        {
 +            /* Don't use OpenMP parallelization */
 +            nthreads_tmpi = nthreads_tot;
 +        }
 +    }
 +
 +    return nthreads_tmpi;
 +}
 +
 +
 +/* Get the number of threads to use for thread-MPI based on how many
 + * were requested, which algorithms we're using,
 + * and how many particles there are.
 + * At this point we have already called check_and_update_hw_opt.
 + * Thus all options should be internally consistent and consistent
 + * with the hardware, except that ntmpi could be larger than #GPU.
 + */
 +static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
 +                            gmx_hw_opt_t *hw_opt,
 +                            t_inputrec *inputrec, gmx_mtop_t *mtop,
 +                            const t_commrec *cr,
 +                            FILE *fplog)
 +{
 +    int      nthreads_hw, nthreads_tot_max, nthreads_tmpi, nthreads_new, ngpu;
 +    int      min_atoms_per_mpi_thread;
 +    char    *env;          /* NOTE(review): not referenced in this function */
 +    char     sbuf[STRLEN]; /* NOTE(review): not referenced in this function */
 +    gmx_bool bCanUseGPU;
 +
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        /* Trivial, return right away */
 +        return hw_opt->nthreads_tmpi;
 +    }
 +
 +    nthreads_hw = hwinfo->nthreads_hw_avail;
 +
 +    /* How many total (#tMPI*#OpenMP) threads can we start? */
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        nthreads_tot_max = hw_opt->nthreads_tot;
 +    }
 +    else
 +    {
 +        nthreads_tot_max = nthreads_hw;
 +    }
 +
 +    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
 +    if (bCanUseGPU)
 +    {
 +        ngpu = hwinfo->gpu_info.ncuda_dev_use;
 +    }
 +    else
 +    {
 +        ngpu = 0;
 +    }
 +
 +    nthreads_tmpi =
 +        get_tmpi_omp_thread_division(hwinfo, hw_opt, nthreads_tot_max, ngpu);
 +
 +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
 +    {
 +        /* Steps are divided over the nodes instead of splitting the atoms */
 +        min_atoms_per_mpi_thread = 0;
 +    }
 +    else
 +    {
 +        if (bCanUseGPU)
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
 +        }
 +        else
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
 +        }
 +    }
 +
 +    /* Check if an algorithm does not support parallel simulation.  */
 +    if (nthreads_tmpi != 1 &&
 +        ( inputrec->eI == eiLBFGS ||
 +          inputrec->coulombtype == eelEWALD ) )
 +    {
 +        nthreads_tmpi = 1;
 +
 +        md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
 +        if (hw_opt->nthreads_tmpi > nthreads_tmpi) /* NOTE(review): never true here, the early return above leaves hw_opt->nthreads_tmpi <= 0 */
 +        {
 +            gmx_fatal(FARGS, "You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
 +        }
 +    }
 +    else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
 +    {
 +        /* the thread number was chosen automatically, but there are too many
 +           threads (too few atoms per thread) */
 +        nthreads_new = max(1, mtop->natoms/min_atoms_per_mpi_thread);
 +
 +        /* Avoid partial use of Hyper-Threading */
 +        if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
 +            nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw)
 +        {
 +            nthreads_new = nthreads_hw/2;
 +        }
 +
 +        /* Avoid large prime numbers in the thread count */
 +        if (nthreads_new >= 6)
 +        {
 +            /* Use only 6,8,10 with additional factors of 2 */
 +            int fac;
 +
 +            fac = 2;
 +            while (3*fac*2 <= nthreads_new)
 +            {
 +                fac *= 2;
 +            }
 +
 +            nthreads_new = (nthreads_new/fac)*fac; /* round down to a multiple of fac */
 +        }
 +        else
 +        {
 +            /* Avoid 5 */
 +            if (nthreads_new == 5)
 +            {
 +                nthreads_new = 4;
 +            }
 +        }
 +
 +        nthreads_tmpi = nthreads_new;
 +
 +        fprintf(stderr, "\n");
 +        fprintf(stderr, "NOTE: Parallelization is limited by the small number of atoms,\n");
 +        fprintf(stderr, "      only starting %d thread-MPI threads.\n", nthreads_tmpi);
 +        fprintf(stderr, "      You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
 +    }
 +
 +    return nthreads_tmpi;
 +}
 +#endif /* GMX_THREAD_MPI */
 +
 +
 +/* Name of the environment variable that overrides nstlist */
 +static const char*  NSTLIST_ENVVAR          =  "GMX_NSTLIST";
 +/* Try to increase nstlist when using a GPU with nstlist less than this */
 +static const int    NSTLIST_GPU_ENOUGH      = 20;
 +/* Keep increasing nstlist until the non-bonded cost has increased by more than this factor */
 +static const float  NBNXN_GPU_LIST_OK_FAC   = 1.25;
 +/* Don't increase nstlist beyond a non-bonded cost increase of this factor */
 +static const float  NBNXN_GPU_LIST_MAX_FAC  = 1.40;
 +
 +/* Try to increase nstlist when running on a GPU: take nstlist from GMX_NSTLIST when set, otherwise step through a fixed list of values; on success ir->nstlist, ir->rlist and ir->rlistlong are updated, on failure nstlist is restored. */
 +static void increase_nstlist(FILE *fp, t_commrec *cr,
 +                             t_inputrec *ir, const gmx_mtop_t *mtop, matrix box)
 +{
 +    char                  *env;
 +    int                    nstlist_orig, nstlist_prev;
 +    verletbuf_list_setup_t ls;
 +    real                   rlist_inc, rlist_ok, rlist_max, rlist_new, rlist_prev;
 +    int                    i;
 +    t_state                state_tmp;
 +    gmx_bool               bBox, bDD, bCont;
 +    const char            *nstl_fmt = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
 +    const char            *vbd_err  = "Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used";
 +    const char            *box_err  = "Can not increase nstlist for GPU run because the box is too small";
 +    const char            *dd_err   = "Can not increase nstlist for GPU run because of domain decomposition limitations";
 +    char                   buf[STRLEN];
 +
 +    /* The nstlist values to try, in this order, when no value is set in the environment */
 +    const int nstl[] = { 20, 25, 40, 50 };
 +#define NNSTL  (sizeof(nstl)/sizeof(nstl[0]))
 +
 +    env = getenv(NSTLIST_ENVVAR);
 +    if (env == NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, nstl_fmt, ir->nstlist);
 +        }
 +    }
 +
 +    if (ir->verletbuf_drift == 0)
 +    {
 +        gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
 +    }
 +
 +    if (ir->verletbuf_drift < 0)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "%s\n", vbd_err);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "%s\n", vbd_err);
 +        }
 +
 +        return;
 +    }
 +
 +    nstlist_orig = ir->nstlist;
 +    if (env != NULL)
 +    {
 +        sprintf(buf, "Getting nstlist from environment variable GMX_NSTLIST=%s", env);
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "%s\n", buf);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "%s\n", buf);
 +        }
 +        sscanf(env, "%d", &ir->nstlist); /* the result is not checked: ir->nstlist keeps its value when env does not start with an integer */
 +    }
 +
 +    verletbuf_get_list_setup(TRUE, &ls);
 +
 +    /* Allow rlist to grow until the pair-list volume is NBNXN_GPU_LIST_OK_FAC (preferred) or NBNXN_GPU_LIST_MAX_FAC (limit) times that of the cut-off sphere */
 +    rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE, mtop->natoms/det(box));
 +    rlist_ok  = (max(ir->rvdw, ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC, 1.0/3.0) - rlist_inc;
 +    rlist_max = (max(ir->rvdw, ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC, 1.0/3.0) - rlist_inc;
 +    if (debug)
 +    {
 +        fprintf(debug, "GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n",
 +                rlist_inc, rlist_max);
 +    }
 +
 +    i            = 0;
 +    nstlist_prev = nstlist_orig;
 +    rlist_prev   = ir->rlist;
 +    do
 +    {
 +        if (env == NULL)
 +        {
 +            ir->nstlist = nstl[i];
 +        }
 +
 +        /* Set the pair-list buffer size in ir */
 +        calc_verlet_buffer_size(mtop, det(box), ir, ir->verletbuf_drift, &ls,
 +                                NULL, &rlist_new);
 +
 +        /* Does rlist fit in the box? */
 +        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box));
 +        bDD  = TRUE;
 +        if (bBox && DOMAINDECOMP(cr))
 +        {
 +            /* Check if rlist fits in the domain decomposition */
 +            if (inputrec2nboundeddim(ir) < DIM)
 +            {
 +                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
 +            }
 +            copy_mat(box, state_tmp.box);
 +            bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new);
 +        }
 +
 +        bCont = FALSE;
 +
 +        if (env == NULL)
 +        {
 +            if (bBox && bDD && rlist_new <= rlist_max)
 +            {
 +                /* Increase nstlist */
 +                nstlist_prev = ir->nstlist;
 +                rlist_prev   = rlist_new;
 +                bCont        = (i+1 < NNSTL && rlist_new < rlist_ok);
 +            }
 +            else
 +            {
 +                /* Stick with the previous nstlist */
 +                ir->nstlist = nstlist_prev;
 +                rlist_new   = rlist_prev;
 +                bBox        = TRUE;
 +                bDD         = TRUE;
 +            }
 +        }
 +
 +        i++;
 +    }
 +    while (bCont);
 +
 +    if (!bBox || !bDD) /* only possible with nstlist from the environment, the automatic search resets both flags */
 +    {
 +        gmx_warning(!bBox ? box_err : dd_err);
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "\n%s\n", !bBox ? box_err : dd_err); /* same selection as the warning above */
 +        }
 +        ir->nstlist = nstlist_orig;
 +    }
 +    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
 +    {
 +        sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g",
 +                nstlist_orig, ir->nstlist,
 +                ir->rlist, rlist_new);
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "%s\n\n", buf);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "%s\n\n", buf);
 +        }
 +        ir->rlist     = rlist_new;
 +        ir->rlistlong = rlist_new;
 +    }
 +}
 +
 +/* Prepare a run that uses the Verlet cut-off scheme.
 + *
 + * Sets *bUseGPU when hwinfo reports a usable GPU or when the
 + * GMX_EMULATE_GPU environment variable is set. With a positive
 + * ir->verletbuf_drift the pair-list buffer is recomputed for the current
 + * kernel setup and ir->rlist and ir->rlistlong are updated when the result
 + * differs. For dynamical integrators with *bUseGPU set and nstlist below
 + * NSTLIST_GPU_ENOUGH, or whenever NSTLIST_ENVVAR is set, nstlist is
 + * retuned through increase_nstlist().
 + *
 + * fplog may be NULL, in which case the rlist change is not logged here.
 + * Note: nbpu_opt is not referenced in the body of this function.
 + */
 +static void prepare_verlet_scheme(FILE             *fplog,
 +                                  gmx_hw_info_t    *hwinfo,
 +                                  t_commrec        *cr,
-                                   gmx_hw_opt_t     *hw_opt,
 +                                  const char       *nbpu_opt,
 +                                  t_inputrec       *ir,
 +                                  const gmx_mtop_t *mtop,
 +                                  matrix            box,
 +                                  gmx_bool         *bUseGPU)
 +{
 +    /* Here we only check for GPU usage on the MPI master process,
 +     * as here we don't know how many GPUs we will use yet.
 +     * We check for a GPU on all processes later.
 +     */
 +    *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);
 +
 +    if (ir->verletbuf_drift > 0)
 +    {
 +        /* Update the Verlet buffer size for the current run setup */
 +        verletbuf_list_setup_t ls;
 +        real                   rlist_new;
 +
 +        /* Here we assume CPU acceleration is on. But as currently
 +         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 +         * and 4x2 gives a larger buffer than 4x4, this is ok.
 +         */
 +        verletbuf_get_list_setup(*bUseGPU, &ls);
 +
 +        calc_verlet_buffer_size(mtop, det(box), ir,
 +                                ir->verletbuf_drift, &ls,
 +                                NULL, &rlist_new);
 +        if (rlist_new != ir->rlist)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 +                        ir->rlist, rlist_new,
 +                        ls.cluster_size_i, ls.cluster_size_j);
 +            }
 +            /* Both list cut-offs are set to the same new value */
 +            ir->rlist     = rlist_new;
 +            ir->rlistlong = rlist_new;
 +        }
 +    }
 +
 +    /* With GPU or emulation we should check nstlist for performance */
 +    if ((EI_DYNAMICS(ir->eI) &&
 +         *bUseGPU &&
 +         ir->nstlist < NSTLIST_GPU_ENOUGH) ||
 +        getenv(NSTLIST_ENVVAR) != NULL)
 +    {
 +        /* Choose a better nstlist */
 +        increase_nstlist(fplog, cr, ir, mtop, box);
 +    }
 +}
 +
 +/* Convert an input record that uses the group cut-off scheme to the
 + * Verlet cut-off scheme, modifying ir and mtop in place.
 + *
 + * Calls gmx_fatal() when rcoulomb differs from rvdw, when user non-bonded
 + * potentials are selected, when inputrec2nboundeddim(ir) is not 3, or when
 + * free-energy calculations or implicit solvent are enabled.
 + * Switched/shifted interactions are replaced: VdW becomes evdwCUT and
 + * electrostatics becomes eelPME (full electrostatics) or eelRF with
 + * epsilon_rf = 0.
 + *
 + * box_vol is passed on to calc_verlet_buffer_size() when the pair-list
 + * buffer is computed. fplog is handed to md_print_warn() for the messages.
 + */
 +static void convert_to_verlet_scheme(FILE *fplog,
 +                                     t_inputrec *ir,
 +                                     gmx_mtop_t *mtop, real box_vol)
 +{
 +    char *conv_mesg = "Converting input file with group cut-off scheme to the Verlet cut-off scheme";
 +
 +    md_print_warn(NULL, fplog, "%s\n", conv_mesg);
 +
 +    /* Default drift target; overridden below for switched interactions */
 +    ir->cutoff_scheme   = ecutsVERLET;
 +    ir->verletbuf_drift = 0.005;
 +
 +    if (ir->rcoulomb != ir->rvdw)
 +    {
 +        gmx_fatal(FARGS, "The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
 +    }
 +
 +    if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
 +    {
 +        gmx_fatal(FARGS, "User non-bonded potentials are not (yet) supported with the Verlet scheme");
 +    }
 +    else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
 +    {
 +        md_print_warn(NULL, fplog, "Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials");
 +
 +        if (EVDW_SWITCHED(ir->vdwtype))
 +        {
 +            ir->vdwtype = evdwCUT;
 +        }
 +        if (EEL_SWITCHED(ir->coulombtype))
 +        {
 +            if (EEL_FULL(ir->coulombtype))
 +            {
 +                /* With full electrostatic only PME can be switched */
 +                ir->coulombtype = eelPME;
 +            }
 +            else
 +            {
 +                md_print_warn(NULL, fplog, "NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n", eel_names[ir->coulombtype]);
 +                ir->coulombtype = eelRF;
 +                /* The note above announces epsilon-rf=inf; the stored value is 0 */
 +                ir->epsilon_rf  = 0.0;
 +            }
 +        }
 +
 +        /* We set the target energy drift to a small number.
 +         * Note that this is only for testing. For production the user
 +         * should think about this and set the mdp options.
 +         */
 +        ir->verletbuf_drift = 1e-4;
 +    }
 +
 +    if (inputrec2nboundeddim(ir) != 3)
 +    {
 +        gmx_fatal(FARGS, "Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
 +    }
 +
 +    if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
 +    {
 +        gmx_fatal(FARGS, "Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
 +    }
 +
 +    /* The drift-based buffer is computed for dynamical integrators, except
 +     * when the integrator satisfies EI_MD and no temperature coupling is set.
 +     */
 +    if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
 +    {
 +        verletbuf_list_setup_t ls;
 +
 +        /* NOTE(review): FALSE presumably selects the non-GPU list setup,
 +         * as prepare_verlet_scheme passes *bUseGPU in this position - confirm.
 +         */
 +        verletbuf_get_list_setup(FALSE, &ls);
 +        calc_verlet_buffer_size(mtop, box_vol, ir, ir->verletbuf_drift, &ls,
 +                                NULL, &ir->rlist);
 +    }
 +    else
 +    {
 +        /* No drift-based buffer: use a list cut-off 5% beyond the larger cut-off */
 +        ir->verletbuf_drift = -1;
 +        ir->rlist           = 1.05*max(ir->rvdw, ir->rcoulomb);
 +    }
 +
 +    gmx_mtop_remove_chargegroups(mtop);
 +}
 +
 +/* Check the thread-count options in hw_opt for consistency and fill in
 + * values that follow from the ones that were set.
 + *
 + * hw_opt        thread options; nthreads_omp, nthreads_omp_pme and
 + *               nthreads_tmpi may be modified
 + * cutoff_scheme cut-off scheme of the run; with ecutsGROUP the number of
 + *               OpenMP threads is forced to 1
 + * bIsSimMaster  passed on to gmx_omp_nthreads_read_env()
 + *
 + * Calls gmx_fatal() on any inconsistent or unsupported combination.
 + */
 +static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
 +                                    int           cutoff_scheme,
 +                                    gmx_bool      bIsSimMaster)
 +{
 +    gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster);
 +
 +#ifndef GMX_THREAD_MPI
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        gmx_fatal(FARGS, "Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        gmx_fatal(FARGS, "Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +#endif
 +
 +    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
 +    {
 +        /* We have the same number of OpenMP threads for PP and PME processes,
 +         * thus we can perform several consistency checks.
 +         */
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
 +        {
 +            gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
 +                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp);
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
 +        {
 +            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
 +                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi);
 +        }
 +
 +        if (hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
 +        {
 +            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
 +                      hw_opt->nthreads_tot, hw_opt->nthreads_omp);
 +        }
 +
 +        /* Derive the OpenMP thread count when only total and thread-MPI are set */
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +    }
 +
 +#ifndef GMX_OPENMP
 +    if (hw_opt->nthreads_omp > 1)
 +    {
 +        gmx_fatal(FARGS, "OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
 +    }
 +#endif
 +
 +    if (cutoff_scheme == ecutsGROUP)
 +    {
 +        /* We only have OpenMP support for PME only nodes */
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            gmx_fatal(FARGS, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
 +                      ecutscheme_names[cutoff_scheme],
 +                      ecutscheme_names[ecutsVERLET]);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
 +    {
 +        gmx_fatal(FARGS, "You need to specify -ntomp in addition to -ntomp_pme");
 +    }
 +
 +    if (hw_opt->nthreads_tot == 1)
 +    {
 +        hw_opt->nthreads_tmpi = 1;
 +
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            /* Report the offending OpenMP thread count; nthreads_tmpi was
 +             * just set to 1 above and is not the value the user requested.
 +             */
 +            gmx_fatal(FARGS, "You requested %d OpenMP threads with %d total threads",
 +                      hw_opt->nthreads_omp, hw_opt->nthreads_tot);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    /* Without an explicit PME setting, PME uses the same OpenMP thread count */
 +    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
 +    {
 +        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
 +                hw_opt->nthreads_tot,
 +                hw_opt->nthreads_tmpi,
 +                hw_opt->nthreads_omp,
 +                hw_opt->nthreads_omp_pme,
 +                hw_opt->gpu_id != NULL ? hw_opt->gpu_id : "");
 +
 +    }
 +}
 +
 +
 +/* Override the value in inputrec with value passed on the command line (if any)
 + *
 + * Any nsteps_cmdline greater than -2 replaces ir->nsteps and is reported
 + * through md_print_warn(); -2 is the default and leaves ir untouched.
 + * For dynamical integrators the message also gives the run length in ps
 + * (nsteps_cmdline*ir->delta_t). ir and cr must not be NULL (asserted).
 + */
 +static void override_nsteps_cmdline(FILE            *fplog,
 +                                    gmx_large_int_t  nsteps_cmdline,
 +                                    t_inputrec      *ir,
 +                                    const t_commrec *cr)
 +{
 +    char sbuf[STEPSTRSIZE];
 +
 +    assert(ir);
 +    assert(cr);
 +
 +    /* override with anything else than the default -2 */
 +    if (nsteps_cmdline > -2)
 +    {
 +        char stmp[STRLEN];
 +
 +        ir->nsteps = nsteps_cmdline;
 +        if (EI_DYNAMICS(ir->eI))
 +        {
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps, %.3f ps",
 +                    gmx_step_str(nsteps_cmdline, sbuf),
 +                    nsteps_cmdline*ir->delta_t);
 +        }
 +        else
 +        {
 +            /* No time is printed for integrators that are not dynamical */
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps",
 +                    gmx_step_str(nsteps_cmdline, sbuf));
 +        }
 +
 +        md_print_warn(cr, fplog, "%s\n", stmp);
 +    }
 +}
 +
 +/* Data structure set by SIMMASTER which needs to be passed to all nodes
 + * before the other nodes have read the tpx file and called gmx_detect_hardware.
 + * In builds without thread-MPI, mdrunner sends it to the other nodes with
 + * gmx_bcast_sim(sizeof(minf), &minf, cr).
 + */
 +typedef struct {
 +    int      cutoff_scheme; /* The cutoff scheme from inputrec_t */
 +    gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
 +} master_inf_t;
 +
 +int mdrunner(gmx_hw_opt_t *hw_opt,
 +             FILE *fplog, t_commrec *cr, int nfile,
 +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +             gmx_bool bCompact, int nstglobalcomm,
 +             ivec ddxyz, int dd_node_order, real rdd, real rconstr,
 +             const char *dddlb_opt, real dlb_scale,
 +             const char *ddcsx, const char *ddcsy, const char *ddcsz,
 +             const char *nbpu_opt,
 +             gmx_large_int_t nsteps_cmdline, int nstepout, int resetstep,
 +             int nmultisim, int repl_ex_nst, int repl_ex_nex,
 +             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
 +             const char *deviceOptions, unsigned long Flags)
 +{
 +    gmx_bool        bForceUseGPU, bTryUseGPU;
 +    double          nodetime = 0, realtime;
 +    t_inputrec     *inputrec;
 +    t_state        *state = NULL;
 +    matrix          box;
 +    gmx_ddbox_t     ddbox = {0};
 +    int             npme_major, npme_minor;
 +    real            tmpr1, tmpr2;
 +    t_nrnb         *nrnb;
 +    gmx_mtop_t     *mtop       = NULL;
 +    t_mdatoms      *mdatoms    = NULL;
 +    t_forcerec     *fr         = NULL;
 +    t_fcdata       *fcd        = NULL;
 +    real            ewaldcoeff = 0;
 +    gmx_pme_t      *pmedata    = NULL;
 +    gmx_vsite_t    *vsite      = NULL;
 +    gmx_constr_t    constr;
 +    int             i, m, nChargePerturbed = -1, status, nalloc;
 +    char           *gro;
 +    gmx_wallcycle_t wcycle;
 +    gmx_bool        bReadRNG, bReadEkin;
 +    int             list;
 +    gmx_runtime_t   runtime;
 +    int             rc;
 +    gmx_large_int_t reset_counters;
 +    gmx_edsam_t     ed           = NULL;
 +    t_commrec      *cr_old       = cr;
 +    int             nthreads_pme = 1;
 +    int             nthreads_pp  = 1;
 +    gmx_membed_t    membed       = NULL;
 +    gmx_hw_info_t  *hwinfo       = NULL;
 +    master_inf_t    minf         = {-1, FALSE};
 +
 +    /* CAUTION: threads may be started later on in this function, so
 +       cr doesn't reflect the final parallel state right now */
 +    snew(inputrec, 1);
 +    snew(mtop, 1);
 +
 +    if (Flags & MD_APPENDFILES)
 +    {
 +        fplog = NULL;
 +    }
 +
 +    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
 +    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
 +
 +    snew(state, 1);
 +    if (SIMMASTER(cr))
 +    {
 +        /* Read (nearly) all data required for the simulation */
 +        read_tpx_state(ftp2fn(efTPX, nfile, fnm), inputrec, state, NULL, mtop);
 +
 +        if (inputrec->cutoff_scheme != ecutsVERLET &&
 +            ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
 +        {
 +            convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box));
 +        }
 +
 +        /* Detect hardware, gather information. With tMPI only thread 0 does it
 +         * and after threads are started broadcasts hwinfo around. */
 +        snew(hwinfo, 1);
 +        gmx_detect_hardware(fplog, hwinfo, cr,
 +                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
 +
 +        minf.cutoff_scheme = inputrec->cutoff_scheme;
 +        minf.bUseGPU       = FALSE;
 +
 +        if (inputrec->cutoff_scheme == ecutsVERLET)
 +        {
-             prepare_verlet_scheme(fplog, hwinfo, cr, hw_opt, nbpu_opt,
++            prepare_verlet_scheme(fplog, hwinfo, cr, nbpu_opt,
 +                                  inputrec, mtop, state->box,
 +                                  &minf.bUseGPU);
 +        }
 +        else if (hwinfo->bCanUseGPU)
 +        {
 +            md_print_warn(cr, fplog,
 +                          "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
 +                          "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
 +                          "      (for quick performance testing you can use the -testverlet option)\n");
 +
 +            if (bForceUseGPU)
 +            {
 +                gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
 +            }
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        gmx_bcast_sim(sizeof(minf), &minf, cr);
 +    }
 +#endif
 +    if (minf.bUseGPU && cr->npmenodes == -1)
 +    {
 +        /* Don't automatically use PME-only nodes with GPUs */
 +        cr->npmenodes = 0;
 +    }
 +
 +    /* Check for externally set OpenMP affinity and turn off internal
 +     * pinning if any is found. We need to do this check early to tell
 +     * thread-MPI whether it should do pinning when spawning threads.
 +     * TODO: the above no longer holds, we should move these checks down
 +     */
 +    gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
 +
 +#ifdef GMX_THREAD_MPI
 +    /* With thread-MPI inputrec is only set here on the master thread */
 +    if (SIMMASTER(cr))
 +#endif
 +    {
 +        check_and_update_hw_opt(hw_opt, minf.cutoff_scheme, SIMMASTER(cr));
 +
 +#ifdef GMX_THREAD_MPI
 +        /* Early check for externally set process affinity. Can't do over all
 +         * MPI processes because hwinfo is not available everywhere, but with
 +         * thread-MPI it's needed as pinning might get turned off which needs
 +         * to be known before starting thread-MPI. */
 +        gmx_check_thread_affinity_set(fplog,
 +                                      NULL,
 +                                      hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
 +        {
 +            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
 +        }
 +#endif
 +
 +        if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
 +            cr->npmenodes <= 0)
 +        {
 +            gmx_fatal(FARGS, "You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
 +        }
 +    }
 +
 +#ifdef GMX_THREAD_MPI
 +    if (SIMMASTER(cr))
 +    {
 +        /* NOW the threads will be started: */
 +        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
 +                                                 hw_opt,
 +                                                 inputrec, mtop,
 +                                                 cr, fplog);
 +        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 1)
 +        {
 +            /* now start the threads. */
 +            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
 +                                        oenv, bVerbose, bCompact, nstglobalcomm,
 +                                        ddxyz, dd_node_order, rdd, rconstr,
 +                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
 +                                        nbpu_opt,
 +                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
 +                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
 +                                        cpt_period, max_hours, deviceOptions,
 +                                        Flags);
 +            /* the main thread continues here with a new cr. We don't deallocate
 +               the old cr because other threads may still be reading it. */
 +            if (cr == NULL)
 +            {
 +                gmx_comm("Failed to spawn threads");
 +            }
 +        }
 +    }
 +#endif
 +    /* END OF CAUTION: cr is now reliable */
 +
 +    /* g_membed initialisation *
 +     * Because we change the mtop, init_membed is called before the init_parallel *
 +     * (in case we ever want to make it run in parallel) */
 +    if (opt2bSet("-membed", nfile, fnm))
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "Initializing membed");
 +        }
 +        membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* now broadcast everything to the non-master nodes/threads: */
 +        init_parallel(fplog, cr, inputrec, mtop);
 +
 +        /* This check needs to happen after get_nthreads_mpi() */
 +        if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal_collective(FARGS, cr, NULL,
 +                                 "The Verlet cut-off scheme is not supported with particle decomposition.\n"
 +                                 "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
 +        }
 +    }
 +    if (fplog != NULL)
 +    {
 +        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 +    }
 +
 +#if defined GMX_THREAD_MPI
 +    /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer
 +     * to the other threads  -- slightly uncool, but works fine, just need to
 +     * make sure that the data doesn't get freed twice. */
 +    if (cr->nnodes > 1)
 +    {
 +        if (!SIMMASTER(cr))
 +        {
 +            snew(hwinfo, 1);
 +        }
 +        gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
 +    }
 +#else
 +    if (PAR(cr) && !SIMMASTER(cr))
 +    {
 +        /* now we have inputrec on all nodes, can run the detection */
 +        /* TODO: perhaps it's better to propagate within a node instead? */
 +        snew(hwinfo, 1);
 +        gmx_detect_hardware(fplog, hwinfo, cr,
 +                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
 +    }
 +
 +    /* Now do the affinity check with MPI/no-MPI (done earlier with thread-MPI). */
 +    gmx_check_thread_affinity_set(fplog, cr,
 +                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 +#endif
 +
 +    /* now make sure the state is initialized and propagated */
 +    set_state_entries(state, inputrec, cr->nnodes);
 +
 +    /* A parallel command line option consistency check that we can
 +       only do after any threads have started. */
 +    if (!PAR(cr) &&
 +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
 +    {
 +        gmx_fatal(FARGS,
 +                  "The -dd or -npme option request a parallel simulation, "
 +#ifndef GMX_MPI
 +                  "but %s was compiled without threads or MPI enabled"
 +#else
 +#ifdef GMX_THREAD_MPI
 +                  "but the number of threads (option -nt) is 1"
 +#else
 +                  "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
 +#endif
 +#endif
 +                  , ShortProgram()
 +                  );
 +    }
 +
 +    if ((Flags & MD_RERUN) &&
 +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 +    {
 +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 +    }
 +
 +    if (can_use_allvsall(inputrec, mtop, TRUE, cr, fplog) && PAR(cr))
 +    {
-         /* All-vs-all loops do not work with domain decomposition */
++        /* Simple neighbour searching and (also?) all-vs-all loops
++         * do not work with domain decomposition. */
 +        Flags |= MD_PARTDEC;
 +    }
 +
 +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
 +    {
 +        if (cr->npmenodes > 0)
 +        {
 +            if (!EEL_PME(inputrec->coulombtype))
 +            {
 +                gmx_fatal_collective(FARGS, cr, NULL,
 +                                     "PME nodes are requested, but the system does not use PME electrostatics");
 +            }
 +            if (Flags & MD_PARTDEC)
 +            {
 +                gmx_fatal_collective(FARGS, cr, NULL,
 +                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
 +            }
 +        }
 +
 +        cr->npmenodes = 0;
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
 +#endif
 +
 +    /* NMR restraints must be initialized before load_checkpoint,
 +     * since with time averaging the history is added to t_state.
 +     * For proper consistency check we therefore need to extend
 +     * t_state here.
 +     * So the PME-only nodes (if present) will also initialize
 +     * the distance restraints.
 +     */
 +    snew(fcd, 1);
 +
 +    /* This needs to be called before read_checkpoint to extend the state */
 +    init_disres(fplog, mtop, inputrec, cr, Flags & MD_PARTDEC, fcd, state, repl_ex_nst > 0);
 +
 +    if (gmx_mtop_ftype_count(mtop, F_ORIRES) > 0)
 +    {
 +        if (PAR(cr) && !(Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal(FARGS, "Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
 +        }
 +        /* Orientation restraints */
 +        if (MASTER(cr))
 +        {
 +            init_orires(fplog, mtop, state->x, inputrec, cr->ms, &(fcd->orires),
 +                        state);
 +        }
 +    }
 +
 +    if (DEFORM(*inputrec))
 +    {
 +        /* Store the deform reference box before reading the checkpoint */
 +        if (SIMMASTER(cr))
 +        {
 +            copy_mat(state->box, box);
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(box), box, cr);
 +        }
 +        /* Because we do not have the update struct available yet
 +         * in which the reference values should be stored,
 +         * we store them temporarily in static variables.
 +         * This should be thread safe, since they are only written once
 +         * and with identical values.
 +         */
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        deform_init_init_step_tpx = inputrec->init_step;
 +        copy_mat(box, deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    if (opt2bSet("-cpi", nfile, fnm))
 +    {
 +        /* Check if checkpoint file exists before doing continuation.
 +         * This way we can use identical input options for the first and subsequent runs...
 +         */
 +        if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
 +        {
 +            load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
 +                            cr, Flags & MD_PARTDEC, ddxyz,
 +                            inputrec, state, &bReadRNG, &bReadEkin,
 +                            (Flags & MD_APPENDFILES),
 +                            (Flags & MD_APPENDFILESSET));
 +
 +            if (bReadRNG)
 +            {
 +                Flags |= MD_READ_RNG;
 +            }
 +            if (bReadEkin)
 +            {
 +                Flags |= MD_READ_EKIN;
 +            }
 +        }
 +    }
 +
 +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI only the master node/thread exists in mdrun.c,
 +         * therefore non-master nodes need to open the "seppot" log file here.
 +         */
 +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
 +#endif
 +        )
 +    {
 +        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, !(Flags & MD_SEPPOT),
 +                     Flags, &fplog);
 +    }
 +
 +    /* override nsteps with value from cmdline */
 +    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
 +
 +    if (SIMMASTER(cr))
 +    {
 +        copy_mat(state->box, box);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(box), box, cr);
 +    }
 +
 +    /* Essential dynamics */
 +    if (opt2bSet("-ei", nfile, fnm))
 +    {
 +        /* Open input and output files, allocate space for ED data structure */
 +        ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);
 +    }
 +
 +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
 +                     EI_TPI(inputrec->eI) ||
 +                     inputrec->eI == eiNM))
 +    {
 +        cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
 +                                           dddlb_opt, dlb_scale,
 +                                           ddcsx, ddcsy, ddcsz,
 +                                           mtop, inputrec,
 +                                           box, state->x,
 +                                           &ddbox, &npme_major, &npme_minor);
 +
 +        make_dd_communicators(fplog, cr, dd_node_order);
 +
 +        /* Set overallocation to avoid frequent reallocation of arrays */
 +        set_over_alloc_dd(TRUE);
 +    }
 +    else
 +    {
 +        /* PME, if used, is done on all nodes with 1D decomposition */
 +        cr->npmenodes = 0;
 +        cr->duty      = (DUTY_PP | DUTY_PME);
 +        npme_major    = 1;
 +        npme_minor    = 1;
 +        if (!EI_TPI(inputrec->eI))
 +        {
 +            npme_major = cr->nnodes;
 +        }
 +
 +        if (inputrec->ePBC == epbcSCREW)
 +        {
 +            gmx_fatal(FARGS,
 +                      "pbc=%s is only implemented with domain decomposition",
 +                      epbc_names[inputrec->ePBC]);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* After possible communicator splitting in make_dd_communicators.
 +         * we can set up the intra/inter node communication.
 +         */
 +        gmx_setup_nodecomm(fplog, cr);
 +    }
 +
 +    /* Initialize per-physical-node MPI process/thread ID and counters. */
 +    gmx_init_intranode_counters(cr);
 +
 +#ifdef GMX_MPI
 +    md_print_info(cr, fplog, "Using %d MPI %s\n",
 +                  cr->nnodes,
 +#ifdef GMX_THREAD_MPI
 +                  cr->nnodes == 1 ? "thread" : "threads"
 +#else
 +                  cr->nnodes == 1 ? "process" : "processes"
 +#endif
 +                  );
 +    fflush(stderr);
 +#endif
 +
 +    gmx_omp_nthreads_init(fplog, cr,
 +                          hwinfo->nthreads_hw_avail,
 +                          hw_opt->nthreads_omp,
 +                          hw_opt->nthreads_omp_pme,
 +                          (cr->duty & DUTY_PP) == 0,
 +                          inputrec->cutoff_scheme == ecutsVERLET);
 +
 +    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);
 +
 +    /* getting number of PP/PME threads
 +       PME: env variable should be read only on one node to make sure it is
 +       identical everywhere;
 +     */
 +    /* TODO nthreads_pp is only used for pinning threads.
 +     * This is a temporary solution until we have a hw topology library.
 +     */
 +    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 +    nthreads_pme = gmx_omp_nthreads_get(emntPME);
 +
 +    wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);
 +
 +    if (PAR(cr))
 +    {
 +        /* Master synchronizes its value of reset_counters with all nodes
 +         * including PME only nodes */
 +        reset_counters = wcycle_get_reset_counters(wcycle);
 +        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
 +        wcycle_set_reset_counters(wcycle, reset_counters);
 +    }
 +
 +    snew(nrnb, 1);
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* For domain decomposition we allocate dynamically
 +         * in dd_partition_system.
 +         */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            bcast_state_setup(cr, state);
 +        }
 +        else
 +        {
 +            if (PAR(cr))
 +            {
 +                bcast_state(cr, state, TRUE);
 +            }
 +        }
 +
 +        /* Initiate forcerecord */
 +        fr         = mk_forcerec();
 +        fr->hwinfo = hwinfo;
 +        init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box, FALSE,
 +                      opt2fn("-table", nfile, fnm),
 +                      opt2fn("-tabletf", nfile, fnm),
 +                      opt2fn("-tablep", nfile, fnm),
 +                      opt2fn("-tableb", nfile, fnm),
 +                      nbpu_opt,
 +                      FALSE, pforce);
 +
 +        /* version for PCA_NOT_READ_NODE (see md.c) */
 +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +           "nofile","nofile","nofile","nofile",FALSE,pforce);
 +         */
 +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
 +
 +        /* Initialize QM-MM */
 +        if (fr->bQMMM)
 +        {
 +            init_QMMMrec(cr, box, mtop, inputrec, fr);
 +        }
 +
 +        /* Initialize the mdatoms structure.
 +         * mdatoms is not filled with atom data,
 +         * as this can not be done now with domain decomposition.
 +         */
 +        mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);
 +
 +        if (mdatoms->nPerturbed > 0 && inputrec->cutoff_scheme == ecutsVERLET)
 +        {
 +            gmx_fatal(FARGS, "The Verlet cut-off scheme does not (yet) support free-energy calculations with perturbed atoms, only perturbed interactions. This will be implemented soon. Use the group scheme for now.");
 +        }
 +
 +        /* Initialize the virtual site communication */
 +        vsite = init_vsite(mtop, cr, FALSE);
 +
 +        calc_shifts(box, fr->shift_vec);
 +
 +        /* With periodic molecules the charge groups should be whole at start up
 +         * and the virtual sites should not be far from their proper positions.
 +         */
 +        if (!inputrec->bContinuation && MASTER(cr) &&
 +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        {
 +            /* Make molecules whole at start of run */
 +            if (fr->ePBC != epbcNONE)
 +            {
 +                do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);
 +            }
 +            if (vsite)
 +            {
 +                /* Correct initial vsite positions are required
 +                 * for the initial distribution in the domain decomposition
 +                 * and for the initial shell prediction.
 +                 */
 +                construct_vsites_mtop(fplog, vsite, mtop, state->x);
 +            }
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            ewaldcoeff = fr->ewaldcoeff;
 +            pmedata    = &fr->pmedata;
 +        }
 +        else
 +        {
 +            pmedata = NULL;
 +        }
 +    }
 +    else
 +    {
 +        /* This is a PME only node */
 +
 +        /* We don't need the state */
 +        done_state(state);
 +
 +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
 +        snew(pmedata, 1);
 +    }
 +
 +    if (hw_opt->thread_affinity != threadaffOFF)
 +    {
 +        /* Before setting affinity, check whether the affinity has changed
 +         * (which indicates that the OpenMP library has probably changed it
 +         * since we first checked).
 +         */
 +        gmx_check_thread_affinity_set(fplog, cr,
 +                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);
 +
 +        /* Set the CPU affinity */
 +        gmx_set_thread_affinity(fplog, cr, hw_opt, nthreads_pme, hwinfo, inputrec);
 +    }
 +
 +    /* Initiate PME if necessary,
 +     * either on all nodes or on dedicated PME nodes only. */
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (mdatoms)
 +        {
 +            nChargePerturbed = mdatoms->nChargePerturbed;
 +        }
 +        if (cr->npmenodes > 0)
 +        {
 +            /* The PME only nodes need to know nChargePerturbed */
 +            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
 +        }
 +
 +        if (cr->duty & DUTY_PME)
 +        {
 +            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
 +                                  mtop ? mtop->natoms : 0, nChargePerturbed,
 +                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
 +            if (status != 0)
 +            {
 +                gmx_fatal(FARGS, "Error %d initializing PME", status);
 +            }
 +        }
 +    }
 +
 +
 +    if (integrator[inputrec->eI].func == do_md)
 +    {
 +        /* Turn on signal handling on all nodes */
 +        /*
 +         * A user signal from the PME nodes (if any)
 +         * is communicated to the PP nodes.
 +         */
 +        signal_handler_install();
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        if (inputrec->ePull != epullNO)
 +        {
 +            /* Initialize pull code */
 +            init_pull(fplog, inputrec, nfile, fnm, mtop, cr, oenv, inputrec->fepvals->init_lambda,
 +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
 +        }
 +
 +        if (inputrec->bRot)
 +        {
 +            /* Initialize enforced rotation code */
 +            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
 +                     bVerbose, Flags);
 +        }
 +
 +        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_init_bondeds(fplog, cr->dd, mtop, vsite, constr, inputrec,
 +                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);
 +
 +            set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, fr, &ddbox);
 +
 +            setup_dd_grid(fplog, cr->dd);
 +        }
 +
 +        /* Now do whatever the user wants us to do (how flexible...) */
 +        integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
 +                                      oenv, bVerbose, bCompact,
 +                                      nstglobalcomm,
 +                                      vsite, constr,
 +                                      nstepout, inputrec, mtop,
 +                                      fcd, state,
 +                                      mdatoms, nrnb, wcycle, ed, fr,
 +                                      repl_ex_nst, repl_ex_nex, repl_ex_seed,
 +                                      membed,
 +                                      cpt_period, max_hours,
 +                                      deviceOptions,
 +                                      Flags,
 +                                      &runtime);
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            finish_pull(fplog, inputrec->pull);
 +        }
 +
 +        if (inputrec->bRot)
 +        {
-             finish_rot(fplog, inputrec->rot);
++            finish_rot(inputrec->rot);
 +        }
 +
 +    }
 +    else
 +    {
 +        /* do PME only */
 +        gmx_pmeonly(*pmedata, cr, nrnb, wcycle, ewaldcoeff, FALSE, inputrec);
 +    }
 +
 +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
 +    {
 +        /* Some timing stats */
 +        if (SIMMASTER(cr))
 +        {
 +            if (runtime.proc == 0)
 +            {
 +                runtime.proc = runtime.real;
 +            }
 +        }
 +        else
 +        {
 +            runtime.real = 0;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle, ewcRUN);
 +
 +    /* Finish up, write some stuff
 +     * if rerunMD, don't write last frame again
 +     */
 +    finish_run(fplog, cr, ftp2fn(efSTO, nfile, fnm),
 +               inputrec, nrnb, wcycle, &runtime,
 +               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
 +               nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
 +               nthreads_pp,
 +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
 +
 +    if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU)
 +    {
 +        char gpu_err_str[STRLEN];
 +
 +        /* free GPU memory and uninitialize GPU (by destroying the context) */
 +        nbnxn_cuda_free(fplog, fr->nbv->cu_nbv);
 +
 +        if (!free_gpu(gpu_err_str))
 +        {
 +            gmx_warning("On node %d failed to free GPU #%d: %s",
 +                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
 +        }
 +    }
 +
 +    if (opt2bSet("-membed", nfile, fnm))
 +    {
 +        sfree(membed);
 +    }
 +
 +#ifdef GMX_THREAD_MPI
 +    if (PAR(cr) && SIMMASTER(cr))
 +#endif
 +    {
 +        gmx_hardware_info_free(hwinfo);
 +    }
 +
 +    /* Does what it says */
 +    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", &runtime);
 +
 +    /* Close logfile already here if we were appending to it */
 +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
 +    {
 +        gmx_log_close(fplog);
 +    }
 +
 +    rc = (int)gmx_get_stop_condition();
 +
 +#ifdef GMX_THREAD_MPI
 +    /* we need to join all threads. The sub-threads join when they
 +       exit this function, but the master thread needs to be told to
 +       wait for that. */
 +    if (PAR(cr) && MASTER(cr))
 +    {
 +        tMPI_Finalize();
 +    }
 +#endif
 +
 +    return rc;
 +}