Merge branch 'release-4-6'
Author:     Mark Abraham <mark.j.abraham@gmail.com>
AuthorDate: Tue, 28 Jan 2014 20:06:13 +0000 (21:06 +0100)
Commit:     Mark Abraham <mark.j.abraham@gmail.com>
CommitDate: Tue, 28 Jan 2014 21:39:51 +0000 (22:39 +0100)
Conflicts:
src/contrib/fftw/CMakeLists.txt
Resolved by merging the bump to FFTW 3.3.3 and the increased
variable indirection from both branches

src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h
Resolved in favour of master - both branches have now removed the
fplog parameter from the free function

src/gromacs/legacyheaders/sim_util.h
src/gromacs/mdlib/minimize.c
src/gromacs/mdlib/tpi.c
src/programs/mdrun/md.c
These are textual clashes between the master-branch transition from
runtime to walltime_accounting encapsulation, and the recently merged
release-4-6 consolidation of start-time printing. There is no
functional clash, though. Resolved by using code that is consistent
with both refactorings; the call to the print_start function has to
follow the call to the walltime_accounting_start function, of course.
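
To illustrate that ordering constraint, here is a minimal, self-contained
sketch (not GROMACS code; the types and signatures are illustrative
stand-ins for walltime_accounting_start and print_start, which take more
arguments in the real tree):

    /* Minimal sketch, not GROMACS code: the accounting object must be
     * started before its start time can be printed. All names below
     * are illustrative assumptions. */
    #include <stdio.h>
    #include <time.h>

    typedef struct { time_t start_time; } walltime_accounting_t;

    static void walltime_accounting_start(walltime_accounting_t *wa)
    {
        wa->start_time = time(NULL);        /* record when the run started */
    }

    static void print_start(FILE *fplog, const walltime_accounting_t *wa,
                            const char *name)
    {
        fprintf(fplog, "Started %s at %s", name, ctime(&wa->start_time));
    }

    int main(void)
    {
        walltime_accounting_t wa;
        walltime_accounting_start(&wa);     /* must come first...           */
        print_start(stdout, &wa, "mdrun");  /* ...so a start time exists    */
        return 0;
    }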

Change-Id: I1559fbadfc15eb7b7e880b17e4519159ba9b534e

src/contrib/fftw/CMakeLists.txt
src/gromacs/gmxana/gmx_energy.c
src/gromacs/legacyheaders/sim_util.h
src/gromacs/mdlib/minimize.c
src/gromacs/mdlib/sim_util.c
src/gromacs/mdlib/tpi.c
src/programs/mdrun/md.c
src/programs/mdrun/mdrun.cpp

index d3bd2a9c7a637d50af832db23fd0a4fb0c524440,32cb601099ce4951a7c6650e3ab3a5cb84fc3e37..e02a0c1ac112795627f2ab4aaf57905a7e7ddd78
@@@ -66,9 -32,9 +66,11 @@@ if (TARGET_HOST
  endif()
  
  # Machinery for running the external project
 -set(EXTERNAL_FFTW_VERSION 3.3.2)
 +set(EXTERNAL_FFTW_VERSION 3.3.3)
+ set(GMX_BUILD_OWN_FFTW_URL "http://www.fftw.org/fftw-${EXTERNAL_FFTW_VERSION}.tar.gz" CACHE PATH "URL from where to download fftw, (use an absolute path when offline)")
+ mark_as_advanced(GMX_BUILD_OWN_FFTW_URL)
 +set(EXTERNAL_FFTW_MD5SUM 0a05ca9c7b3bfddc8278e7c40791a1c2)
 +set (EXTERNAL_FFTW_BUILD_TARGET fftwBuild)
  include(ExternalProject)
  # TODO in master branch - show this warning only on the first run
  # by using gmx_check_if_changed_result from I21b791ab8e4f3 when
@@@ -77,8 -43,8 +79,8 @@@ message(WARNING "The GROMACS build wil
  # TODO if/when CMake fixes http://www.cmake.org/Bug/view.php?id=14330
  # (ie. at least version > 2.8.11.2), consider reverting to using an
  # md5sum check to avoid needing the above warning
 -    ExternalProject_add(fftwBuild
 +    ExternalProject_add(${EXTERNAL_FFTW_BUILD_TARGET}
-         URL "http://www.fftw.org/fftw-${EXTERNAL_FFTW_VERSION}.tar.gz"
+         URL "${GMX_BUILD_OWN_FFTW_URL}"
          CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR> --libdir=<INSTALL_DIR>/lib --disable-fortran
          ${GMX_BUILD_OWN_FFTW_SHARED_FLAG} ${GMX_BUILD_OWN_FFTW_OPTIMIZATION_CONFIGURATION}
          ${GMX_BUILD_OWN_FFTW_PREC}
index 8192741d06050c483735c1eafd9e2fc48347e878,0000000000000000000000000000000000000000..6ea86f10809d0177c13178ac926f9082cc93eb0a
mode 100644,000000..100644
--- /dev/null
@@@ -1,2832 -1,0 +1,2833 @@@
-  * Copyright (c) 2013, by the GROMACS development team, led by
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
-     double vvhh, vv, v, h, hh2, vv2, varv, hh, varh, tt, cv, cp, alpha, kappa, dcp, et, varet;
++ * Copyright (c) 2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <math.h>
 +#include <ctype.h>
 +
 +#include "typedefs.h"
 +#include "gmx_fatal.h"
 +#include "vec.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "gromacs/fileio/enxio.h"
 +#include "gromacs/commandline/pargs.h"
 +#include "names.h"
 +#include "copyrite.h"
 +#include "macros.h"
 +#include "xvgr.h"
 +#include "gstat.h"
 +#include "physics.h"
 +#include "gromacs/fileio/tpxio.h"
 +#include "gromacs/fileio/trxio.h"
 +#include "viewit.h"
 +#include "mtop_util.h"
 +#include "gmx_ana.h"
 +#include "mdebin.h"
 +
 +static real       minthird = -1.0/3.0, minsixth = -1.0/6.0;
 +
 +typedef struct {
 +    real sum;
 +    real sum2;
 +} exactsum_t;
 +
 +typedef struct {
 +    real       *ener;
 +    exactsum_t *es;
 +    gmx_bool    bExactStat;
 +    double      av;
 +    double      rmsd;
 +    double      ee;
 +    double      slope;
 +} enerdat_t;
 +
 +typedef struct {
 +    gmx_int64_t      nsteps;
 +    gmx_int64_t      npoints;
 +    int              nframes;
 +    int             *step;
 +    int             *steps;
 +    int             *points;
 +    enerdat_t       *s;
 +} enerdata_t;
 +
 +static double mypow(double x, double y)
 +{
 +    if (x > 0)
 +    {
 +        return pow(x, y);
 +    }
 +    else
 +    {
 +        return 0.0;
 +    }
 +}
 +
 +static int *select_it(int nre, char *nm[], int *nset)
 +{
 +    gmx_bool *bE;
 +    int       n, k, j, i;
 +    int      *set;
 +    gmx_bool  bVerbose = TRUE;
 +
 +    if ((getenv("VERBOSE")) != NULL)
 +    {
 +        bVerbose = FALSE;
 +    }
 +
 +    fprintf(stderr, "Select the terms you want from the following list\n");
 +    fprintf(stderr, "End your selection with 0\n");
 +
 +    if (bVerbose)
 +    {
 +        for (k = 0; (k < nre); )
 +        {
 +            for (j = 0; (j < 4) && (k < nre); j++, k++)
 +            {
 +                fprintf(stderr, " %3d=%14s", k+1, nm[k]);
 +            }
 +            fprintf(stderr, "\n");
 +        }
 +    }
 +
 +    snew(bE, nre);
 +    do
 +    {
 +        if (1 != scanf("%d", &n))
 +        {
 +            gmx_fatal(FARGS, "Error reading user input");
 +        }
 +        if ((n > 0) && (n <= nre))
 +        {
 +            bE[n-1] = TRUE;
 +        }
 +    }
 +    while (n != 0);
 +
 +    snew(set, nre);
 +    for (i = (*nset) = 0; (i < nre); i++)
 +    {
 +        if (bE[i])
 +        {
 +            set[(*nset)++] = i;
 +        }
 +    }
 +
 +    sfree(bE);
 +
 +    return set;
 +}
 +
 +static void chomp(char *buf)
 +{
 +    int len = strlen(buf);
 +
 +    while ((len > 0) && (buf[len-1] == '\n'))
 +    {
 +        buf[len-1] = '\0';
 +        len--;
 +    }
 +}
 +
 +static int *select_by_name(int nre, gmx_enxnm_t *nm, int *nset)
 +{
 +    gmx_bool   *bE;
 +    int         n, k, kk, j, i, nmatch, nind, nss;
 +    int        *set;
 +    gmx_bool    bEOF, bVerbose = TRUE, bLong = FALSE;
 +    char       *ptr, buf[STRLEN];
 +    const char *fm4   = "%3d  %-14s";
 +    const char *fm2   = "%3d  %-34s";
 +    char      **newnm = NULL;
 +
 +    if ((getenv("VERBOSE")) != NULL)
 +    {
 +        bVerbose = FALSE;
 +    }
 +
 +    fprintf(stderr, "\n");
 +    fprintf(stderr, "Select the terms you want from the following list by\n");
 +    fprintf(stderr, "selecting either (part of) the name or the number or a combination.\n");
 +    fprintf(stderr, "End your selection with an empty line or a zero.\n");
 +    fprintf(stderr, "-------------------------------------------------------------------\n");
 +
 +    snew(newnm, nre);
 +    j = 0;
 +    for (k = 0; k < nre; k++)
 +    {
 +        newnm[k] = strdup(nm[k].name);
 +        /* Insert dashes in all the names */
 +        while ((ptr = strchr(newnm[k], ' ')) != NULL)
 +        {
 +            *ptr = '-';
 +        }
 +        if (bVerbose)
 +        {
 +            if (j == 0)
 +            {
 +                if (k > 0)
 +                {
 +                    fprintf(stderr, "\n");
 +                }
 +                bLong = FALSE;
 +                for (kk = k; kk < k+4; kk++)
 +                {
 +                    if (kk < nre && strlen(nm[kk].name) > 14)
 +                    {
 +                        bLong = TRUE;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                fprintf(stderr, " ");
 +            }
 +            if (!bLong)
 +            {
 +                fprintf(stderr, fm4, k+1, newnm[k]);
 +                j++;
 +                if (j == 4)
 +                {
 +                    j = 0;
 +                }
 +            }
 +            else
 +            {
 +                fprintf(stderr, fm2, k+1, newnm[k]);
 +                j++;
 +                if (j == 2)
 +                {
 +                    j = 0;
 +                }
 +            }
 +        }
 +    }
 +    if (bVerbose)
 +    {
 +        fprintf(stderr, "\n\n");
 +    }
 +
 +    snew(bE, nre);
 +
 +    bEOF = FALSE;
 +    while (!bEOF && (fgets2(buf, STRLEN-1, stdin)))
 +    {
 +        /* Remove newlines */
 +        chomp(buf);
 +
 +        /* Remove spaces */
 +        trim(buf);
 +
 +        /* Empty line means end of input */
 +        bEOF = (strlen(buf) == 0);
 +        if (!bEOF)
 +        {
 +            ptr = buf;
 +            do
 +            {
 +                if (!bEOF)
 +                {
 +                    /* First try to read an integer */
 +                    nss   = sscanf(ptr, "%d", &nind);
 +                    if (nss == 1)
 +                    {
 +                        /* Zero means end of input */
 +                        if (nind == 0)
 +                        {
 +                            bEOF = TRUE;
 +                        }
 +                        else if ((1 <= nind) && (nind <= nre))
 +                        {
 +                            bE[nind-1] = TRUE;
 +                        }
 +                        else
 +                        {
 +                            fprintf(stderr, "number %d is out of range\n", nind);
 +                        }
 +                    }
 +                    else
 +                    {
 +                        /* Now try to read a string */
 +                        i      = strlen(ptr);
 +                        nmatch = 0;
 +                        for (nind = 0; nind < nre; nind++)
 +                        {
 +                            if (gmx_strcasecmp(newnm[nind], ptr) == 0)
 +                            {
 +                                bE[nind] = TRUE;
 +                                nmatch++;
 +                            }
 +                        }
 +                        if (nmatch == 0)
 +                        {
 +                            i      = strlen(ptr);
 +                            nmatch = 0;
 +                            for (nind = 0; nind < nre; nind++)
 +                            {
 +                                if (gmx_strncasecmp(newnm[nind], ptr, i) == 0)
 +                                {
 +                                    bE[nind] = TRUE;
 +                                    nmatch++;
 +                                }
 +                            }
 +                            if (nmatch == 0)
 +                            {
 +                                fprintf(stderr, "String '%s' does not match anything\n", ptr);
 +                            }
 +                        }
 +                    }
 +                }
 +                /* Look for the first space, and remove spaces from there */
 +                if ((ptr = strchr(ptr, ' ')) != NULL)
 +                {
 +                    trim(ptr);
 +                }
 +            }
 +            while (!bEOF && (ptr && (strlen(ptr) > 0)));
 +        }
 +    }
 +
 +    snew(set, nre);
 +    for (i = (*nset) = 0; (i < nre); i++)
 +    {
 +        if (bE[i])
 +        {
 +            set[(*nset)++] = i;
 +        }
 +    }
 +
 +    sfree(bE);
 +
 +    if (*nset == 0)
 +    {
 +        gmx_fatal(FARGS, "No energy terms selected");
 +    }
 +
 +    for (i = 0; (i < nre); i++)
 +    {
 +        sfree(newnm[i]);
 +    }
 +    sfree(newnm);
 +
 +    return set;
 +}
 +
 +static void get_dhdl_parms(const char *topnm, t_inputrec *ir)
 +{
 +    gmx_mtop_t  mtop;
 +    int         natoms;
 +    t_iatom    *iatom;
 +    matrix      box;
 +
 +    /* all we need is the ir to be able to write the label */
 +    read_tpx(topnm, ir, box, &natoms, NULL, NULL, NULL, &mtop);
 +}
 +
 +static void get_orires_parms(const char *topnm,
 +                             int *nor, int *nex, int **label, real **obs)
 +{
 +    gmx_mtop_t      mtop;
 +    gmx_localtop_t *top;
 +    t_inputrec      ir;
 +    t_iparams      *ip;
 +    int             natoms, i;
 +    t_iatom        *iatom;
 +    int             nb;
 +    matrix          box;
 +
 +    read_tpx(topnm, &ir, box, &natoms, NULL, NULL, NULL, &mtop);
 +    top = gmx_mtop_generate_local_top(&mtop, &ir);
 +
 +    ip       = top->idef.iparams;
 +    iatom    = top->idef.il[F_ORIRES].iatoms;
 +
 +    /* Count how many distance restraint there are... */
 +    nb = top->idef.il[F_ORIRES].nr;
 +    if (nb == 0)
 +    {
 +        gmx_fatal(FARGS, "No orientation restraints in topology!\n");
 +    }
 +
 +    *nor = nb/3;
 +    *nex = 0;
 +    snew(*label, *nor);
 +    snew(*obs, *nor);
 +    for (i = 0; i < nb; i += 3)
 +    {
 +        (*label)[i/3] = ip[iatom[i]].orires.label;
 +        (*obs)[i/3]   = ip[iatom[i]].orires.obs;
 +        if (ip[iatom[i]].orires.ex >= *nex)
 +        {
 +            *nex = ip[iatom[i]].orires.ex+1;
 +        }
 +    }
 +    fprintf(stderr, "Found %d orientation restraints with %d experiments",
 +            *nor, *nex);
 +}
 +
 +static int get_bounds(const char *topnm,
 +                      real **bounds, int **index, int **dr_pair, int *npairs,
 +                      gmx_mtop_t *mtop, gmx_localtop_t **ltop, t_inputrec *ir)
 +{
 +    gmx_localtop_t *top;
 +    t_functype     *functype;
 +    t_iparams      *ip;
 +    int             natoms, i, j, k, type, ftype, natom;
 +    t_ilist        *disres;
 +    t_iatom        *iatom;
 +    real           *b;
 +    int            *ind, *pair;
 +    int             nb, label1;
 +    matrix          box;
 +
 +    read_tpx(topnm, ir, box, &natoms, NULL, NULL, NULL, mtop);
 +    snew(*ltop, 1);
 +    top   = gmx_mtop_generate_local_top(mtop, ir);
 +    *ltop = top;
 +
 +    functype = top->idef.functype;
 +    ip       = top->idef.iparams;
 +
 +    /* Count how many distance restraint there are... */
 +    nb = top->idef.il[F_DISRES].nr;
 +    if (nb == 0)
 +    {
 +        gmx_fatal(FARGS, "No distance restraints in topology!\n");
 +    }
 +
 +    /* Allocate memory */
 +    snew(b, nb);
 +    snew(ind, nb);
 +    snew(pair, nb+1);
 +
 +    /* Fill the bound array */
 +    nb = 0;
 +    for (i = 0; (i < top->idef.ntypes); i++)
 +    {
 +        ftype = functype[i];
 +        if (ftype == F_DISRES)
 +        {
 +
 +            label1  = ip[i].disres.label;
 +            b[nb]   = ip[i].disres.up1;
 +            ind[nb] = label1;
 +            nb++;
 +        }
 +    }
 +    *bounds = b;
 +
 +    /* Fill the index array */
 +    label1  = -1;
 +    disres  = &(top->idef.il[F_DISRES]);
 +    iatom   = disres->iatoms;
 +    for (i = j = k = 0; (i < disres->nr); )
 +    {
 +        type  = iatom[i];
 +        ftype = top->idef.functype[type];
 +        natom = interaction_function[ftype].nratoms+1;
 +        if (label1 != top->idef.iparams[type].disres.label)
 +        {
 +            pair[j] = k;
 +            label1  = top->idef.iparams[type].disres.label;
 +            j++;
 +        }
 +        k++;
 +        i += natom;
 +    }
 +    pair[j]  = k;
 +    *npairs  = k;
 +    if (j != nb)
 +    {
 +        gmx_incons("get_bounds for distance restraints");
 +    }
 +
 +    *index   = ind;
 +    *dr_pair = pair;
 +
 +    return nb;
 +}
 +
 +static void calc_violations(real rt[], real rav3[], int nb, int index[],
 +                            real bounds[], real *viol, double *st, double *sa)
 +{
 +    const   real sixth = 1.0/6.0;
 +    int          i, j;
 +    double       rsum, rav, sumaver, sumt;
 +
 +    sumaver = 0;
 +    sumt    = 0;
 +    for (i = 0; (i < nb); i++)
 +    {
 +        rsum = 0.0;
 +        rav  = 0.0;
 +        for (j = index[i]; (j < index[i+1]); j++)
 +        {
 +            if (viol)
 +            {
 +                viol[j] += mypow(rt[j], -3.0);
 +            }
 +            rav     += sqr(rav3[j]);
 +            rsum    += mypow(rt[j], -6);
 +        }
 +        rsum    = max(0.0, mypow(rsum, -sixth)-bounds[i]);
 +        rav     = max(0.0, mypow(rav, -sixth)-bounds[i]);
 +
 +        sumt    += rsum;
 +        sumaver += rav;
 +    }
 +    *st = sumt;
 +    *sa = sumaver;
 +}
 +
 +static void analyse_disre(const char *voutfn,    int nframes,
 +                          real violaver[], real bounds[], int index[],
 +                          int pair[],      int nbounds,
 +                          const output_env_t oenv)
 +{
 +    FILE   *vout;
 +    double  sum, sumt, sumaver;
 +    int     i, j;
 +
 +    /* Subtract bounds from distances, to calculate violations */
 +    calc_violations(violaver, violaver,
 +                    nbounds, pair, bounds, NULL, &sumt, &sumaver);
 +
 +#ifdef DEBUG
 +    fprintf(stdout, "\nSum of violations averaged over simulation: %g nm\n",
 +            sumaver);
 +    fprintf(stdout, "Largest violation averaged over simulation: %g nm\n\n",
 +            sumt);
 +#endif
 +    vout = xvgropen(voutfn, "r\\S-3\\N average violations", "DR Index", "nm",
 +                    oenv);
 +    sum  = 0.0;
 +    sumt = 0.0;
 +    for (i = 0; (i < nbounds); i++)
 +    {
 +        /* Do ensemble averaging */
 +        sumaver = 0;
 +        for (j = pair[i]; (j < pair[i+1]); j++)
 +        {
 +            sumaver += sqr(violaver[j]/nframes);
 +        }
 +        sumaver = max(0.0, mypow(sumaver, minsixth)-bounds[i]);
 +
 +        sumt   += sumaver;
 +        sum     = max(sum, sumaver);
 +        fprintf(vout, "%10d  %10.5e\n", index[i], sumaver);
 +    }
 +#ifdef DEBUG
 +    for (j = 0; (j < dr.ndr); j++)
 +    {
 +        fprintf(vout, "%10d  %10.5e\n", j, mypow(violaver[j]/nframes, minthird));
 +    }
 +#endif
 +    ffclose(vout);
 +
 +    fprintf(stdout, "\nSum of violations averaged over simulation: %g nm\n",
 +            sumt);
 +    fprintf(stdout, "Largest violation averaged over simulation: %g nm\n\n", sum);
 +
 +    do_view(oenv, voutfn, "-graphtype bar");
 +}
 +
 +static void einstein_visco(const char *fn, const char *fni, int nsets,
 +                           int nframes, real **sum,
 +                           real V, real T, int nsteps, double time[],
 +                           const output_env_t oenv)
 +{
 +    FILE  *fp0, *fp1;
 +    double av[4], avold[4];
 +    double fac, dt, di;
 +    int    i, j, m, nf4;
 +
 +    if (nframes < 1)
 +    {
 +        return;
 +    }
 +
 +    dt  = (time[1]-time[0]);
 +    nf4 = nframes/4+1;
 +
 +    for (i = 0; i <= nsets; i++)
 +    {
 +        avold[i] = 0;
 +    }
 +    fp0 = xvgropen(fni, "Shear viscosity integral",
 +                   "Time (ps)", "(kg m\\S-1\\N s\\S-1\\N ps)", oenv);
 +    fp1 = xvgropen(fn, "Shear viscosity using Einstein relation",
 +                   "Time (ps)", "(kg m\\S-1\\N s\\S-1\\N)", oenv);
 +    for (i = 1; i < nf4; i++)
 +    {
 +        fac = dt*nframes/nsteps;
 +        for (m = 0; m <= nsets; m++)
 +        {
 +            av[m] = 0;
 +        }
 +        for (j = 0; j < nframes-i; j++)
 +        {
 +            for (m = 0; m < nsets; m++)
 +            {
 +                di   = sqr(fac*(sum[m][j+i]-sum[m][j]));
 +
 +                av[m]     += di;
 +                av[nsets] += di/nsets;
 +            }
 +        }
 +        /* Convert to SI for the viscosity */
 +        fac = (V*NANO*NANO*NANO*PICO*1e10)/(2*BOLTZMANN*T)/(nframes-i);
 +        fprintf(fp0, "%10g", time[i]-time[0]);
 +        for (m = 0; (m <= nsets); m++)
 +        {
 +            av[m] = fac*av[m];
 +            fprintf(fp0, "  %10g", av[m]);
 +        }
 +        fprintf(fp0, "\n");
 +        fprintf(fp1, "%10g", 0.5*(time[i]+time[i-1])-time[0]);
 +        for (m = 0; (m <= nsets); m++)
 +        {
 +            fprintf(fp1, "  %10g", (av[m]-avold[m])/dt);
 +            avold[m] = av[m];
 +        }
 +        fprintf(fp1, "\n");
 +    }
 +    ffclose(fp0);
 +    ffclose(fp1);
 +}
 +
 +typedef struct {
 +    gmx_int64_t     np;
 +    double          sum;
 +    double          sav;
 +    double          sav2;
 +} ee_sum_t;
 +
 +typedef struct {
 +    int             b;
 +    ee_sum_t        sum;
 +    gmx_int64_t     nst;
 +    gmx_int64_t     nst_min;
 +} ener_ee_t;
 +
 +static void clear_ee_sum(ee_sum_t *ees)
 +{
 +    ees->sav  = 0;
 +    ees->sav2 = 0;
 +    ees->np   = 0;
 +    ees->sum  = 0;
 +}
 +
 +static void add_ee_sum(ee_sum_t *ees, double sum, int np)
 +{
 +    ees->np  += np;
 +    ees->sum += sum;
 +}
 +
 +static void add_ee_av(ee_sum_t *ees)
 +{
 +    double av;
 +
 +    av         = ees->sum/ees->np;
 +    ees->sav  += av;
 +    ees->sav2 += av*av;
 +    ees->np    = 0;
 +    ees->sum   = 0;
 +}
 +
 +static double calc_ee2(int nb, ee_sum_t *ees)
 +{
 +    return (ees->sav2/nb - dsqr(ees->sav/nb))/(nb - 1);
 +}
 +
 +static void set_ee_av(ener_ee_t *eee)
 +{
 +    if (debug)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(debug, "Storing average for err.est.: %s steps\n",
 +                gmx_step_str(eee->nst, buf));
 +    }
 +    add_ee_av(&eee->sum);
 +    eee->b++;
 +    if (eee->b == 1 || eee->nst < eee->nst_min)
 +    {
 +        eee->nst_min = eee->nst;
 +    }
 +    eee->nst = 0;
 +}
 +
 +static void calc_averages(int nset, enerdata_t *edat, int nbmin, int nbmax)
 +{
 +    int             nb, i, f, nee;
 +    double          sum, sum2, sump, see2;
 +    gmx_int64_t     steps, np, p, bound_nb;
 +    enerdat_t      *ed;
 +    exactsum_t     *es;
 +    gmx_bool        bAllZero;
 +    double          x, sx, sy, sxx, sxy;
 +    ener_ee_t      *eee;
 +
 +    /* Check if we have exact statistics over all points */
 +    for (i = 0; i < nset; i++)
 +    {
 +        ed             = &edat->s[i];
 +        ed->bExactStat = FALSE;
 +        if (edat->npoints > 0)
 +        {
 +            /* All energy file sum entries 0 signals no exact sums.
 +             * But if all energy values are 0, we still have exact sums.
 +             */
 +            bAllZero = TRUE;
 +            for (f = 0; f < edat->nframes && !ed->bExactStat; f++)
 +            {
 +                if (ed->ener[i] != 0)
 +                {
 +                    bAllZero = FALSE;
 +                }
 +                ed->bExactStat = (ed->es[f].sum != 0);
 +            }
 +            if (bAllZero)
 +            {
 +                ed->bExactStat = TRUE;
 +            }
 +        }
 +    }
 +
 +    snew(eee, nbmax+1);
 +    for (i = 0; i < nset; i++)
 +    {
 +        ed = &edat->s[i];
 +
 +        sum  = 0;
 +        sum2 = 0;
 +        np   = 0;
 +        sx   = 0;
 +        sy   = 0;
 +        sxx  = 0;
 +        sxy  = 0;
 +        for (nb = nbmin; nb <= nbmax; nb++)
 +        {
 +            eee[nb].b     = 0;
 +            clear_ee_sum(&eee[nb].sum);
 +            eee[nb].nst     = 0;
 +            eee[nb].nst_min = 0;
 +        }
 +        for (f = 0; f < edat->nframes; f++)
 +        {
 +            es = &ed->es[f];
 +
 +            if (ed->bExactStat)
 +            {
 +                /* Add the sum and the sum of variances to the totals. */
 +                p     = edat->points[f];
 +                sump  = es->sum;
 +                sum2 += es->sum2;
 +                if (np > 0)
 +                {
 +                    sum2 += dsqr(sum/np - (sum + es->sum)/(np + p))
 +                        *np*(np + p)/p;
 +                }
 +            }
 +            else
 +            {
 +                /* Add a single value to the sum and sum of squares. */
 +                p     = 1;
 +                sump  = ed->ener[f];
 +                sum2 += dsqr(sump);
 +            }
 +
 +            /* sum has to be increased after sum2 */
 +            np  += p;
 +            sum += sump;
 +
 +            /* For the linear regression use variance 1/p.
 +             * Note that sump is the sum, not the average, so we don't need p*.
 +             */
 +            x    = edat->step[f] - 0.5*(edat->steps[f] - 1);
 +            sx  += p*x;
 +            sy  += sump;
 +            sxx += p*x*x;
 +            sxy += x*sump;
 +
 +            for (nb = nbmin; nb <= nbmax; nb++)
 +            {
 +                /* Check if the current end step is closer to the desired
 +                 * block boundary than the next end step.
 +                 */
 +                bound_nb = (edat->step[0]-1)*nb + edat->nsteps*(eee[nb].b+1);
 +                if (eee[nb].nst > 0 &&
 +                    bound_nb - edat->step[f-1]*nb < edat->step[f]*nb - bound_nb)
 +                {
 +                    set_ee_av(&eee[nb]);
 +                }
 +                if (f == 0)
 +                {
 +                    eee[nb].nst = 1;
 +                }
 +                else
 +                {
 +                    eee[nb].nst += edat->step[f] - edat->step[f-1];
 +                }
 +                if (ed->bExactStat)
 +                {
 +                    add_ee_sum(&eee[nb].sum, es->sum, edat->points[f]);
 +                }
 +                else
 +                {
 +                    add_ee_sum(&eee[nb].sum, edat->s[i].ener[f], 1);
 +                }
 +                bound_nb = (edat->step[0]-1)*nb + edat->nsteps*(eee[nb].b+1);
 +                if (edat->step[f]*nb >= bound_nb)
 +                {
 +                    set_ee_av(&eee[nb]);
 +                }
 +            }
 +        }
 +
 +        edat->s[i].av = sum/np;
 +        if (ed->bExactStat)
 +        {
 +            edat->s[i].rmsd = sqrt(sum2/np);
 +        }
 +        else
 +        {
 +            edat->s[i].rmsd = sqrt(sum2/np - dsqr(edat->s[i].av));
 +        }
 +
 +        if (edat->nframes > 1)
 +        {
 +            edat->s[i].slope = (np*sxy - sx*sy)/(np*sxx - sx*sx);
 +        }
 +        else
 +        {
 +            edat->s[i].slope = 0;
 +        }
 +
 +        nee  = 0;
 +        see2 = 0;
 +        for (nb = nbmin; nb <= nbmax; nb++)
 +        {
 +            /* Check if we actually got nb blocks and if the smallest
 +             * block is not shorter than 80% of the average.
 +             */
 +            if (debug)
 +            {
 +                char buf1[STEPSTRSIZE], buf2[STEPSTRSIZE];
 +                fprintf(debug, "Requested %d blocks, we have %d blocks, min %s nsteps %s\n",
 +                        nb, eee[nb].b,
 +                        gmx_step_str(eee[nb].nst_min, buf1),
 +                        gmx_step_str(edat->nsteps, buf2));
 +            }
 +            if (eee[nb].b == nb && 5*nb*eee[nb].nst_min >= 4*edat->nsteps)
 +            {
 +                see2 += calc_ee2(nb, &eee[nb].sum);
 +                nee++;
 +            }
 +        }
 +        if (nee > 0)
 +        {
 +            edat->s[i].ee = sqrt(see2/nee);
 +        }
 +        else
 +        {
 +            edat->s[i].ee = -1;
 +        }
 +    }
 +    sfree(eee);
 +}
 +
 +static enerdata_t *calc_sum(int nset, enerdata_t *edat, int nbmin, int nbmax)
 +{
 +    enerdata_t *esum;
 +    enerdat_t  *s;
 +    int         f, i;
 +    double      sum;
 +
 +    snew(esum, 1);
 +    *esum = *edat;
 +    snew(esum->s, 1);
 +    s = &esum->s[0];
 +    snew(s->ener, esum->nframes);
 +    snew(s->es, esum->nframes);
 +
 +    s->bExactStat = TRUE;
 +    s->slope      = 0;
 +    for (i = 0; i < nset; i++)
 +    {
 +        if (!edat->s[i].bExactStat)
 +        {
 +            s->bExactStat = FALSE;
 +        }
 +        s->slope += edat->s[i].slope;
 +    }
 +
 +    for (f = 0; f < edat->nframes; f++)
 +    {
 +        sum = 0;
 +        for (i = 0; i < nset; i++)
 +        {
 +            sum += edat->s[i].ener[f];
 +        }
 +        s->ener[f] = sum;
 +        sum        = 0;
 +        for (i = 0; i < nset; i++)
 +        {
 +            sum += edat->s[i].es[f].sum;
 +        }
 +        s->es[f].sum  = sum;
 +        s->es[f].sum2 = 0;
 +    }
 +
 +    calc_averages(1, esum, nbmin, nbmax);
 +
 +    return esum;
 +}
 +
 +static char *ee_pr(double ee, char *buf)
 +{
 +    char   tmp[100];
 +    double rnd;
 +
 +    if (ee < 0)
 +    {
 +        sprintf(buf, "%s", "--");
 +    }
 +    else
 +    {
 +        /* Round to two decimals by printing. */
 +        sprintf(tmp, "%.1e", ee);
 +        sscanf(tmp, "%lf", &rnd);
 +        sprintf(buf, "%g", rnd);
 +    }
 +
 +    return buf;
 +}
 +
 +static void remove_drift(int nset, int nbmin, int nbmax, real dt, enerdata_t *edat)
 +{
 +/* Remove the drift by performing a fit to y = ax+b.
 +   Uses 5 iterations. */
 +    int    i, j, k;
 +    double delta, d, sd, sd2;
 +
 +    edat->npoints = edat->nframes;
 +    edat->nsteps  = edat->nframes;
 +
 +    for (k = 0; (k < 5); k++)
 +    {
 +        for (i = 0; (i < nset); i++)
 +        {
 +            delta = edat->s[i].slope*dt;
 +
 +            if (NULL != debug)
 +            {
 +                fprintf(debug, "slope for set %d is %g\n", i, edat->s[i].slope);
 +            }
 +
 +            for (j = 0; (j < edat->nframes); j++)
 +            {
 +                edat->s[i].ener[j]   -= j*delta;
 +                edat->s[i].es[j].sum  = 0;
 +                edat->s[i].es[j].sum2 = 0;
 +            }
 +        }
 +        calc_averages(nset, edat, nbmin, nbmax);
 +    }
 +}
 +
 +static void calc_fluctuation_props(FILE *fp,
 +                                   gmx_bool bDriftCorr, real dt,
 +                                   int nset, int nmol,
 +                                   char **leg, enerdata_t *edat,
 +                                   int nbmin, int nbmax)
 +{
 +    int    i, j;
-     vvhh = alpha = kappa = cp = dcp = cv = NOTSET;
++    double vv, v, h, varv, hh, varh, tt, cv, cp, alpha, kappa, dcp, et, varet;
 +    double NANO3;
 +    enum {
 +        eVol, eEnth, eTemp, eEtot, eNR
 +    };
 +    const char *my_ener[] = { "Volume", "Enthalpy", "Temperature", "Total Energy" };
 +    int         ii[eNR];
 +
 +    NANO3 = NANO*NANO*NANO;
 +    if (!bDriftCorr)
 +    {
 +        fprintf(fp, "\nYou may want to use the -driftcorr flag in order to correct\n"
 +                "for spurious drift in the graphs. Note that this is not\n"
 +                "a substitute for proper equilibration and sampling!\n");
 +    }
 +    else
 +    {
 +        remove_drift(nset, nbmin, nbmax, dt, edat);
 +    }
 +    for (i = 0; (i < eNR); i++)
 +    {
 +        for (ii[i] = 0; (ii[i] < nset &&
 +                         (gmx_strcasecmp(leg[ii[i]], my_ener[i]) != 0)); ii[i]++)
 +        {
 +            ;
 +        }
 +/*        if (ii[i] < nset)
 +            fprintf(fp,"Found %s data.\n",my_ener[i]);
 + */ }
 +    /* Compute it all! */
-         vvhh = 0;
++    alpha = kappa = cp = dcp = cv = NOTSET;
 +
 +    /* Temperature */
 +    tt = NOTSET;
 +    if (ii[eTemp] < nset)
 +    {
 +        tt    = edat->s[ii[eTemp]].av;
 +    }
 +    /* Volume */
 +    vv = varv = NOTSET;
 +    if ((ii[eVol] < nset) && (ii[eTemp] < nset))
 +    {
 +        vv    = edat->s[ii[eVol]].av*NANO3;
 +        varv  = dsqr(edat->s[ii[eVol]].rmsd*NANO3);
 +        kappa = (varv/vv)/(BOLTZMANN*tt);
 +    }
 +    /* Enthalpy */
 +    hh = varh = NOTSET;
 +    if ((ii[eEnth] < nset) && (ii[eTemp] < nset))
 +    {
 +        hh    = KILO*edat->s[ii[eEnth]].av/AVOGADRO;
 +        varh  = dsqr(KILO*edat->s[ii[eEnth]].rmsd/AVOGADRO);
 +        cp    = AVOGADRO*((varh/nmol)/(BOLTZMANN*tt*tt));
 +    }
 +    /* Total energy */
 +    et = varet = NOTSET;
 +    if ((ii[eEtot] < nset) && (hh == NOTSET) && (tt != NOTSET))
 +    {
 +        /* Only compute cv in constant volume runs, which we can test
 +           by checking whether the enthalpy was computed.
 +         */
 +        et    = edat->s[ii[eEtot]].av;
 +        varet = sqr(edat->s[ii[eEtot]].rmsd);
 +        cv    = KILO*((varet/nmol)/(BOLTZ*tt*tt));
 +    }
 +    /* Alpha, dcp */
 +    if ((ii[eVol] < nset) && (ii[eEnth] < nset) && (ii[eTemp] < nset))
 +    {
-             v     = edat->s[ii[eVol]].ener[j]*NANO3;
-             h     = KILO*edat->s[ii[eEnth]].ener[j]/AVOGADRO;
-             vvhh += (v*h);
++        double v_sum, h_sum, vh_sum, v_aver, h_aver, vh_aver;
++        vh_sum = v_sum = h_sum = 0;
 +        for (j = 0; (j < edat->nframes); j++)
 +        {
-         vvhh /= edat->nframes;
-         alpha = (vvhh-vv*hh)/(vv*BOLTZMANN*tt*tt);
-         dcp   = (vv*AVOGADRO/nmol)*tt*sqr(alpha)/(kappa);
++            v       = edat->s[ii[eVol]].ener[j]*NANO3;
++            h       = KILO*edat->s[ii[eEnth]].ener[j]/AVOGADRO;
++            v_sum  += v;
++            h_sum  += h;
++            vh_sum += (v*h);
 +        }
-             if (vvhh != NOTSET)
-             {
-                 fprintf(fp, "vvhh  =  %10g (m^3 J)\n", vvhh);
-             }
++        vh_aver = vh_sum / edat->nframes;
++        v_aver  = v_sum  / edat->nframes;
++        h_aver  = h_sum  / edat->nframes;
++        alpha   = (vh_aver-v_aver*h_aver)/(v_aver*BOLTZMANN*tt*tt);
++        dcp     = (v_aver*AVOGADRO/nmol)*tt*sqr(alpha)/(kappa);
 +    }
 +
 +    if (tt != NOTSET)
 +    {
 +        if (nmol < 2)
 +        {
 +            fprintf(fp, "\nWARNING: nmol = %d, this may not be what you want.\n",
 +                    nmol);
 +        }
 +        fprintf(fp, "\nTemperature dependent fluctuation properties at T = %g.\n", tt);
 +        fprintf(fp, "\nHeat capacities obtained from fluctuations do *not* include\n");
 +        fprintf(fp, "quantum corrections. If you want to get a more accurate estimate\n");
 +        fprintf(fp, "please use the g_dos program.\n\n");
 +        fprintf(fp, "WARNING: Please verify that your simulations are converged and perform\n"
 +                "a block-averaging error analysis (not implemented in g_energy yet)\n");
 +
 +        if (debug != NULL)
 +        {
 +            if (varv != NOTSET)
 +            {
 +                fprintf(fp, "varv  =  %10g (m^6)\n", varv*AVOGADRO/nmol);
 +            }
 +        }
 +        if (vv != NOTSET)
 +        {
 +            fprintf(fp, "Volume                                   = %10g m^3/mol\n",
 +                    vv*AVOGADRO/nmol);
 +        }
 +        if (varh != NOTSET)
 +        {
 +            fprintf(fp, "Enthalpy                                 = %10g kJ/mol\n",
 +                    hh*AVOGADRO/(KILO*nmol));
 +        }
 +        if (alpha != NOTSET)
 +        {
 +            fprintf(fp, "Coefficient of Thermal Expansion Alpha_P = %10g (1/K)\n",
 +                    alpha);
 +        }
 +        if (kappa != NOTSET)
 +        {
 +            fprintf(fp, "Isothermal Compressibility Kappa         = %10g (J/m^3)\n",
 +                    kappa);
 +            fprintf(fp, "Adiabatic bulk modulus                   = %10g (m^3/J)\n",
 +                    1.0/kappa);
 +        }
 +        if (cp != NOTSET)
 +        {
 +            fprintf(fp, "Heat capacity at constant pressure Cp    = %10g J/mol K\n",
 +                    cp);
 +        }
 +        if (cv != NOTSET)
 +        {
 +            fprintf(fp, "Heat capacity at constant volume Cv      = %10g J/mol K\n",
 +                    cv);
 +        }
 +        if (dcp != NOTSET)
 +        {
 +            fprintf(fp, "Cp-Cv                                    =  %10g J/mol K\n",
 +                    dcp);
 +        }
 +        please_cite(fp, "Allen1987a");
 +    }
 +    else
 +    {
 +        fprintf(fp, "You should select the temperature in order to obtain fluctuation properties.\n");
 +    }
 +}
 +
 +static void analyse_ener(gmx_bool bCorr, const char *corrfn,
 +                         gmx_bool bFee, gmx_bool bSum, gmx_bool bFluct,
 +                         gmx_bool bVisco, const char *visfn, int nmol,
 +                         gmx_int64_t start_step, double start_t,
 +                         gmx_int64_t step, double t,
 +                         double time[], real reftemp,
 +                         enerdata_t *edat,
 +                         int nset, int set[], gmx_bool *bIsEner,
 +                         char **leg, gmx_enxnm_t *enm,
 +                         real Vaver, real ezero,
 +                         int nbmin, int nbmax,
 +                         const output_env_t oenv)
 +{
 +    FILE           *fp;
 +    /* Check out the printed manual for equations! */
 +    double          Dt, aver, stddev, errest, delta_t, totaldrift;
 +    enerdata_t     *esum = NULL;
 +    real            xxx, integral, intBulk, Temp = 0, Pres = 0;
 +    real            sfrac, oldfrac, diffsum, diffav, fstep, pr_aver, pr_stddev, pr_errest;
 +    double          beta = 0, expE, expEtot, *fee = NULL;
 +    gmx_int64_t     nsteps;
 +    int             nexact, nnotexact;
 +    double          x1m, x1mk;
 +    int             i, j, k, nout;
 +    real            chi2;
 +    char            buf[256], eebuf[100];
 +
 +    nsteps  = step - start_step + 1;
 +    if (nsteps < 1)
 +    {
 +        fprintf(stdout, "Not enough steps (%s) for statistics\n",
 +                gmx_step_str(nsteps, buf));
 +    }
 +    else
 +    {
 +        /* Calculate the time difference */
 +        delta_t = t - start_t;
 +
 +        fprintf(stdout, "\nStatistics over %s steps [ %.4f through %.4f ps ], %d data sets\n",
 +                gmx_step_str(nsteps, buf), start_t, t, nset);
 +
 +        calc_averages(nset, edat, nbmin, nbmax);
 +
 +        if (bSum)
 +        {
 +            esum = calc_sum(nset, edat, nbmin, nbmax);
 +        }
 +
 +        if (edat->npoints == 0)
 +        {
 +            nexact    = 0;
 +            nnotexact = nset;
 +        }
 +        else
 +        {
 +            nexact    = 0;
 +            nnotexact = 0;
 +            for (i = 0; (i < nset); i++)
 +            {
 +                if (edat->s[i].bExactStat)
 +                {
 +                    nexact++;
 +                }
 +                else
 +                {
 +                    nnotexact++;
 +                }
 +            }
 +        }
 +
 +        if (nnotexact == 0)
 +        {
 +            fprintf(stdout, "All statistics are over %s points\n",
 +                    gmx_step_str(edat->npoints, buf));
 +        }
 +        else if (nexact == 0 || edat->npoints == edat->nframes)
 +        {
 +            fprintf(stdout, "All statistics are over %d points (frames)\n",
 +                    edat->nframes);
 +        }
 +        else
 +        {
 +            fprintf(stdout, "The term%s", nnotexact == 1 ? "" : "s");
 +            for (i = 0; (i < nset); i++)
 +            {
 +                if (!edat->s[i].bExactStat)
 +                {
 +                    fprintf(stdout, " '%s'", leg[i]);
 +                }
 +            }
 +            fprintf(stdout, " %s has statistics over %d points (frames)\n",
 +                    nnotexact == 1 ? "is" : "are", edat->nframes);
 +            fprintf(stdout, "All other statistics are over %s points\n",
 +                    gmx_step_str(edat->npoints, buf));
 +        }
 +        fprintf(stdout, "\n");
 +
 +        fprintf(stdout, "%-24s %10s %10s %10s %10s",
 +                "Energy", "Average", "Err.Est.", "RMSD", "Tot-Drift");
 +        if (bFee)
 +        {
 +            fprintf(stdout, "  %10s\n", "-kT ln<e^(E/kT)>");
 +        }
 +        else
 +        {
 +            fprintf(stdout, "\n");
 +        }
 +        fprintf(stdout, "-------------------------------------------------------------------------------\n");
 +
 +        /* Initiate locals, only used with -sum */
 +        expEtot = 0;
 +        if (bFee)
 +        {
 +            beta = 1.0/(BOLTZ*reftemp);
 +            snew(fee, nset);
 +        }
 +        for (i = 0; (i < nset); i++)
 +        {
 +            aver   = edat->s[i].av;
 +            stddev = edat->s[i].rmsd;
 +            errest = edat->s[i].ee;
 +
 +            if (bFee)
 +            {
 +                expE = 0;
 +                for (j = 0; (j < edat->nframes); j++)
 +                {
 +                    expE += exp(beta*(edat->s[i].ener[j] - aver)/nmol);
 +                }
 +                if (bSum)
 +                {
 +                    expEtot += expE/edat->nframes;
 +                }
 +
 +                fee[i] = log(expE/edat->nframes)/beta + aver/nmol;
 +            }
 +            if (strstr(leg[i], "empera") != NULL)
 +            {
 +                Temp = aver;
 +            }
 +            else if (strstr(leg[i], "olum") != NULL)
 +            {
 +                Vaver = aver;
 +            }
 +            else if (strstr(leg[i], "essure") != NULL)
 +            {
 +                Pres = aver;
 +            }
 +            if (bIsEner[i])
 +            {
 +                pr_aver   = aver/nmol-ezero;
 +                pr_stddev = stddev/nmol;
 +                pr_errest = errest/nmol;
 +            }
 +            else
 +            {
 +                pr_aver   = aver;
 +                pr_stddev = stddev;
 +                pr_errest = errest;
 +            }
 +
 +            /* Multiply the slope in steps with the number of steps taken */
 +            totaldrift = (edat->nsteps - 1)*edat->s[i].slope;
 +            if (bIsEner[i])
 +            {
 +                totaldrift /= nmol;
 +            }
 +
 +            fprintf(stdout, "%-24s %10g %10s %10g %10g",
 +                    leg[i], pr_aver, ee_pr(pr_errest, eebuf), pr_stddev, totaldrift);
 +            if (bFee)
 +            {
 +                fprintf(stdout, "  %10g", fee[i]);
 +            }
 +
 +            fprintf(stdout, "  (%s)\n", enm[set[i]].unit);
 +
 +            if (bFluct)
 +            {
 +                for (j = 0; (j < edat->nframes); j++)
 +                {
 +                    edat->s[i].ener[j] -= aver;
 +                }
 +            }
 +        }
 +        if (bSum)
 +        {
 +            totaldrift = (edat->nsteps - 1)*esum->s[0].slope;
 +            fprintf(stdout, "%-24s %10g %10s %10s %10g  (%s)",
 +                    "Total", esum->s[0].av/nmol, ee_pr(esum->s[0].ee/nmol, eebuf),
 +                    "--", totaldrift/nmol, enm[set[0]].unit);
 +            /* pr_aver,pr_stddev,a,totaldrift */
 +            if (bFee)
 +            {
 +                fprintf(stdout, "  %10g  %10g\n",
 +                        log(expEtot)/beta + esum->s[0].av/nmol, log(expEtot)/beta);
 +            }
 +            else
 +            {
 +                fprintf(stdout, "\n");
 +            }
 +        }
 +
 +        /* Do correlation function */
 +        if (edat->nframes > 1)
 +        {
 +            Dt = delta_t/(edat->nframes - 1);
 +        }
 +        else
 +        {
 +            Dt = 0;
 +        }
 +        if (bVisco)
 +        {
 +            const char* leg[] = { "Shear", "Bulk" };
 +            real        factor;
 +            real      **eneset;
 +            real      **enesum;
 +
 +            /* Assume pressure tensor is in Pxx Pxy Pxz Pyx Pyy Pyz Pzx Pzy Pzz */
 +
 +            /* Symmetrise tensor! (and store in first three elements)
 +             * And subtract average pressure!
 +             */
 +            snew(eneset, 12);
 +            for (i = 0; i < 12; i++)
 +            {
 +                snew(eneset[i], edat->nframes);
 +            }
 +            snew(enesum, 3);
 +            for (i = 0; i < 3; i++)
 +            {
 +                snew(enesum[i], edat->nframes);
 +            }
 +            for (i = 0; (i < edat->nframes); i++)
 +            {
 +                eneset[0][i] = 0.5*(edat->s[1].ener[i]+edat->s[3].ener[i]);
 +                eneset[1][i] = 0.5*(edat->s[2].ener[i]+edat->s[6].ener[i]);
 +                eneset[2][i] = 0.5*(edat->s[5].ener[i]+edat->s[7].ener[i]);
 +                for (j = 3; j <= 11; j++)
 +                {
 +                    eneset[j][i] = edat->s[j].ener[i];
 +                }
 +                eneset[11][i] -= Pres;
 +                enesum[0][i]   = 0.5*(edat->s[1].es[i].sum+edat->s[3].es[i].sum);
 +                enesum[1][i]   = 0.5*(edat->s[2].es[i].sum+edat->s[6].es[i].sum);
 +                enesum[2][i]   = 0.5*(edat->s[5].es[i].sum+edat->s[7].es[i].sum);
 +            }
 +
 +            einstein_visco("evisco.xvg", "eviscoi.xvg",
 +                           3, edat->nframes, enesum, Vaver, Temp, nsteps, time, oenv);
 +
 +            /*do_autocorr(corrfn,buf,nenergy,3,eneset,Dt,eacNormal,TRUE);*/
 +            /* Do it for shear viscosity */
 +            strcpy(buf, "Shear Viscosity");
 +            low_do_autocorr(corrfn, oenv, buf, edat->nframes, 3,
 +                            (edat->nframes+1)/2, eneset, Dt,
 +                            eacNormal, 1, TRUE, FALSE, FALSE, 0.0, 0.0, 0);
 +
 +            /* Now for bulk viscosity */
 +            strcpy(buf, "Bulk Viscosity");
 +            low_do_autocorr(corrfn, oenv, buf, edat->nframes, 1,
 +                            (edat->nframes+1)/2, &(eneset[11]), Dt,
 +                            eacNormal, 1, TRUE, FALSE, FALSE, 0.0, 0.0, 0);
 +
 +            factor = (Vaver*1e-26/(BOLTZMANN*Temp))*Dt;
 +            fp     = xvgropen(visfn, buf, "Time (ps)", "\\8h\\4 (cp)", oenv);
 +            xvgr_legend(fp, asize(leg), leg, oenv);
 +
 +            /* Use trapezium rule for integration */
 +            integral = 0;
 +            intBulk  = 0;
 +            nout     = get_acfnout();
 +            if ((nout < 2) || (nout >= edat->nframes/2))
 +            {
 +                nout = edat->nframes/2;
 +            }
 +            for (i = 1; (i < nout); i++)
 +            {
 +                integral += 0.5*(eneset[0][i-1]  + eneset[0][i])*factor;
 +                intBulk  += 0.5*(eneset[11][i-1] + eneset[11][i])*factor;
 +                fprintf(fp, "%10g  %10g  %10g\n", (i*Dt), integral, intBulk);
 +            }
 +            ffclose(fp);
 +        }
 +        else if (bCorr)
 +        {
 +            if (bFluct)
 +            {
 +                strcpy(buf, "Autocorrelation of Energy Fluctuations");
 +            }
 +            else
 +            {
 +                strcpy(buf, "Energy Autocorrelation");
 +            }
 +#if 0
 +            do_autocorr(corrfn, oenv, buf, edat->nframes,
 +                        bSum ? 1                 : nset,
 +                        bSum ? &edat->s[nset-1].ener : eneset,
 +                        (delta_t/edat->nframes), eacNormal, FALSE);
 +#endif
 +        }
 +    }
 +}
 +
 +static void print_time(FILE *fp, double t)
 +{
 +    fprintf(fp, "%12.6f", t);
 +}
 +
 +static void print1(FILE *fp, gmx_bool bDp, real e)
 +{
 +    if (bDp)
 +    {
 +        fprintf(fp, "  %16.12f", e);
 +    }
 +    else
 +    {
 +        fprintf(fp, "  %10.6f", e);
 +    }
 +}
 +
 +static void fec(const char *ene2fn, const char *runavgfn,
 +                real reftemp, int nset, int set[], char *leg[],
 +                enerdata_t *edat, double time[],
 +                const output_env_t oenv)
 +{
 +    const char * ravgleg[] = {
 +        "\\8D\\4E = E\\sB\\N-E\\sA\\N",
 +        "<e\\S-\\8D\\4E/kT\\N>\\s0..t\\N"
 +    };
 +    FILE        *fp;
 +    ener_file_t  enx;
 +    int          nre, timecheck, step, nenergy, nenergy2, maxenergy;
 +    int          i, j;
 +    gmx_bool     bCont;
 +    real         aver, beta;
 +    real       **eneset2;
 +    double       dE, sum;
 +    gmx_enxnm_t *enm = NULL;
 +    t_enxframe  *fr;
 +    char         buf[22];
 +
 +    /* read second energy file */
 +    snew(fr, 1);
 +    enm = NULL;
 +    enx = open_enx(ene2fn, "r");
 +    do_enxnms(enx, &(fr->nre), &enm);
 +
 +    snew(eneset2, nset+1);
 +    nenergy2  = 0;
 +    maxenergy = 0;
 +    timecheck = 0;
 +    do
 +    {
 +        /* This loop searches for the first frame (when -b option is given),
 +         * or when this has been found it reads just one energy frame
 +         */
 +        do
 +        {
 +            bCont = do_enx(enx, fr);
 +
 +            if (bCont)
 +            {
 +                timecheck = check_times(fr->t);
 +            }
 +
 +        }
 +        while (bCont && (timecheck < 0));
 +
 +        /* Store energies for analysis afterwards... */
 +        if ((timecheck == 0) && bCont)
 +        {
 +            if (fr->nre > 0)
 +            {
 +                if (nenergy2 >= maxenergy)
 +                {
 +                    maxenergy += 1000;
 +                    for (i = 0; i <= nset; i++)
 +                    {
 +                        srenew(eneset2[i], maxenergy);
 +                    }
 +                }
 +                if (fr->t != time[nenergy2])
 +                {
 +                    fprintf(stderr, "\nWARNING time mismatch %g!=%g at frame %s\n",
 +                            fr->t, time[nenergy2], gmx_step_str(fr->step, buf));
 +                }
 +                for (i = 0; i < nset; i++)
 +                {
 +                    eneset2[i][nenergy2] = fr->ener[set[i]].e;
 +                }
 +                nenergy2++;
 +            }
 +        }
 +    }
 +    while (bCont && (timecheck == 0));
 +
 +    /* check */
 +    if (edat->nframes != nenergy2)
 +    {
 +        fprintf(stderr, "\nWARNING file length mismatch %d!=%d\n",
 +                edat->nframes, nenergy2);
 +    }
 +    nenergy = min(edat->nframes, nenergy2);
 +
 +    /* calculate fe difference dF = -kT ln < exp(-(E_B-E_A)/kT) >_A */
 +    fp = NULL;
 +    if (runavgfn)
 +    {
 +        fp = xvgropen(runavgfn, "Running average free energy difference",
 +                      "Time (" unit_time ")", "\\8D\\4E (" unit_energy ")", oenv);
 +        xvgr_legend(fp, asize(ravgleg), ravgleg, oenv);
 +    }
 +    fprintf(stdout, "\n%-24s %10s\n",
 +            "Energy", "dF = -kT ln < exp(-(EB-EA)/kT) >A");
 +    sum  = 0;
 +    beta = 1.0/(BOLTZ*reftemp);
 +    for (i = 0; i < nset; i++)
 +    {
 +        if (gmx_strcasecmp(leg[i], enm[set[i]].name) != 0)
 +        {
 +            fprintf(stderr, "\nWARNING energy set name mismatch %s!=%s\n",
 +                    leg[i], enm[set[i]].name);
 +        }
 +        for (j = 0; j < nenergy; j++)
 +        {
 +            dE   = eneset2[i][j] - edat->s[i].ener[j];
 +            sum += exp(-dE*beta);
 +            if (fp)
 +            {
 +                fprintf(fp, "%10g %10g %10g\n",
 +                        time[j], dE, -BOLTZ*reftemp*log(sum/(j+1)) );
 +            }
 +        }
 +        aver = -BOLTZ*reftemp*log(sum/nenergy);
 +        fprintf(stdout, "%-24s %10g\n", leg[i], aver);
 +    }
 +    if (fp)
 +    {
 +        ffclose(fp);
 +    }
 +    sfree(fr);
 +}
 +
 +
 +static void do_dhdl(t_enxframe *fr, t_inputrec *ir, FILE **fp_dhdl,
 +                    const char *filename, gmx_bool bDp,
 +                    int *blocks, int *hists, int *samples, int *nlambdas,
 +                    const output_env_t oenv)
 +{
 +    const char  *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
 +    char         title[STRLEN], label_x[STRLEN], label_y[STRLEN], legend[STRLEN];
 +    char         buf[STRLEN];
 +    gmx_bool     first       = FALSE;
 +    int          nblock_hist = 0, nblock_dh = 0, nblock_dhcoll = 0;
 +    int          i, j, k;
 +    /* coll data */
 +    double       temp              = 0, start_time = 0, delta_time = 0, start_lambda = 0, delta_lambda = 0;
 +    static int   setnr             = 0;
 +    double      *native_lambda_vec = NULL;
 +    const char **lambda_components = NULL;
 +    int          n_lambda_vec      = 0;
 +    gmx_bool     changing_lambda   = FALSE;
 +    int          lambda_fep_state;
 +
 +    /* now count the blocks & handle the global dh data */
 +    for (i = 0; i < fr->nblock; i++)
 +    {
 +        if (fr->block[i].id == enxDHHIST)
 +        {
 +            nblock_hist++;
 +        }
 +        else if (fr->block[i].id == enxDH)
 +        {
 +            nblock_dh++;
 +        }
 +        else if (fr->block[i].id == enxDHCOLL)
 +        {
 +            nblock_dhcoll++;
 +            if ( (fr->block[i].nsub < 1) ||
 +                 (fr->block[i].sub[0].type != xdr_datatype_double) ||
 +                 (fr->block[i].sub[0].nr < 5))
 +            {
 +                gmx_fatal(FARGS, "Unexpected block data");
 +            }
 +
 +            /* read the data from the DHCOLL block */
 +            temp            =         fr->block[i].sub[0].dval[0];
 +            start_time      =   fr->block[i].sub[0].dval[1];
 +            delta_time      =   fr->block[i].sub[0].dval[2];
 +            start_lambda    = fr->block[i].sub[0].dval[3];
 +            delta_lambda    = fr->block[i].sub[0].dval[4];
 +            changing_lambda = (delta_lambda != 0);
 +            if (fr->block[i].nsub > 1)
 +            {
 +                lambda_fep_state = fr->block[i].sub[1].ival[0];
 +                if (n_lambda_vec == 0)
 +                {
 +                    n_lambda_vec = fr->block[i].sub[1].ival[1];
 +                }
 +                else
 +                {
 +                    if (n_lambda_vec != fr->block[i].sub[1].ival[1])
 +                    {
 +                        gmx_fatal(FARGS,
 +                                  "Unexpected change of basis set in lambda");
 +                    }
 +                }
 +                if (lambda_components == NULL)
 +                {
 +                    snew(lambda_components, n_lambda_vec);
 +                }
 +                if (native_lambda_vec == NULL)
 +                {
 +                    snew(native_lambda_vec, n_lambda_vec);
 +                }
 +                for (j = 0; j < n_lambda_vec; j++)
 +                {
 +                    native_lambda_vec[j] = fr->block[i].sub[0].dval[5+j];
 +                    lambda_components[j] =
 +                        efpt_singular_names[fr->block[i].sub[1].ival[2+j]];
 +                }
 +            }
 +        }
 +    }
 +
 +    if (nblock_hist == 0 && nblock_dh == 0)
 +    {
 +        /* don't do anything */
 +        return;
 +    }
 +    if (nblock_hist > 0 && nblock_dh > 0)
 +    {
 +        gmx_fatal(FARGS, "This energy file contains both histogram dhdl data and non-histogram dhdl data. Don't know what to do.");
 +    }
 +    if (!*fp_dhdl)
 +    {
 +        if (nblock_dh > 0)
 +        {
 +            /* we have standard, non-histogram data --
 +               call open_dhdl to open the file */
 +            /* TODO this is an ugly hack that needs to be fixed: this will only
 +               work if the order of data is always the same and if we're
 +               only using the g_energy compiled with the mdrun that produced
 +               the ener.edr. */
 +            *fp_dhdl = open_dhdl(filename, ir, oenv);
 +        }
 +        else
 +        {
 +            sprintf(title, "N(%s)", deltag);
 +            sprintf(label_x, "%s (%s)", deltag, unit_energy);
 +            sprintf(label_y, "Samples");
 +            *fp_dhdl = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
 +            sprintf(buf, "T = %g (K), %s = %g", temp, lambda, start_lambda);
 +            xvgr_subtitle(*fp_dhdl, buf, oenv);
 +        }
 +    }
 +
 +    (*hists)   += nblock_hist;
 +    (*blocks)  += nblock_dh;
 +    (*nlambdas) = nblock_hist+nblock_dh;
 +
 +    /* write the data */
 +    if (nblock_hist > 0)
 +    {
 +        gmx_int64_t sum = 0;
 +        /* histograms */
 +        for (i = 0; i < fr->nblock; i++)
 +        {
 +            t_enxblock *blk = &(fr->block[i]);
 +            if (blk->id == enxDHHIST)
 +            {
 +                double          foreign_lambda, dx;
 +                gmx_int64_t     x0;
 +                int             nhist, derivative;
 +
 +                /* check the block types etc. */
 +                if ( (blk->nsub < 2) ||
 +                     (blk->sub[0].type != xdr_datatype_double) ||
 +                     (blk->sub[1].type != xdr_datatype_int64) ||
 +                     (blk->sub[0].nr < 2)  ||
 +                     (blk->sub[1].nr < 2) )
 +                {
 +                    gmx_fatal(FARGS, "Unexpected block data in file");
 +                }
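 +                /* sub[0] holds the foreign lambda and the bin width dx;
 +                   sub[1] holds the histogram count, the derivative flag and the per-histogram start bins */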
 +                foreign_lambda = blk->sub[0].dval[0];
 +                dx             = blk->sub[0].dval[1];
 +                nhist          = blk->sub[1].lval[0];
 +                derivative     = blk->sub[1].lval[1];
 +                for (j = 0; j < nhist; j++)
 +                {
 +                    const char *lg[1];
 +                    x0 = blk->sub[1].lval[2+j];
 +
 +                    if (!derivative)
 +                    {
 +                        sprintf(legend, "N(%s(%s=%g) | %s=%g)",
 +                                deltag, lambda, foreign_lambda,
 +                                lambda, start_lambda);
 +                    }
 +                    else
 +                    {
 +                        sprintf(legend, "N(%s | %s=%g)",
 +                                dhdl, lambda, start_lambda);
 +                    }
 +
 +                    lg[0] = legend;
 +                    xvgr_new_dataset(*fp_dhdl, setnr, 1, lg, oenv);
 +                    setnr++;
 +                    for (k = 0; k < blk->sub[j+2].nr; k++)
 +                    {
 +                        int    hist;
 +                        double xmin, xmax;
 +
 +                        hist = blk->sub[j+2].ival[k];
 +                        xmin = (x0+k)*dx;
 +                        xmax = (x0+k+1)*dx;
 +                        fprintf(*fp_dhdl, "%g %d\n%g %d\n", xmin, hist,
 +                                xmax, hist);
 +                        sum += hist;
 +                    }
 +                    /* multiple histogram data blocks in one histogram
 +                       mean that the second one is the reverse of the first one:
 +                       for dhdl derivatives, it's important to know both the
 +                       maximum and minimum values */
 +                    dx = -dx;
 +                }
 +            }
 +        }
 +        (*samples) += (int)(sum/nblock_hist);
 +    }
 +    else
 +    {
 +        /* raw dh */
 +        int    len      = 0;
 +        char **setnames = NULL;
 +        int    nnames   = nblock_dh;
 +
 +        for (i = 0; i < fr->nblock; i++)
 +        {
 +            t_enxblock *blk = &(fr->block[i]);
 +            if (blk->id == enxDH)
 +            {
 +                if (len == 0)
 +                {
 +                    len = blk->sub[2].nr;
 +                }
 +                else
 +                {
 +                    if (len != blk->sub[2].nr)
 +                    {
 +                        gmx_fatal(FARGS, "Length inconsistency in dhdl data");
 +                    }
 +                }
 +            }
 +        }
 +        (*samples) += len;
 +
 +        for (i = 0; i < len; i++)
 +        {
 +            double time = start_time + delta_time*i;
 +
 +            fprintf(*fp_dhdl, "%.4f ", time);
 +
 +            for (j = 0; j < fr->nblock; j++)
 +            {
 +                t_enxblock *blk = &(fr->block[j]);
 +                if (blk->id == enxDH)
 +                {
 +                    double value;
 +                    if (blk->sub[2].type == xdr_datatype_float)
 +                    {
 +                        value = blk->sub[2].fval[i];
 +                    }
 +                    else
 +                    {
 +                        value = blk->sub[2].dval[i];
 +                    }
 +                    /* we need to decide which data type to print based on the block index */
 +
 +                    if (j == 1 && ir->bExpanded)
 +                    {
 +                        fprintf(*fp_dhdl, "%4d", (int)value);   /* with expanded ensembles this block holds the lambda state index, which is an integer. We need a cleaner conditional than j == 1! */
 +                    }
 +                    else
 +                    {
 +                        if (bDp)
 +                        {
 +                            fprintf(*fp_dhdl, " %#.12g", value);   /* print high precision */
 +                        }
 +                        else
 +                        {
 +                            fprintf(*fp_dhdl, " %#.8g", value);   /* print normal precision */
 +                        }
 +                    }
 +                }
 +            }
 +            fprintf(*fp_dhdl, "\n");
 +        }
 +    }
 +}
 +
 +
 +int gmx_energy(int argc, char *argv[])
 +{
 +    const char        *desc[] = {
 +        "[THISMODULE] extracts energy components or distance restraint",
 +        "data from an energy file. The user is prompted to interactively",
 +        "select the desired energy terms.[PAR]",
 +
 +        "Average, RMSD, and drift are calculated with full precision from the",
 +        "simulation (see printed manual). Drift is calculated by performing",
 +        "a least-squares fit of the data to a straight line. The reported total drift",
 +        "is the difference of the fit at the first and last point.",
 +        "An error estimate of the average is given based on block averages",
 +        "over 5 blocks, using the full-precision averages. The error estimate",
 +        "can be performed over multiple block lengths with the options",
 +        "[TT]-nbmin[tt] and [TT]-nbmax[tt].",
 +        "[BB]Note[bb] that in most cases the energy file contains averages over all",
 +        "MD steps, or over many more points than the number of frames in the",
 +        "energy file. This makes the [THISMODULE] statistics output more accurate",
 +        "than the [TT].xvg[tt] output. When exact averages are not present in the energy",
 +        "file, the statistics mentioned above are simply over the single, per-frame",
 +        "energy values.[PAR]",
 +
 +        "The term fluctuation gives the RMSD around the least-squares fit.[PAR]",
 +
 +        "Some fluctuation-dependent properties can be calculated provided",
 +        "the correct energy terms are selected and the command-line option",
 +        "[TT]-fluct_props[tt] is given. The following properties",
 +        "will be computed:[BR]",
 +        "Property                        Energy terms needed[BR]",
 +        "---------------------------------------------------[BR]",
 +        "Heat capacity C[SUB]p[sub] (NPT sims):    Enthalpy, Temp     [BR]",
 +        "Heat capacity C[SUB]v[sub] (NVT sims):    Etot, Temp         [BR]",
 +        "Thermal expansion coeff. (NPT): Enthalpy, Vol, Temp[BR]",
 +        "Isothermal compressibility:     Vol, Temp          [BR]",
 +        "Adiabatic bulk modulus:         Vol, Temp          [BR]",
 +        "---------------------------------------------------[BR]",
 +        "You always need to set the number of molecules with [TT]-nmol[tt].",
 +        "The C[SUB]p[sub]/C[SUB]v[sub] computations do [BB]not[bb] include any corrections",
 +        "for quantum effects. Use the [gmx-dos] program if you need that (and you do).[PAR]",
 +        "When the [TT]-viol[tt] option is set, the time-averaged",
 +        "violations are plotted and the running time-averaged and",
 +        "instantaneous sum of violations are recalculated. Additionally",
 +        "running time-averaged and instantaneous distances between",
 +        "selected pairs can be plotted with the [TT]-pairs[tt] option.[PAR]",
 +
 +        "Options [TT]-ora[tt], [TT]-ort[tt], [TT]-oda[tt], [TT]-odr[tt] and",
 +        "[TT]-odt[tt] are used for analyzing orientation restraint data.",
 +        "The first two options plot the orientation, the last three the",
 +        "deviations of the orientations from the experimental values.",
 +        "The options that end in an 'a' plot the average over time",
 +        "as a function of restraint. The options that end in a 't'",
 +        "prompt the user for restraint label numbers and plot the data",
 +        "as a function of time. Option [TT]-odr[tt] plots the RMS",
 +        "deviation as a function of restraint.",
 +        "When the run used time- or ensemble-averaged orientation restraints,",
 +        "option [TT]-orinst[tt] can be used to analyse the instantaneous,",
 +        "not ensemble-averaged orientations and deviations instead of",
 +        "the time and ensemble averages.[PAR]",
 +
 +        "Option [TT]-oten[tt] plots the eigenvalues of the molecular order",
 +        "tensor for each orientation restraint experiment. With option",
 +        "[TT]-ovec[tt] also the eigenvectors are plotted.[PAR]",
 +
 +        "Option [TT]-odh[tt] extracts and plots the free energy data",
 +        "(Hamiltonian differences and/or the Hamiltonian derivative dhdl)",
 +        "from the [TT]ener.edr[tt] file.[PAR]",
 +
 +        "With [TT]-fee[tt] an estimate is calculated for the free-energy",
 +        "difference with an ideal gas state: [BR]",
 +        "  [GRK]Delta[grk] A = A(N,V,T) - A[SUB]idealgas[sub](N,V,T) = kT [LN][CHEVRON][EXP]U[SUB]pot[sub]/kT[exp][chevron][ln][BR]",
 +        "  [GRK]Delta[grk] G = G(N,p,T) - G[SUB]idealgas[sub](N,p,T) = kT [LN][CHEVRON][EXP]U[SUB]pot[sub]/kT[exp][chevron][ln][BR]",
 +        "where k is Boltzmann's constant, T is set by [TT]-fetemp[tt] and",
 +        "the average is over the ensemble (or time in a trajectory).",
 +        "Note that this is in principle",
 +        "only correct when averaging over the whole (Boltzmann) ensemble",
 +        "and using the potential energy. This also allows for an entropy",
 +        "estimate using:[BR]",
 +        "  [GRK]Delta[grk] S(N,V,T) = S(N,V,T) - S[SUB]idealgas[sub](N,V,T) = ([CHEVRON]U[SUB]pot[sub][chevron] - [GRK]Delta[grk] A)/T[BR]",
 +        "  [GRK]Delta[grk] S(N,p,T) = S(N,p,T) - S[SUB]idealgas[sub](N,p,T) = ([CHEVRON]U[SUB]pot[sub][chevron] + pV - [GRK]Delta[grk] G)/T",
 +        "[PAR]",
 +
 +        "When a second energy file is specified ([TT]-f2[tt]), a free energy",
 +        "difference is calculated [BR] dF = -kT [LN][CHEVRON][EXP]-(E[SUB]B[sub]-E[SUB]A[sub])/kT[exp][chevron][SUB]A[sub][ln] ,",
 +        "where E[SUB]A[sub] and E[SUB]B[sub] are the energies from the first and second energy",
 +        "files, and the average is over the ensemble A. The running average",
 +        "of the free energy difference is printed to a file specified by [TT]-ravg[tt].",
 +        "[BB]Note[bb] that the energies must both be calculated from the same trajectory."
 +
 +    };
 +    static gmx_bool    bSum    = FALSE, bFee = FALSE, bPrAll = FALSE, bFluct = FALSE, bDriftCorr = FALSE;
 +    static gmx_bool    bDp     = FALSE, bMutot = FALSE, bOrinst = FALSE, bOvec = FALSE, bFluctProps = FALSE;
 +    static int         skip    = 0, nmol = 1, nbmin = 5, nbmax = 5;
 +    static real        reftemp = 300.0, ezero = 0;
 +    t_pargs            pa[]    = {
 +        { "-fee",   FALSE, etBOOL,  {&bFee},
 +          "Do a free energy estimate" },
 +        { "-fetemp", FALSE, etREAL, {&reftemp},
 +          "Reference temperature for free energy calculation" },
 +        { "-zero", FALSE, etREAL, {&ezero},
 +          "Subtract a zero-point energy" },
 +        { "-sum",  FALSE, etBOOL, {&bSum},
 +          "Sum the energy terms selected rather than display them all" },
 +        { "-dp",   FALSE, etBOOL, {&bDp},
 +          "Print energies in high precision" },
 +        { "-nbmin", FALSE, etINT, {&nbmin},
 +          "Minimum number of blocks for error estimate" },
 +        { "-nbmax", FALSE, etINT, {&nbmax},
 +          "Maximum number of blocks for error estimate" },
 +        { "-mutot", FALSE, etBOOL, {&bMutot},
 +          "Compute the total dipole moment from the components" },
 +        { "-skip", FALSE, etINT,  {&skip},
 +          "Skip number of frames between data points" },
 +        { "-aver", FALSE, etBOOL, {&bPrAll},
 +          "Also print the exact average and rmsd stored in the energy frames (only when 1 term is requested)" },
 +        { "-nmol", FALSE, etINT,  {&nmol},
 +          "Number of molecules in your sample: the energies are divided by this number" },
 +        { "-fluct_props", FALSE, etBOOL, {&bFluctProps},
 +          "Compute properties based on energy fluctuations, like heat capacity" },
 +        { "-driftcorr", FALSE, etBOOL, {&bDriftCorr},
 +          "Useful only for calculations of fluctuation properties. The drift in the observables will be subtracted before computing the fluctuation properties."},
 +        { "-fluc", FALSE, etBOOL, {&bFluct},
 +          "Calculate autocorrelation of energy fluctuations rather than energy itself" },
 +        { "-orinst", FALSE, etBOOL, {&bOrinst},
 +          "Analyse instantaneous orientation data" },
 +        { "-ovec", FALSE, etBOOL, {&bOvec},
 +          "Also plot the eigenvectors with [TT]-oten[tt]" }
 +    };
 +    const char       * drleg[] = {
 +        "Running average",
 +        "Instantaneous"
 +    };
 +    static const char *setnm[] = {
 +        "Pres-XX", "Pres-XY", "Pres-XZ", "Pres-YX", "Pres-YY",
 +        "Pres-YZ", "Pres-ZX", "Pres-ZY", "Pres-ZZ", "Temperature",
 +        "Volume",  "Pressure"
 +    };
 +
 +    FILE              *out     = NULL, *fp_pairs = NULL, *fort = NULL, *fodt = NULL, *foten = NULL;
 +    FILE              *fp_dhdl = NULL;
 +    FILE             **drout;
 +    ener_file_t        fp;
 +    int                timecheck = 0;
 +    gmx_mtop_t         mtop;
 +    gmx_localtop_t    *top = NULL;
 +    t_inputrec         ir;
 +    t_energy         **ee;
 +    enerdata_t         edat;
 +    gmx_enxnm_t       *enm = NULL;
 +    t_enxframe        *frame, *fr = NULL;
 +    int                cur = 0;
 +#define NEXT (1-cur)
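 +    /* frame[cur] is the last complete energy frame read; frame[NEXT] is the buffer the next frame is read into */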
 +    int                nre, teller, teller_disre, nfr;
 +    gmx_int64_t        start_step;
 +    int                nor = 0, nex = 0, norfr = 0, enx_i = 0;
 +    real               start_t;
 +    real              *bounds  = NULL, *violaver = NULL, *oobs = NULL, *orient = NULL, *odrms = NULL;
 +    int               *index   = NULL, *pair = NULL, norsel = 0, *orsel = NULL, *or_label = NULL;
 +    int                nbounds = 0, npairs;
 +    gmx_bool           bDisRe, bDRAll, bORA, bORT, bODA, bODR, bODT, bORIRE, bOTEN, bDHDL;
 +    gmx_bool           bFoundStart, bCont, bEDR, bVisco;
 +    double             sum, sumaver, sumt, ener, dbl;
 +    double            *time = NULL;
 +    real               Vaver;
 +    int               *set     = NULL, i, j, k, nset, sss;
 +    gmx_bool          *bIsEner = NULL;
 +    char             **pairleg, **odtleg, **otenleg;
 +    char             **leg = NULL;
 +    char             **nms;
 +    char              *anm_j, *anm_k, *resnm_j, *resnm_k;
 +    int                resnr_j, resnr_k;
 +    const char        *orinst_sub = "@ subtitle \"instantaneous\"\n";
 +    char               buf[256];
 +    output_env_t       oenv;
 +    t_enxblock        *blk       = NULL;
 +    t_enxblock        *blk_disre = NULL;
 +    int                ndisre    = 0;
 +    int                dh_blocks = 0, dh_hists = 0, dh_samples = 0, dh_lambdas = 0;
 +
 +    t_filenm           fnm[] = {
 +        { efEDR, "-f",    NULL,      ffREAD  },
 +        { efEDR, "-f2",   NULL,      ffOPTRD },
 +        { efTPX, "-s",    NULL,      ffOPTRD },
 +        { efXVG, "-o",    "energy",  ffWRITE },
 +        { efXVG, "-viol", "violaver", ffOPTWR },
 +        { efXVG, "-pairs", "pairs",   ffOPTWR },
 +        { efXVG, "-ora",  "orienta", ffOPTWR },
 +        { efXVG, "-ort",  "orientt", ffOPTWR },
 +        { efXVG, "-oda",  "orideva", ffOPTWR },
 +        { efXVG, "-odr",  "oridevr", ffOPTWR },
 +        { efXVG, "-odt",  "oridevt", ffOPTWR },
 +        { efXVG, "-oten", "oriten",  ffOPTWR },
 +        { efXVG, "-corr", "enecorr", ffOPTWR },
 +        { efXVG, "-vis",  "visco",   ffOPTWR },
 +        { efXVG, "-ravg", "runavgdf", ffOPTWR },
 +        { efXVG, "-odh",  "dhdl", ffOPTWR }
 +    };
 +#define NFILE asize(fnm)
 +    int                npargs;
 +    t_pargs           *ppa;
 +
 +    npargs = asize(pa);
 +    ppa    = add_acf_pargs(&npargs, pa);
 +    if (!parse_common_args(&argc, argv,
 +                           PCA_CAN_VIEW | PCA_CAN_BEGIN | PCA_CAN_END | PCA_BE_NICE,
 +                           NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv))
 +    {
 +        return 0;
 +    }
 +
 +    bDRAll = opt2bSet("-pairs", NFILE, fnm);
 +    bDisRe = opt2bSet("-viol", NFILE, fnm) || bDRAll;
 +    bORA   = opt2bSet("-ora", NFILE, fnm);
 +    bORT   = opt2bSet("-ort", NFILE, fnm);
 +    bODA   = opt2bSet("-oda", NFILE, fnm);
 +    bODR   = opt2bSet("-odr", NFILE, fnm);
 +    bODT   = opt2bSet("-odt", NFILE, fnm);
 +    bORIRE = bORA || bORT || bODA || bODR || bODT;
 +    bOTEN  = opt2bSet("-oten", NFILE, fnm);
 +    bDHDL  = opt2bSet("-odh", NFILE, fnm);
 +
 +    nset = 0;
 +
 +    snew(frame, 2);
 +    fp = open_enx(ftp2fn(efEDR, NFILE, fnm), "r");
 +    do_enxnms(fp, &nre, &enm);
 +
 +    Vaver = -1;
 +
 +    bVisco = opt2bSet("-vis", NFILE, fnm);
 +
 +    if ((!bDisRe) && (!bDHDL))
 +    {
 +        if (bVisco)
 +        {
 +            nset = asize(setnm);
 +            snew(set, nset);
 +            /* This is nasty code... To extract Pres tensor, Volume and Temperature */
 +            for (j = 0; j < nset; j++)
 +            {
 +                for (i = 0; i < nre; i++)
 +                {
 +                    if (strstr(enm[i].name, setnm[j]))
 +                    {
 +                        set[j] = i;
 +                        break;
 +                    }
 +                }
 +                if (i == nre)
 +                {
 +                    if (gmx_strcasecmp(setnm[j], "Volume") == 0)
 +                    {
 +                        printf("Enter the box volume (" unit_volume "): ");
 +                        if (1 != scanf("%lf", &dbl))
 +                        {
 +                            gmx_fatal(FARGS, "Error reading user input");
 +                        }
 +                        Vaver = dbl;
 +                    }
 +                    else
 +                    {
 +                        gmx_fatal(FARGS, "Could not find term %s for viscosity calculation",
 +                                  setnm[j]);
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            set = select_by_name(nre, enm, &nset);
 +        }
 +        /* Print all the different units once */
 +        sprintf(buf, "(%s)", enm[set[0]].unit);
 +        for (i = 1; i < nset; i++)
 +        {
 +            for (j = 0; j < i; j++)
 +            {
 +                if (strcmp(enm[set[i]].unit, enm[set[j]].unit) == 0)
 +                {
 +                    break;
 +                }
 +            }
 +            if (j == i)
 +            {
 +                strcat(buf, ", (");
 +                strcat(buf, enm[set[i]].unit);
 +                strcat(buf, ")");
 +            }
 +        }
 +        out = xvgropen(opt2fn("-o", NFILE, fnm), "Gromacs Energies", "Time (ps)", buf,
 +                       oenv);
 +
 +        snew(leg, nset+1);
 +        for (i = 0; (i < nset); i++)
 +        {
 +            leg[i] = enm[set[i]].name;
 +        }
 +        if (bSum)
 +        {
 +            leg[nset] = strdup("Sum");
 +            xvgr_legend(out, nset+1, (const char**)leg, oenv);
 +        }
 +        else
 +        {
 +            xvgr_legend(out, nset, (const char**)leg, oenv);
 +        }
 +
 +        snew(bIsEner, nset);
 +        for (i = 0; (i < nset); i++)
 +        {
 +            bIsEner[i] = FALSE;
 +            for (j = 0; (j <= F_ETOT); j++)
 +            {
 +                bIsEner[i] = bIsEner[i] ||
 +                    (gmx_strcasecmp(interaction_function[j].longname, leg[i]) == 0);
 +            }
 +        }
 +
 +        if (bPrAll && nset > 1)
 +        {
 +            gmx_fatal(FARGS, "Printing averages can only be done when a single set is selected");
 +        }
 +
 +        time = NULL;
 +
 +        if (bORIRE || bOTEN)
 +        {
 +            get_orires_parms(ftp2fn(efTPX, NFILE, fnm), &nor, &nex, &or_label, &oobs);
 +        }
 +
 +        if (bORIRE)
 +        {
 +            if (bOrinst)
 +            {
 +                enx_i = enxORI;
 +            }
 +            else
 +            {
 +                enx_i = enxOR;
 +            }
 +
 +            if (bORA || bODA)
 +            {
 +                snew(orient, nor);
 +            }
 +            if (bODR)
 +            {
 +                snew(odrms, nor);
 +            }
 +            if (bORT || bODT)
 +            {
 +                fprintf(stderr, "Select the orientation restraint labels you want (-1 is all)\n");
 +                fprintf(stderr, "End your selection with 0\n");
 +                j     = -1;
 +                orsel = NULL;
 +                do
 +                {
 +                    j++;
 +                    srenew(orsel, j+1);
 +                    if (1 != scanf("%d", &(orsel[j])))
 +                    {
 +                        gmx_fatal(FARGS, "Error reading user input");
 +                    }
 +                }
 +                while (orsel[j] > 0);
 +                if (orsel[0] == -1)
 +                {
 +                    fprintf(stderr, "Selecting all %d orientation restraints\n", nor);
 +                    norsel = nor;
 +                    srenew(orsel, nor);
 +                    for (i = 0; i < nor; i++)
 +                    {
 +                        orsel[i] = i;
 +                    }
 +                }
 +                else
 +                {
 +                    /* Build the selection */
 +                    norsel = 0;
 +                    for (i = 0; i < j; i++)
 +                    {
 +                        for (k = 0; k < nor; k++)
 +                        {
 +                            if (or_label[k] == orsel[i])
 +                            {
 +                                orsel[norsel] = k;
 +                                norsel++;
 +                                break;
 +                            }
 +                        }
 +                        if (k == nor)
 +                        {
 +                            fprintf(stderr, "Orientation restraint label %d not found\n",
 +                                    orsel[i]);
 +                        }
 +                    }
 +                }
 +                snew(odtleg, norsel);
 +                for (i = 0; i < norsel; i++)
 +                {
 +                    snew(odtleg[i], 256);
 +                    sprintf(odtleg[i], "%d", or_label[orsel[i]]);
 +                }
 +                if (bORT)
 +                {
 +                    fort = xvgropen(opt2fn("-ort", NFILE, fnm), "Calculated orientations",
 +                                    "Time (ps)", "", oenv);
 +                    if (bOrinst)
 +                    {
 +                        fprintf(fort, "%s", orinst_sub);
 +                    }
 +                    xvgr_legend(fort, norsel, (const char**)odtleg, oenv);
 +                }
 +                if (bODT)
 +                {
 +                    fodt = xvgropen(opt2fn("-odt", NFILE, fnm),
 +                                    "Orientation restraint deviation",
 +                                    "Time (ps)", "", oenv);
 +                    if (bOrinst)
 +                    {
 +                        fprintf(fodt, "%s", orinst_sub);
 +                    }
 +                    xvgr_legend(fodt, norsel, (const char**)odtleg, oenv);
 +                }
 +            }
 +        }
 +        if (bOTEN)
 +        {
 +            foten = xvgropen(opt2fn("-oten", NFILE, fnm),
 +                             "Order tensor", "Time (ps)", "", oenv);
 +            snew(otenleg, bOvec ? nex*12 : nex*3);
 +            for (i = 0; i < nex; i++)
 +            {
 +                for (j = 0; j < 3; j++)
 +                {
 +                    sprintf(buf, "eig%d", j+1);
 +                    otenleg[(bOvec ? 12 : 3)*i+j] = strdup(buf);
 +                }
 +                if (bOvec)
 +                {
 +                    for (j = 0; j < 9; j++)
 +                    {
 +                        sprintf(buf, "vec%d%s", j/3+1, j%3 == 0 ? "x" : (j%3 == 1 ? "y" : "z"));
 +                        otenleg[12*i+3+j] = strdup(buf);
 +                    }
 +                }
 +            }
 +            xvgr_legend(foten, bOvec ? nex*12 : nex*3, (const char**)otenleg, oenv);
 +        }
 +    }
 +    else if (bDisRe)
 +    {
 +        nbounds = get_bounds(ftp2fn(efTPX, NFILE, fnm), &bounds, &index, &pair, &npairs,
 +                             &mtop, &top, &ir);
 +        snew(violaver, npairs);
 +        out = xvgropen(opt2fn("-o", NFILE, fnm), "Sum of Violations",
 +                       "Time (ps)", "nm", oenv);
 +        xvgr_legend(out, 2, drleg, oenv);
 +        if (bDRAll)
 +        {
 +            fp_pairs = xvgropen(opt2fn("-pairs", NFILE, fnm), "Pair Distances",
 +                                "Time (ps)", "Distance (nm)", oenv);
 +            if (output_env_get_print_xvgr_codes(oenv))
 +            {
 +                fprintf(fp_pairs, "@ subtitle \"averaged (tau=%g) and instantaneous\"\n",
 +                        ir.dr_tau);
 +            }
 +        }
 +    }
 +    else if (bDHDL)
 +    {
 +        get_dhdl_parms(ftp2fn(efTPX, NFILE, fnm), &ir);
 +    }
 +
 +    /* Initiate energies and set them to zero */
 +    edat.nsteps  = 0;
 +    edat.npoints = 0;
 +    edat.nframes = 0;
 +    edat.step    = NULL;
 +    edat.steps   = NULL;
 +    edat.points  = NULL;
 +    snew(edat.s, nset);
 +
 +    /* Initiate counters */
 +    teller       = 0;
 +    teller_disre = 0;
 +    bFoundStart  = FALSE;
 +    start_step   = 0;
 +    start_t      = 0;
 +    do
 +    {
 +        /* This loop searches for the first frame (when -b option is given),
 +         * or when this has been found it reads just one energy frame
 +         */
 +        do
 +        {
 +            bCont = do_enx(fp, &(frame[NEXT]));
 +            if (bCont)
 +            {
 +                timecheck = check_times(frame[NEXT].t);
 +            }
 +        }
 +        while (bCont && (timecheck < 0));
 +
 +        if ((timecheck == 0) && bCont)
 +        {
 +            /* We read a valid frame, so we can use it */
 +            fr = &(frame[NEXT]);
 +
 +            if (fr->nre > 0)
 +            {
 +                /* The frame contains energies, so update cur */
 +                cur  = NEXT;
 +
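 +                /* Grow the per-frame storage in chunks of 1000 frames to limit reallocations */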
 +                if (edat.nframes % 1000 == 0)
 +                {
 +                    srenew(edat.step, edat.nframes+1000);
 +                    memset(&(edat.step[edat.nframes]), 0, 1000*sizeof(edat.step[0]));
 +                    srenew(edat.steps, edat.nframes+1000);
 +                    memset(&(edat.steps[edat.nframes]), 0, 1000*sizeof(edat.steps[0]));
 +                    srenew(edat.points, edat.nframes+1000);
 +                    memset(&(edat.points[edat.nframes]), 0, 1000*sizeof(edat.points[0]));
 +
 +                    for (i = 0; i < nset; i++)
 +                    {
 +                        srenew(edat.s[i].ener, edat.nframes+1000);
 +                        memset(&(edat.s[i].ener[edat.nframes]), 0,
 +                               1000*sizeof(edat.s[i].ener[0]));
 +                        srenew(edat.s[i].es, edat.nframes+1000);
 +                        memset(&(edat.s[i].es[edat.nframes]), 0,
 +                               1000*sizeof(edat.s[i].es[0]));
 +                    }
 +                }
 +
 +                nfr            = edat.nframes;
 +                edat.step[nfr] = fr->step;
 +
 +                if (!bFoundStart)
 +                {
 +                    bFoundStart = TRUE;
 +                    /* Initiate the previous step data */
 +                    start_step = fr->step;
 +                    start_t    = fr->t;
 +                    /* Initiate the energy sums */
 +                    edat.steps[nfr]  = 1;
 +                    edat.points[nfr] = 1;
 +                    for (i = 0; i < nset; i++)
 +                    {
 +                        sss                    = set[i];
 +                        edat.s[i].es[nfr].sum  = fr->ener[sss].e;
 +                        edat.s[i].es[nfr].sum2 = 0;
 +                    }
 +                    edat.nsteps  = 1;
 +                    edat.npoints = 1;
 +                }
 +                else
 +                {
 +                    edat.steps[nfr] = fr->nsteps;
 +                    {
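 +                        /* Exact sums can only be used when the energy frames cover consecutive step intervals without gaps */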
 +                        if (fr->step - start_step + 1 == edat.nsteps + fr->nsteps)
 +                        {
 +                            if (fr->nsum <= 1)
 +                            {
 +                                edat.points[nfr] = 1;
 +                                for (i = 0; i < nset; i++)
 +                                {
 +                                    sss                    = set[i];
 +                                    edat.s[i].es[nfr].sum  = fr->ener[sss].e;
 +                                    edat.s[i].es[nfr].sum2 = 0;
 +                                }
 +                                edat.npoints += 1;
 +                            }
 +                            else
 +                            {
 +                                edat.points[nfr] = fr->nsum;
 +                                for (i = 0; i < nset; i++)
 +                                {
 +                                    sss                    = set[i];
 +                                    edat.s[i].es[nfr].sum  = fr->ener[sss].esum;
 +                                    edat.s[i].es[nfr].sum2 = fr->ener[sss].eav;
 +                                }
 +                                edat.npoints += fr->nsum;
 +                            }
 +                        }
 +                        else
 +                        {
 +                            /* The interval does not match fr->nsteps:
 +                             * can not do exact averages.
 +                             */
 +                            edat.npoints = 0;
 +                        }
 +                        edat.nsteps = fr->step - start_step + 1;
 +                    }
 +                }
 +                for (i = 0; i < nset; i++)
 +                {
 +                    edat.s[i].ener[nfr] = fr->ener[set[i]].e;
 +                }
 +            }
 +            /*
 +             * Define distance restraint legends. Can only be done after
 +             * the first frame has been read... (Then we know how many there are)
 +             */
 +            blk_disre = find_block_id_enxframe(fr, enxDISRE, NULL);
 +            if (bDisRe && bDRAll && !leg && blk_disre)
 +            {
 +                t_iatom   *fa;
 +                t_iparams *ip;
 +
 +                fa = top->idef.il[F_DISRES].iatoms;
 +                ip = top->idef.iparams;
 +                if (blk_disre->nsub != 2 ||
 +                    (blk_disre->sub[0].nr != blk_disre->sub[1].nr) )
 +                {
 +                    gmx_incons("Number of disre sub-blocks not equal to 2");
 +                }
 +
 +                ndisre = blk_disre->sub[0].nr;
 +                if (ndisre != top->idef.il[F_DISRES].nr/3)
 +                {
 +                    gmx_fatal(FARGS, "Number of disre pairs in the energy file (%d) does not match the number in the run input file (%d)\n",
 +                              ndisre, top->idef.il[F_DISRES].nr/3);
 +                }
 +                snew(pairleg, ndisre);
 +                for (i = 0; i < ndisre; i++)
 +                {
 +                    snew(pairleg[i], 30);
 +                    j = fa[3*i+1];
 +                    k = fa[3*i+2];
 +                    gmx_mtop_atominfo_global(&mtop, j, &anm_j, &resnr_j, &resnm_j);
 +                    gmx_mtop_atominfo_global(&mtop, k, &anm_k, &resnr_k, &resnm_k);
 +                    sprintf(pairleg[i], "%d %s %d %s (%d)",
 +                            resnr_j, anm_j, resnr_k, anm_k,
 +                            ip[fa[3*i]].disres.label);
 +                }
 +                set = select_it(ndisre, pairleg, &nset);
 +                snew(leg, 2*nset);
 +                for (i = 0; (i < nset); i++)
 +                {
 +                    snew(leg[2*i], 32);
 +                    sprintf(leg[2*i],  "a %s", pairleg[set[i]]);
 +                    snew(leg[2*i+1], 32);
 +                    sprintf(leg[2*i+1], "i %s", pairleg[set[i]]);
 +                }
 +                xvgr_legend(fp_pairs, 2*nset, (const char**)leg, oenv);
 +            }
 +
 +            /*
 +             * Store energies for analysis afterwards...
 +             */
 +            if (!bDisRe && !bDHDL && (fr->nre > 0))
 +            {
 +                if (edat.nframes % 1000 == 0)
 +                {
 +                    srenew(time, edat.nframes+1000);
 +                }
 +                time[edat.nframes] = fr->t;
 +                edat.nframes++;
 +            }
 +            /*
 +             * Printing time, only when we do not want to skip frames
 +             */
 +            if (!skip || teller % skip == 0)
 +            {
 +                if (bDisRe)
 +                {
 +                    /*******************************************
 +                     * D I S T A N C E   R E S T R A I N T S
 +                     *******************************************/
 +                    if (ndisre > 0)
 +                    {
 +#ifndef GMX_DOUBLE
 +                        float  *disre_rt     = blk_disre->sub[0].fval;
 +                        float  *disre_rm3tav = blk_disre->sub[1].fval;
 +#else
 +                        double *disre_rt     = blk_disre->sub[0].dval;
 +                        double *disre_rm3tav = blk_disre->sub[1].dval;
 +#endif
 +
 +                        print_time(out, fr->t);
 +                        if (violaver == NULL)
 +                        {
 +                            snew(violaver, ndisre);
 +                        }
 +
 +                        /* Subtract bounds from distances, to calculate violations */
 +                        calc_violations(disre_rt, disre_rm3tav,
 +                                        nbounds, pair, bounds, violaver, &sumt, &sumaver);
 +
 +                        fprintf(out, "  %8.4f  %8.4f\n", sumaver, sumt);
 +                        if (bDRAll)
 +                        {
 +                            print_time(fp_pairs, fr->t);
 +                            for (i = 0; (i < nset); i++)
 +                            {
 +                                sss = set[i];
 +                                fprintf(fp_pairs, "  %8.4f", mypow(disre_rm3tav[sss], minthird));
 +                                fprintf(fp_pairs, "  %8.4f", disre_rt[sss]);
 +                            }
 +                            fprintf(fp_pairs, "\n");
 +                        }
 +                        teller_disre++;
 +                    }
 +                }
 +                else if (bDHDL)
 +                {
 +                    do_dhdl(fr, &ir, &fp_dhdl, opt2fn("-odh", NFILE, fnm), bDp, &dh_blocks, &dh_hists, &dh_samples, &dh_lambdas, oenv);
 +                }
 +
 +                /*******************************************
 +                 * E N E R G I E S
 +                 *******************************************/
 +                else
 +                {
 +                    if (fr->nre > 0)
 +                    {
 +                        if (bPrAll)
 +                        {
 +                            /* We skip frames with single points (usually only the first frame),
 +                             * since they would result in an average plot with outliers.
 +                             */
 +                            if (fr->nsum > 1)
 +                            {
 +                                print_time(out, fr->t);
 +                                print1(out, bDp, fr->ener[set[0]].e);
 +                                print1(out, bDp, fr->ener[set[0]].esum/fr->nsum);
 +                                print1(out, bDp, sqrt(fr->ener[set[0]].eav/fr->nsum));
 +                                fprintf(out, "\n");
 +                            }
 +                        }
 +                        else
 +                        {
 +                            print_time(out, fr->t);
 +                            if (bSum)
 +                            {
 +                                sum = 0;
 +                                for (i = 0; i < nset; i++)
 +                                {
 +                                    sum += fr->ener[set[i]].e;
 +                                }
 +                                print1(out, bDp, sum/nmol-ezero);
 +                            }
 +                            else
 +                            {
 +                                for (i = 0; (i < nset); i++)
 +                                {
 +                                    if (bIsEner[i])
 +                                    {
 +                                        print1(out, bDp, (fr->ener[set[i]].e)/nmol-ezero);
 +                                    }
 +                                    else
 +                                    {
 +                                        print1(out, bDp, fr->ener[set[i]].e);
 +                                    }
 +                                }
 +                            }
 +                            fprintf(out, "\n");
 +                        }
 +                    }
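 +                    /* enx_i is enxORI (instantaneous) or enxOR (time/ensemble averaged), selected via -orinst above */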
 +                    blk = find_block_id_enxframe(fr, enx_i, NULL);
 +                    if (bORIRE && blk)
 +                    {
 +#ifndef GMX_DOUBLE
 +                        xdr_datatype dt = xdr_datatype_float;
 +#else
 +                        xdr_datatype dt = xdr_datatype_double;
 +#endif
 +                        real        *vals;
 +
 +                        if ( (blk->nsub != 1) || (blk->sub[0].type != dt) )
 +                        {
 +                            gmx_fatal(FARGS, "Orientational restraints read in incorrectly");
 +                        }
 +#ifndef GMX_DOUBLE
 +                        vals = blk->sub[0].fval;
 +#else
 +                        vals = blk->sub[0].dval;
 +#endif
 +
 +                        if (blk->sub[0].nr != (size_t)nor)
 +                        {
 +                            gmx_fatal(FARGS, "Number of orientation restraints in the energy file (%d) does not match the topology (%d)", blk->sub[0].nr, nor);
 +                        }
 +                        if (bORA || bODA)
 +                        {
 +                            for (i = 0; i < nor; i++)
 +                            {
 +                                orient[i] += vals[i];
 +                            }
 +                        }
 +                        if (bODR)
 +                        {
 +                            for (i = 0; i < nor; i++)
 +                            {
 +                                odrms[i] += sqr(vals[i]-oobs[i]);
 +                            }
 +                        }
 +                        if (bORT)
 +                        {
 +                            fprintf(fort, "  %10f", fr->t);
 +                            for (i = 0; i < norsel; i++)
 +                            {
 +                                fprintf(fort, " %g", vals[orsel[i]]);
 +                            }
 +                            fprintf(fort, "\n");
 +                        }
 +                        if (bODT)
 +                        {
 +                            fprintf(fodt, "  %10f", fr->t);
 +                            for (i = 0; i < norsel; i++)
 +                            {
 +                                fprintf(fodt, " %g", vals[orsel[i]]-oobs[orsel[i]]);
 +                            }
 +                            fprintf(fodt, "\n");
 +                        }
 +                        norfr++;
 +                    }
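 +                    /* The enxORT block holds, per experiment, 3 order-tensor eigenvalues followed by 9 eigenvector components */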
 +                    blk = find_block_id_enxframe(fr, enxORT, NULL);
 +                    if (bOTEN && blk)
 +                    {
 +#ifndef GMX_DOUBLE
 +                        xdr_datatype dt = xdr_datatype_float;
 +#else
 +                        xdr_datatype dt = xdr_datatype_double;
 +#endif
 +                        real        *vals;
 +
 +                        if ( (blk->nsub != 1) || (blk->sub[0].type != dt) )
 +                        {
 +                            gmx_fatal(FARGS, "Orientational restraints read in incorrectly");
 +                        }
 +#ifndef GMX_DOUBLE
 +                        vals = blk->sub[0].fval;
 +#else
 +                        vals = blk->sub[0].dval;
 +#endif
 +
 +                        if (blk->sub[0].nr != (size_t)(nex*12))
 +                        {
 +                            gmx_fatal(FARGS, "Number of orientation experiments in the energy file (%d) does not match the topology (%d)",
 +                                      blk->sub[0].nr/12, nex);
 +                        }
 +                        fprintf(foten, "  %10f", fr->t);
 +                        for (i = 0; i < nex; i++)
 +                        {
 +                            for (j = 0; j < (bOvec ? 12 : 3); j++)
 +                            {
 +                                fprintf(foten, " %g", vals[i*12+j]);
 +                            }
 +                        }
 +                        fprintf(foten, "\n");
 +                    }
 +                }
 +            }
 +            teller++;
 +        }
 +    }
 +    while (bCont && (timecheck == 0));
 +
 +    fprintf(stderr, "\n");
 +    close_enx(fp);
 +    if (out)
 +    {
 +        ffclose(out);
 +    }
 +
 +    if (bDRAll)
 +    {
 +        ffclose(fp_pairs);
 +    }
 +
 +    if (bORT)
 +    {
 +        ffclose(fort);
 +    }
 +    if (bODT)
 +    {
 +        ffclose(fodt);
 +    }
 +    if (bORA)
 +    {
 +        out = xvgropen(opt2fn("-ora", NFILE, fnm),
 +                       "Average calculated orientations",
 +                       "Restraint label", "", oenv);
 +        if (bOrinst)
 +        {
 +            fprintf(out, "%s", orinst_sub);
 +        }
 +        for (i = 0; i < nor; i++)
 +        {
 +            fprintf(out, "%5d  %g\n", or_label[i], orient[i]/norfr);
 +        }
 +        ffclose(out);
 +    }
 +    if (bODA)
 +    {
 +        out = xvgropen(opt2fn("-oda", NFILE, fnm),
 +                       "Average restraint deviation",
 +                       "Restraint label", "", oenv);
 +        if (bOrinst)
 +        {
 +            fprintf(out, "%s", orinst_sub);
 +        }
 +        for (i = 0; i < nor; i++)
 +        {
 +            fprintf(out, "%5d  %g\n", or_label[i], orient[i]/norfr-oobs[i]);
 +        }
 +        ffclose(out);
 +    }
 +    if (bODR)
 +    {
 +        out = xvgropen(opt2fn("-odr", NFILE, fnm),
 +                       "RMS orientation restraint deviations",
 +                       "Restraint label", "", oenv);
 +        if (bOrinst)
 +        {
 +            fprintf(out, "%s", orinst_sub);
 +        }
 +        for (i = 0; i < nor; i++)
 +        {
 +            fprintf(out, "%5d  %g\n", or_label[i], sqrt(odrms[i]/norfr));
 +        }
 +        ffclose(out);
 +    }
 +    if (bOTEN)
 +    {
 +        ffclose(foten);
 +    }
 +
 +    if (bDisRe)
 +    {
 +        analyse_disre(opt2fn("-viol", NFILE, fnm),
 +                      teller_disre, violaver, bounds, index, pair, nbounds, oenv);
 +    }
 +    else if (bDHDL)
 +    {
 +        if (fp_dhdl)
 +        {
 +            ffclose(fp_dhdl);
 +            printf("\n\nWrote %d lambda values with %d samples as ",
 +                   dh_lambdas, dh_samples);
 +            if (dh_hists > 0)
 +            {
 +                printf("%d dH histograms ", dh_hists);
 +            }
 +            if (dh_blocks > 0)
 +            {
 +                printf("%d dH data blocks ", dh_blocks);
 +            }
 +            printf("to %s\n", opt2fn("-odh", NFILE, fnm));
 +
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS, "No dH data in %s\n", opt2fn("-f", NFILE, fnm));
 +        }
 +
 +    }
 +    else
 +    {
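 +        /* Average time between the stored frames; used for the fluctuation properties below */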
 +        double dt = (frame[cur].t-start_t)/(edat.nframes-1);
 +        analyse_ener(opt2bSet("-corr", NFILE, fnm), opt2fn("-corr", NFILE, fnm),
 +                     bFee, bSum, opt2parg_bSet("-nmol", npargs, ppa),
 +                     bVisco, opt2fn("-vis", NFILE, fnm),
 +                     nmol,
 +                     start_step, start_t, frame[cur].step, frame[cur].t,
 +                     time, reftemp, &edat,
 +                     nset, set, bIsEner, leg, enm, Vaver, ezero, nbmin, nbmax,
 +                     oenv);
 +        if (bFluctProps)
 +        {
 +            calc_fluctuation_props(stdout, bDriftCorr, dt, nset, nmol, leg, &edat,
 +                                   nbmin, nbmax);
 +        }
 +    }
 +    if (opt2bSet("-f2", NFILE, fnm))
 +    {
 +        fec(opt2fn("-f2", NFILE, fnm), opt2fn("-ravg", NFILE, fnm),
 +            reftemp, nset, set, leg, &edat, time, oenv);
 +    }
 +
 +    {
 +        const char *nxy = "-nxy";
 +
 +        do_view(oenv, opt2fn("-o", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-ravg", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-ora", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-ort", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-oda", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-odr", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-odt", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-oten", NFILE, fnm), nxy);
 +        do_view(oenv, opt2fn_null("-odh", NFILE, fnm), nxy);
 +    }
 +
 +    return 0;
 +}
index d515c7b19a62e4899535e103e0225ceedab1e78d,0000000000000000000000000000000000000000..47d68cb2717f501a649976f272aeda20f6308ca0
mode 100644,000000..100644
--- /dev/null
@@@ -1,131 -1,0 +1,135 @@@
-  * Copyright (c) 2013, by the GROMACS development team, led by
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
++ * Copyright (c) 2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +
 +#ifndef _sim_util_h
 +#define _sim_util_h
 +
 +#include "typedefs.h"
 +#include "mdebin.h"
 +#include "update.h"
 +#include "vcm.h"
 +#include "../fileio/enxio.h"
 +#include "../fileio/mdoutf.h"
 +#include "../timing/walltime_accounting.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
 +typedef struct gmx_global_stat *gmx_global_stat_t;
 +
 +void do_pbc_first(FILE *log, matrix box, t_forcerec *fr,
 +                  t_graph *graph, rvec x[]);
 +
 +void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
 +                       gmx_mtop_t *mtop, rvec x[]);
 +
 +void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
 +                 gmx_mtop_t *mtop, rvec x[]);
 +
 +
 +
 +/* ROUTINES from stat.c */
 +gmx_global_stat_t global_stat_init(t_inputrec *ir);
 +
 +void global_stat_destroy(gmx_global_stat_t gs);
 +
 +void global_stat(FILE *log, gmx_global_stat_t gs,
 +                 t_commrec *cr, gmx_enerdata_t *enerd,
 +                 tensor fvir, tensor svir, rvec mu_tot,
 +                 t_inputrec *inputrec,
 +                 gmx_ekindata_t *ekind,
 +                 gmx_constr_t constr, t_vcm *vcm,
 +                 int nsig, real *sig,
 +                 gmx_mtop_t *top_global, t_state *state_local,
 +                 gmx_bool bSumEkinhOld, int flags);
 +/* Communicate statistics over cr->mpi_comm_mysim */
 +
 +int do_per_step(gmx_int64_t step, gmx_int64_t nstep);
 +/* Return TRUE if I/O should be done */
 +
 +/* ROUTINES from sim_util.c */
 +
 +void print_time(FILE *out, gmx_walltime_accounting_t walltime_accounting,
 +                gmx_int64_t step, t_inputrec *ir, t_commrec *cr);
 +
 +void print_date_and_time(FILE *log, int pid, const char *title,
 +                         const gmx_walltime_accounting_t walltime_accounting);
 +
++void print_start(FILE *fplog, t_commrec *cr,
++                 gmx_walltime_accounting_t walltime_accounting,
++                 const char *name);
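++/* Print a "Started <name>" message with the run start date and time */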
++
 +void finish_run(FILE *log, t_commrec *cr,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 +                gmx_walltime_accounting_t walltime_accounting,
 +                wallclock_gpu_t *gputimes,
 +                gmx_bool bWriteStat);
 +
 +void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr);
 +
 +void calc_dispcorr(FILE *fplog, t_inputrec *ir, t_forcerec *fr,
 +                   gmx_int64_t step, int natoms,
 +                   matrix box, real lambda, tensor pres, tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr);
 +
 +void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, real *lambda, double *lam0);
 +
 +void do_constrain_first(FILE *log, gmx_constr_t constr,
 +                        t_inputrec *inputrec, t_mdatoms *md,
 +                        t_state *state, t_commrec *cr, t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top);
 +
 +void init_md(FILE *fplog,
 +             t_commrec *cr, t_inputrec *ir, const output_env_t oenv,
 +             double *t, double *t0,
 +             real *lambda, int *fep_state, double *lam0,
 +             t_nrnb *nrnb, gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile, const t_filenm fnm[],
 +             gmx_mdoutf_t *outf, t_mdebin **mdebin,
 +             tensor force_vir, tensor shake_vir,
 +             rvec mu_tot,
 +             gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags);
 +/* Routine in sim_util.c */
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif  /* _sim_util_h */
index ed15e138ce0bfa670fe9f7b24976ef544f03152b,0000000000000000000000000000000000000000..c2e1d3178457378031b84ae4baaa038db204dac7
mode 100644,000000..100644
--- /dev/null
@@@ -1,2867 -1,0 +1,2863 @@@
-     char buf[STRLEN];
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "network.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "random.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "update.h"
 +#include "constr.h"
 +#include "vec.h"
 +#include "tgroup.h"
 +#include "mdebin.h"
 +#include "vsite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "md_support.h"
++#include "sim_util.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "mdatoms.h"
 +#include "ns.h"
 +#include "mtop_util.h"
 +#include "pme.h"
 +#include "bondf.h"
 +#include "gmx_omp_nthreads.h"
 +#include "md_logging.h"
 +
 +#include "gromacs/fileio/confio.h"
 +#include "gromacs/fileio/trajectory_writing.h"
 +#include "gromacs/linearalgebra/mtxio.h"
 +#include "gromacs/linearalgebra/sparsematrix.h"
 +#include "gromacs/timing/wallcycle.h"
 +#include "gromacs/timing/walltime_accounting.h"
 +
 +typedef struct {
 +    t_state  s;
 +    rvec    *f;
 +    real     epot;
 +    real     fnorm;
 +    real     fmax;
 +    int      a_fmax;
 +} em_state_t;
 +
 +static em_state_t *init_em_state()
 +{
 +    em_state_t *ems;
 +
 +    snew(ems, 1);
 +
 +    /* does this need to be here?  Should the array be declared differently (statically) in the state definition? */
 +    snew(ems->s.lambda, efptNR);
 +
 +    return ems;
 +}
 +
 +static void print_em_start(FILE                     *fplog,
 +                           t_commrec                *cr,
 +                           gmx_walltime_accounting_t walltime_accounting,
 +                           gmx_wallcycle_t           wcycle,
 +                           const char               *name)
 +{
-     sprintf(buf, "Started %s", name);
-     print_date_and_time(fplog, cr->nodeid, buf, NULL);
 +    walltime_accounting_start(walltime_accounting);
 +    wallcycle_start(wcycle, ewcRUN);
++    print_start(fplog, cr, walltime_accounting, name);
 +}
 +static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
 +                        gmx_wallcycle_t           wcycle)
 +{
 +    wallcycle_stop(wcycle, ewcRUN);
 +
 +    walltime_accounting_end(walltime_accounting);
 +}
 +
 +static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
 +{
 +    fprintf(out, "\n");
 +    fprintf(out, "%s:\n", minimizer);
 +    fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
 +    fprintf(out, "   Number of steps    = %12d\n", nsteps);
 +}
 +
 +static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
 +{
 +    char buffer[2048];
 +    if (bLastStep)
 +    {
 +        sprintf(buffer,
 +                "\nEnergy minimization reached the maximum number "
 +                "of steps before the forces reached the requested "
 +                "precision Fmax < %g.\n", ftol);
 +    }
 +    else
 +    {
 +        sprintf(buffer,
 +                "\nEnergy minimization has stopped, but the forces have "
 +                "not converged to the requested precision Fmax < %g (which "
 +                "may not be possible for your system). It stopped "
 +                "because the algorithm tried to make a new step whose size "
 +                "was too small, or there was no change in the energy since "
 +                "last step. Either way, we regard the minimization as "
 +                "converged to within the available machine precision, "
 +                "given your starting configuration and EM parameters.\n%s%s",
 +                ftol,
 +                sizeof(real) < sizeof(double) ?
 +                "\nDouble precision normally gives you higher accuracy, but "
 +                "this is often not needed for preparing to run molecular "
 +                "dynamics.\n" :
 +                "",
 +                bConstrain ?
 +                "You might need to increase your constraint accuracy, or turn\n"
 +                "off constraints altogether (set constraints = none in mdp file)\n" :
 +                "");
 +    }
 +    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 +}
 +
 +
 +
 +static void print_converged(FILE *fp, const char *alg, real ftol,
 +                            gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
 +                            real epot, real fmax, int nfmax, real fnorm)
 +{
 +    char buf[STEPSTRSIZE];
 +
 +    if (bDone)
 +    {
 +        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
 +                alg, ftol, gmx_step_str(count, buf));
 +    }
 +    else if (count < nsteps)
 +    {
 +        fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
 +                "but did not reach the requested Fmax < %g.\n",
 +                alg, gmx_step_str(count, buf), ftol);
 +    }
 +    else
 +    {
 +        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
 +                alg, ftol, gmx_step_str(count, buf));
 +    }
 +
 +#ifdef GMX_DOUBLE
 +    fprintf(fp, "Potential Energy  = %21.14e\n", epot);
 +    fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
 +    fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
 +#else
 +    fprintf(fp, "Potential Energy  = %14.7e\n", epot);
 +    fprintf(fp, "Maximum force     = %14.7e on atom %d\n", fmax, nfmax+1);
 +    fprintf(fp, "Norm of force     = %14.7e\n", fnorm);
 +#endif
 +}
 +
 +static void get_f_norm_max(t_commrec *cr,
 +                           t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
 +                           real *fnorm, real *fmax, int *a_fmax)
 +{
 +    double fnorm2, *sum;
 +    real   fmax2, fmax2_0, fam;
 +    int    la_max, a_max, start, end, i, m, gf;
 +
 +    /* This routine finds the norm of the force and the largest force
 +     * component, together with the atom it acts on.
 +     * On parallel machines the global values are determined.
 +     */
 +    fnorm2 = 0;
 +    fmax2  = 0;
 +    la_max = -1;
 +    gf     = 0;
 +    start  = mdatoms->start;
 +    end    = mdatoms->homenr + start;
 +    if (mdatoms->cFREEZE)
 +    {
 +        for (i = start; i < end; i++)
 +        {
 +            gf  = mdatoms->cFREEZE[i];
 +            fam = 0;
 +            for (m = 0; m < DIM; m++)
 +            {
 +                if (!opts->nFreeze[gf][m])
 +                {
 +                    fam += sqr(f[i][m]);
 +                }
 +            }
 +            fnorm2 += fam;
 +            if (fam > fmax2)
 +            {
 +                fmax2  = fam;
 +                la_max = i;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for (i = start; i < end; i++)
 +        {
 +            fam     = norm2(f[i]);
 +            fnorm2 += fam;
 +            if (fam > fmax2)
 +            {
 +                fmax2  = fam;
 +                la_max = i;
 +            }
 +        }
 +    }
 +
 +    if (la_max >= 0 && DOMAINDECOMP(cr))
 +    {
 +        a_max = cr->dd->gatindex[la_max];
 +    }
 +    else
 +    {
 +        a_max = la_max;
 +    }
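 +    /* In parallel each node contributes its local (fmax2, a_max) pair plus
 +     * its partial fnorm2; after the global sum fnorm2 holds the total and
 +     * we scan the pairs for the node with the largest fmax2.
 +     */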
 +    if (PAR(cr))
 +    {
 +        snew(sum, 2*cr->nnodes+1);
 +        sum[2*cr->nodeid]   = fmax2;
 +        sum[2*cr->nodeid+1] = a_max;
 +        sum[2*cr->nnodes]   = fnorm2;
 +        gmx_sumd(2*cr->nnodes+1, sum, cr);
 +        fnorm2 = sum[2*cr->nnodes];
 +        /* Determine the global maximum */
 +        for (i = 0; i < cr->nnodes; i++)
 +        {
 +            if (sum[2*i] > fmax2)
 +            {
 +                fmax2 = sum[2*i];
 +                a_max = (int)(sum[2*i+1] + 0.5);
 +            }
 +        }
 +        sfree(sum);
 +    }
 +
 +    if (fnorm)
 +    {
 +        *fnorm = sqrt(fnorm2);
 +    }
 +    if (fmax)
 +    {
 +        *fmax  = sqrt(fmax2);
 +    }
 +    if (a_fmax)
 +    {
 +        *a_fmax = a_max;
 +    }
 +}
 +
 +static void get_state_f_norm_max(t_commrec *cr,
 +                                 t_grpopts *opts, t_mdatoms *mdatoms,
 +                                 em_state_t *ems)
 +{
 +    get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
 +}
 +
 +void init_em(FILE *fplog, const char *title,
 +             t_commrec *cr, t_inputrec *ir,
 +             t_state *state_global, gmx_mtop_t *top_global,
 +             em_state_t *ems, gmx_localtop_t **top,
 +             rvec **f, rvec **f_global,
 +             t_nrnb *nrnb, rvec mu_tot,
 +             t_forcerec *fr, gmx_enerdata_t **enerd,
 +             t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
 +             gmx_vsite_t *vsite, gmx_constr_t constr,
 +             int nfile, const t_filenm fnm[],
 +             gmx_mdoutf_t *outf, t_mdebin **mdebin)
 +{
 +    int  start, homenr, i;
 +    real dvdl_constr;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Initiating %s\n", title);
 +    }
 +
 +    state_global->ngtc = 0;
 +
 +    /* Initialize lambda variables */
 +    initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
 +
 +    init_nrnb(nrnb);
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        *top = dd_init_local_top(top_global);
 +
 +        dd_init_local_state(cr->dd, state_global, &ems->s);
 +
 +        *f = NULL;
 +
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
 +                            state_global, top_global, ir,
 +                            &ems->s, &ems->f, mdatoms, *top,
 +                            fr, vsite, NULL, constr,
 +                            nrnb, NULL, FALSE);
 +        dd_store_state(cr->dd, &ems->s);
 +
 +        if (ir->nstfout)
 +        {
 +            snew(*f_global, top_global->natoms);
 +        }
 +        else
 +        {
 +            *f_global = NULL;
 +        }
 +        *graph = NULL;
 +    }
 +    else
 +    {
 +        snew(*f, top_global->natoms);
 +
 +        /* Just copy the state */
 +        ems->s = *state_global;
 +        snew(ems->s.x, ems->s.nalloc);
 +        snew(ems->f, ems->s.nalloc);
 +        for (i = 0; i < state_global->natoms; i++)
 +        {
 +            copy_rvec(state_global->x[i], ems->s.x[i]);
 +        }
 +        copy_mat(state_global->box, ems->s.box);
 +
 +        if (PAR(cr) && ir->eI != eiNM)
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            *top = split_system(fplog, top_global, ir, cr);
 +
 +            pd_cg_range(cr, &fr->cg0, &fr->hcg);
 +        }
 +        else
 +        {
 +            *top = gmx_mtop_generate_local_top(top_global, ir);
 +        }
 +        *f_global = *f;
 +
 +        forcerec_set_excl_load(fr, *top, cr);
 +
 +        setup_bonded_threading(fr, &(*top)->idef);
 +
 +        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 +        {
 +            *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE);
 +        }
 +        else
 +        {
 +            *graph = NULL;
 +        }
 +
 +        if (PARTDECOMP(cr))
 +        {
 +            pd_at_range(cr, &start, &homenr);
 +            homenr -= start;
 +        }
 +        else
 +        {
 +            start  = 0;
 +            homenr = top_global->natoms;
 +        }
 +        atoms2md(top_global, ir, 0, NULL, start, homenr, mdatoms);
 +        update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
 +
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite, *top, mdatoms, cr);
 +        }
 +    }
 +
 +    if (constr)
 +    {
 +        if (ir->eConstrAlg == econtSHAKE &&
 +            gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
 +        {
 +            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
 +                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
 +        }
 +
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr, *top, ir, mdatoms, cr);
 +        }
 +
 +        if (!ir->bContinuation)
 +        {
 +            /* Constrain the starting coordinates */
 +            dvdl_constr = 0;
 +            constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
 +                      ir, NULL, cr, -1, 0, mdatoms,
 +                      ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
 +                      ems->s.lambda[efptFEP], &dvdl_constr,
 +                      NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        *gstat = global_stat_init(ir);
 +    }
 +
 +    *outf = init_mdoutf(nfile, fnm, 0, cr, ir, top_global, NULL);
 +
 +    snew(*enerd, 1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
 +                  *enerd);
 +
 +    if (mdebin != NULL)
 +    {
 +        /* Init bin for energy stuff */
 +        *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL);
 +    }
 +
 +    clear_rvec(mu_tot);
 +    calc_shifts(ems->s.box, fr->shift_vec);
 +}
 +
 +static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
 +                      gmx_walltime_accounting_t walltime_accounting,
 +                      gmx_wallcycle_t wcycle)
 +{
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_send_finish(cr);
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    em_time_end(walltime_accounting, wcycle);
 +}
 +
 +static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
 +{
 +    em_state_t tmp;
 +
 +    tmp   = *ems1;
 +    *ems1 = *ems2;
 +    *ems2 = tmp;
 +}
 +
 +static void copy_em_coords(em_state_t *ems, t_state *state)
 +{
 +    int i;
 +
 +    for (i = 0; (i < state->natoms); i++)
 +    {
 +        copy_rvec(ems->s.x[i], state->x[i]);
 +    }
 +}
 +
 +static void write_em_traj(FILE *fplog, t_commrec *cr,
 +                          gmx_mdoutf_t outf,
 +                          gmx_bool bX, gmx_bool bF, const char *confout,
 +                          gmx_mtop_t *top_global,
 +                          t_inputrec *ir, gmx_int64_t step,
 +                          em_state_t *state,
 +                          t_state *state_global, rvec *f_global)
 +{
 +    int mdof_flags;
 +
 +    if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 +    {
 +        copy_em_coords(state, state_global);
 +        f_global = state->f;
 +    }
 +
 +    mdof_flags = 0;
 +    if (bX)
 +    {
 +        mdof_flags |= MDOF_X;
 +    }
 +    if (bF)
 +    {
 +        mdof_flags |= MDOF_F;
 +    }
 +    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
 +                                     top_global, step, (double)step,
 +                                     &state->s, state_global, state->f, f_global);
 +
 +    if (confout != NULL && MASTER(cr))
 +    {
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 +        {
 +            /* Make molecules whole only for confout writing */
 +            do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
 +                        state_global->x);
 +        }
 +
 +        write_sto_conf_mtop(confout,
 +                            *top_global->name, top_global,
 +                            state_global->x, NULL, ir->ePBC, state_global->box);
 +    }
 +}
 +
 +static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
 +                       gmx_bool bMolPBC,
 +                       em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
 +                       gmx_constr_t constr, gmx_localtop_t *top,
 +                       t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                       gmx_int64_t count)
 +
 +{
 +    t_state *s1, *s2;
 +    int      i;
 +    int      start, end;
 +    rvec    *x1, *x2;
 +    real     dvdl_constr;
 +
 +    s1 = &ems1->s;
 +    s2 = &ems2->s;
 +
 +    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 +    {
 +        gmx_incons("state mismatch in do_em_step");
 +    }
 +
 +    s2->flags = s1->flags;
 +
 +    if (s2->nalloc != s1->nalloc)
 +    {
 +        s2->nalloc = s1->nalloc;
 +        srenew(s2->x, s1->nalloc);
 +        srenew(ems2->f,  s1->nalloc);
 +        if (s2->flags & (1<<estCGP))
 +        {
 +            srenew(s2->cg_p,  s1->nalloc);
 +        }
 +    }
 +
 +    s2->natoms = s1->natoms;
 +    copy_mat(s1->box, s2->box);
 +    /* Copy free energy state */
 +    for (i = 0; i < efptNR; i++)
 +    {
 +        s2->lambda[i] = s1->lambda[i];
 +    }
 +
 +    start = md->start;
 +    end   = md->start + md->homenr;
 +
 +    x1 = s1->x;
 +    x2 = s2->x;
 +
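 +    /* New positions: x2 = x1 + a*f for all non-frozen dimensions, while
 +     * frozen dimensions are copied unchanged. The update (and the copying
 +     * of the CG p vector and DD charge-group indices below) is spread over
 +     * OpenMP threads.
 +     */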
 +#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
 +    {
 +        int gf, i, m;
 +
 +        gf = 0;
 +#pragma omp for schedule(static) nowait
 +        for (i = start; i < end; i++)
 +        {
 +            if (md->cFREEZE)
 +            {
 +                gf = md->cFREEZE[i];
 +            }
 +            for (m = 0; m < DIM; m++)
 +            {
 +                if (ir->opts.nFreeze[gf][m])
 +                {
 +                    x2[i][m] = x1[i][m];
 +                }
 +                else
 +                {
 +                    x2[i][m] = x1[i][m] + a*f[i][m];
 +                }
 +            }
 +        }
 +
 +        if (s2->flags & (1<<estCGP))
 +        {
 +            /* Copy the CG p vector */
 +            x1 = s1->cg_p;
 +            x2 = s2->cg_p;
 +#pragma omp for schedule(static) nowait
 +            for (i = start; i < end; i++)
 +            {
 +                copy_rvec(x1[i], x2[i]);
 +            }
 +        }
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            s2->ddp_count = s1->ddp_count;
 +            if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
 +            {
 +#pragma omp barrier
 +                s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 +                srenew(s2->cg_gl, s2->cg_gl_nalloc);
 +#pragma omp barrier
 +            }
 +            s2->ncg_gl = s1->ncg_gl;
 +#pragma omp for schedule(static) nowait
 +            for (i = 0; i < s2->ncg_gl; i++)
 +            {
 +                s2->cg_gl[i] = s1->cg_gl[i];
 +            }
 +            s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 +        }
 +    }
 +
 +    if (constr)
 +    {
 +        wallcycle_start(wcycle, ewcCONSTR);
 +        dvdl_constr = 0;
 +        constrain(NULL, TRUE, TRUE, constr, &top->idef,
 +                  ir, NULL, cr, count, 0, md,
 +                  s1->x, s2->x, NULL, bMolPBC, s2->box,
 +                  s2->lambda[efptBONDED], &dvdl_constr,
 +                  NULL, NULL, nrnb, econqCoord, FALSE, 0, 0);
 +        wallcycle_stop(wcycle, ewcCONSTR);
 +    }
 +}
 +
 +static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
 +                                   gmx_mtop_t *top_global, t_inputrec *ir,
 +                                   em_state_t *ems, gmx_localtop_t *top,
 +                                   t_mdatoms *mdatoms, t_forcerec *fr,
 +                                   gmx_vsite_t *vsite, gmx_constr_t constr,
 +                                   t_nrnb *nrnb, gmx_wallcycle_t wcycle)
 +{
 +    /* Repartition the domain decomposition */
 +    wallcycle_start(wcycle, ewcDOMDEC);
 +    dd_partition_system(fplog, step, cr, FALSE, 1,
 +                        NULL, top_global, ir,
 +                        &ems->s, &ems->f,
 +                        mdatoms, top, fr, vsite, NULL, constr,
 +                        nrnb, wcycle, FALSE);
 +    dd_store_state(cr->dd, &ems->s);
 +    wallcycle_stop(wcycle, ewcDOMDEC);
 +}
 +
 +static void evaluate_energy(FILE *fplog, t_commrec *cr,
 +                            gmx_mtop_t *top_global,
 +                            em_state_t *ems, gmx_localtop_t *top,
 +                            t_inputrec *inputrec,
 +                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                            gmx_global_stat_t gstat,
 +                            gmx_vsite_t *vsite, gmx_constr_t constr,
 +                            t_fcdata *fcd,
 +                            t_graph *graph, t_mdatoms *mdatoms,
 +                            t_forcerec *fr, rvec mu_tot,
 +                            gmx_enerdata_t *enerd, tensor vir, tensor pres,
 +                            gmx_int64_t count, gmx_bool bFirst)
 +{
 +    real     t;
 +    gmx_bool bNS;
 +    int      nabnsb;
 +    tensor   force_vir, shake_vir, ekin;
 +    real     dvdl_constr, prescorr, enercorr, dvdlcorr;
 +    real     terminate = 0;
 +
 +    /* Set the time to the initial time; the time does not change during EM */
 +    t = inputrec->init_t;
 +
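 +    /* Decide whether to do neighbour searching: always for the first call
 +     * or an outdated DD state, every call when nstlist > 0, and with
 +     * nstlist = -1 only when atoms have moved beyond the neighbour-list
 +     * buffer.
 +     */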
 +    if (bFirst ||
 +        (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
 +    {
 +        /* This is the first state or an old state used before the last ns */
 +        bNS = TRUE;
 +    }
 +    else
 +    {
 +        bNS = FALSE;
 +        if (inputrec->nstlist > 0)
 +        {
 +            bNS = TRUE;
 +        }
 +        else if (inputrec->nstlist == -1)
 +        {
 +            nabnsb = natoms_beyond_ns_buffer(inputrec, fr, &top->cgs, NULL, ems->s.x);
 +            if (PAR(cr))
 +            {
 +                gmx_sumi(1, &nabnsb, cr);
 +            }
 +            bNS = (nabnsb > 0);
 +        }
 +    }
 +
 +    if (vsite)
 +    {
 +        construct_vsites(vsite, ems->s.x, 1, NULL,
 +                         top->idef.iparams, top->idef.il,
 +                         fr->ePBC, fr->bMolPBC, graph, cr, ems->s.box);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (bNS)
 +        {
 +            /* Repartition the domain decomposition */
 +            em_dd_partition_system(fplog, count, cr, top_global, inputrec,
 +                                   ems, top, mdatoms, fr, vsite, constr,
 +                                   nrnb, wcycle);
 +        }
 +    }
 +
 +    /* Calc force & energy on new trial position  */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole
 +     */
 +    do_force(fplog, cr, inputrec,
 +             count, nrnb, wcycle, top, &top_global->groups,
 +             ems->s.box, ems->s.x, &ems->s.hist,
 +             ems->f, force_vir, mdatoms, enerd, fcd,
 +             ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
 +             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
 +             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 +             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 +
 +    /* Clear the unused shake virial and pressure */
 +    clear_mat(shake_vir);
 +    clear_mat(pres);
 +
 +    /* Communicate stuff when parallel */
 +    if (PAR(cr) && inputrec->eI != eiNM)
 +    {
 +        wallcycle_start(wcycle, ewcMoveE);
 +
 +        global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
 +                    inputrec, NULL, NULL, NULL, 1, &terminate,
 +                    top_global, &ems->s, FALSE,
 +                    CGLO_ENERGY |
 +                    CGLO_PRESSURE |
 +                    CGLO_CONSTRAINT |
 +                    CGLO_FIRSTITERATE);
 +
 +        wallcycle_stop(wcycle, ewcMoveE);
 +    }
 +
 +    /* Calculate long range corrections to pressure and energy */
 +    calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
 +                  pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
 +    enerd->term[F_DISPCORR] = enercorr;
 +    enerd->term[F_EPOT]    += enercorr;
 +    enerd->term[F_PRES]    += prescorr;
 +    enerd->term[F_DVDL]    += dvdlcorr;
 +
 +    ems->epot = enerd->term[F_EPOT];
 +
 +    if (constr)
 +    {
 +        /* Project out the constraint components of the force */
 +        wallcycle_start(wcycle, ewcCONSTR);
 +        dvdl_constr = 0;
 +        constrain(NULL, FALSE, FALSE, constr, &top->idef,
 +                  inputrec, NULL, cr, count, 0, mdatoms,
 +                  ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
 +                  ems->s.lambda[efptBONDED], &dvdl_constr,
 +                  NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0);
 +        if (fr->bSepDVDL && fplog)
 +        {
 +            gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr);
 +        }
 +        enerd->term[F_DVDL_CONSTR] += dvdl_constr;
 +        m_add(force_vir, shake_vir, vir);
 +        wallcycle_stop(wcycle, ewcCONSTR);
 +    }
 +    else
 +    {
 +        copy_mat(force_vir, vir);
 +    }
 +
 +    clear_mat(ekin);
 +    enerd->term[F_PRES] =
 +        calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
 +
 +    sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
 +
 +    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 +    {
 +        get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
 +    }
 +}
 +
 +static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 +                              gmx_mtop_t *mtop,
 +                              em_state_t *s_min, em_state_t *s_b)
 +{
 +    rvec          *fm, *fb, *fmg;
 +    t_block       *cgs_gl;
 +    int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
 +    double         partsum;
 +    unsigned char *grpnrFREEZE;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Doing reorder_partsum\n");
 +    }
 +
 +    fm = s_min->f;
 +    fb = s_b->f;
 +
 +    cgs_gl = dd_charge_groups_global(cr->dd);
 +    index  = cgs_gl->index;
 +
 +    /* Collect fm in a global vector fmg.
 +     * This conflicts with the spirit of domain decomposition,
 +     * but to fully optimize this a much more complicated algorithm is required.
 +     */
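 +    /* fmg holds the forces of s_min indexed by global atom number, so that
 +     * they can be matched against the (possibly differently ordered) local
 +     * atoms of s_b below.
 +     */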
 +    snew(fmg, mtop->natoms);
 +
 +    ncg   = s_min->s.ncg_gl;
 +    cg_gl = s_min->s.cg_gl;
 +    i     = 0;
 +    for (c = 0; c < ncg; c++)
 +    {
 +        cg = cg_gl[c];
 +        a0 = index[cg];
 +        a1 = index[cg+1];
 +        for (a = a0; a < a1; a++)
 +        {
 +            copy_rvec(fm[i], fmg[a]);
 +            i++;
 +        }
 +    }
 +    gmx_sum(mtop->natoms*3, fmg[0], cr);
 +
 +    /* Now we will determine the part of the sum for the cgs in state s_b */
 +    ncg         = s_b->s.ncg_gl;
 +    cg_gl       = s_b->s.cg_gl;
 +    partsum     = 0;
 +    i           = 0;
 +    gf          = 0;
 +    grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +    for (c = 0; c < ncg; c++)
 +    {
 +        cg = cg_gl[c];
 +        a0 = index[cg];
 +        a1 = index[cg+1];
 +        for (a = a0; a < a1; a++)
 +        {
 +            if (mdatoms->cFREEZE && grpnrFREEZE)
 +            {
 +                gf = grpnrFREEZE[i];
 +            }
 +            for (m = 0; m < DIM; m++)
 +            {
 +                if (!opts->nFreeze[gf][m])
 +                {
 +                    partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 +                }
 +            }
 +            i++;
 +        }
 +    }
 +
 +    sfree(fmg);
 +
 +    return partsum;
 +}
 +
 +static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 +                    gmx_mtop_t *mtop,
 +                    em_state_t *s_min, em_state_t *s_b)
 +{
 +    rvec  *fm, *fb;
 +    double sum;
 +    int    gf, i, m;
 +
 +    /* This is just the classical Polak-Ribiere calculation of beta;
 +     * it looks a bit complicated since we take freeze groups into account,
 +     * and might have to sum it in parallel runs.
 +     */
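 +    /* Here beta = f_b.(f_b - f_min) / |f_min|^2, where f_min and f_b are
 +     * the forces (negative gradients) of the two states; frozen dimensions
 +     * are excluded from both the dot product and the norm.
 +     */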
 +
 +    if (!DOMAINDECOMP(cr) ||
 +        (s_min->s.ddp_count == cr->dd->ddp_count &&
 +         s_b->s.ddp_count   == cr->dd->ddp_count))
 +    {
 +        fm  = s_min->f;
 +        fb  = s_b->f;
 +        sum = 0;
 +        gf  = 0;
 +        /* This part of code can be incorrect with DD,
 +         * since the atom ordering in s_b and s_min might differ.
 +         */
 +        for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +        {
 +            if (mdatoms->cFREEZE)
 +            {
 +                gf = mdatoms->cFREEZE[i];
 +            }
 +            for (m = 0; m < DIM; m++)
 +            {
 +                if (!opts->nFreeze[gf][m])
 +                {
 +                    sum += (fb[i][m] - fm[i][m])*fb[i][m];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* We need to reorder cgs while summing */
 +        sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
 +    }
 +    if (PAR(cr))
 +    {
 +        gmx_sumd(1, &sum, cr);
 +    }
 +
 +    return sum/sqr(s_min->fnorm);
 +}
 +
 +double do_cg(FILE *fplog, t_commrec *cr,
 +             int nfile, const t_filenm fnm[],
 +             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +             int gmx_unused nstglobalcomm,
 +             gmx_vsite_t *vsite, gmx_constr_t constr,
 +             int gmx_unused stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global, t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +             gmx_edsam_t gmx_unused ed,
 +             t_forcerec *fr,
 +             int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 +             gmx_membed_t gmx_unused membed,
 +             real gmx_unused cpt_period, real gmx_unused max_hours,
 +             const char gmx_unused *deviceOptions,
 +             unsigned long gmx_unused Flags,
 +             gmx_walltime_accounting_t walltime_accounting)
 +{
 +    const char       *CG = "Polak-Ribiere Conjugate Gradients";
 +
 +    em_state_t       *s_min, *s_a, *s_b, *s_c;
 +    gmx_localtop_t   *top;
 +    gmx_enerdata_t   *enerd;
 +    rvec             *f;
 +    gmx_global_stat_t gstat;
 +    t_graph          *graph;
 +    rvec             *f_global, *p, *sf, *sfm;
 +    double            gpa, gpb, gpc, tmp, sum[2], minstep;
 +    real              fnormn;
 +    real              stepsize;
 +    real              a, b, c, beta = 0.0;
 +    real              epot_repl = 0;
 +    real              pnorm;
 +    t_mdebin         *mdebin;
 +    gmx_bool          converged, foundlower;
 +    rvec              mu_tot;
 +    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
 +    tensor            vir, pres;
 +    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
 +    gmx_mdoutf_t      outf;
 +    int               i, m, gf, step, nminstep;
 +    real              terminate = 0;
 +
 +    step = 0;
 +
 +    s_min = init_em_state();
 +    s_a   = init_em_state();
 +    s_b   = init_em_state();
 +    s_c   = init_em_state();
 +
 +    /* Init em and store the local state in s_min */
 +    init_em(fplog, CG, cr, inputrec,
 +            state_global, top_global, s_min, &top, &f, &f_global,
 +            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
 +            nfile, fnm, &outf, &mdebin);
 +
 +    /* Print to log file */
 +    print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
 +
 +    /* Max number of steps */
 +    number_steps = inputrec->nsteps;
 +
 +    if (MASTER(cr))
 +    {
 +        sp_header(stderr, CG, inputrec->em_tol, number_steps);
 +    }
 +    if (fplog)
 +    {
 +        sp_header(fplog, CG, inputrec->em_tol, number_steps);
 +    }
 +
 +    /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole
 +     */
 +    evaluate_energy(fplog, cr,
 +                    top_global, s_min, top,
 +                    inputrec, nrnb, wcycle, gstat,
 +                    vsite, constr, fcd, graph, mdatoms, fr,
 +                    mu_tot, enerd, vir, pres, -1, TRUE);
 +    where();
 +
 +    if (MASTER(cr))
 +    {
 +        /* Copy stuff to the energy bin for easy printing etc. */
 +        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 +                   mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
 +                   NULL, NULL, vir, pres, NULL, mu_tot, constr);
 +
 +        print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
 +                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +    }
 +    where();
 +
 +    /* Estimate/guess the initial stepsize */
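 +    /* The stepsize multiplies the unnormalized search direction, which at
 +     * this point is the force itself, so dividing em_stepsize by the force
 +     * norm makes the norm of the first trial displacement about em_stepsize.
 +     */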
 +    stepsize = inputrec->em_stepsize/s_min->fnorm;
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
 +                s_min->fmax, s_min->a_fmax+1);
 +        fprintf(stderr, "   F-Norm            = %12.5e\n",
 +                s_min->fnorm/sqrt(state_global->natoms));
 +        fprintf(stderr, "\n");
 +        /* and copy to the log file too... */
 +        fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
 +                s_min->fmax, s_min->a_fmax+1);
 +        fprintf(fplog, "   F-Norm            = %12.5e\n",
 +                s_min->fnorm/sqrt(state_global->natoms));
 +        fprintf(fplog, "\n");
 +    }
 +    /* Start the loop over CG steps.
 +     * Each successful step is counted, and we continue until
 +     * we either converge or reach the max number of steps.
 +     */
 +    converged = FALSE;
 +    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
 +    {
 +
 +        /* start taking steps in a new direction
 +         * First time we enter the routine, beta=0, and the direction is
 +         * simply the negative gradient.
 +         */
 +
 +        /* Calculate the new direction in p, and the gradient in this direction, gpa */
 +        p   = s_min->s.cg_p;
 +        sf  = s_min->f;
 +        gpa = 0;
 +        gf  = 0;
 +        for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +        {
 +            if (mdatoms->cFREEZE)
 +            {
 +                gf = mdatoms->cFREEZE[i];
 +            }
 +            for (m = 0; m < DIM; m++)
 +            {
 +                if (!inputrec->opts.nFreeze[gf][m])
 +                {
 +                    p[i][m] = sf[i][m] + beta*p[i][m];
 +                    gpa    -= p[i][m]*sf[i][m];
 +                    /* f is negative gradient, thus the sign */
 +                }
 +                else
 +                {
 +                    p[i][m] = 0;
 +                }
 +            }
 +        }
 +
 +        /* Sum the gradient along the line across CPUs */
 +        if (PAR(cr))
 +        {
 +            gmx_sumd(1, &gpa, cr);
 +        }
 +
 +        /* Calculate the norm of the search vector */
 +        get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
 +
 +        /* Just in case stepsize reaches zero due to numerical precision... */
 +        if (stepsize <= 0)
 +        {
 +            stepsize = inputrec->em_stepsize/pnorm;
 +        }
 +
 +        /*
 +         * Double check the value of the derivative in the search direction.
 +         * If it is positive it must be due to the old information in the
 +         * CG formula, so just remove that and start over with beta=0.
 +         * This corresponds to a steepest descent step.
 +         */
 +        if (gpa > 0)
 +        {
 +            beta = 0;
 +            step--;   /* Don't count this step since we are restarting */
 +            continue; /* Go back to the beginning of the big for-loop */
 +        }
 +
 +        /* Calculate the minimum allowed stepsize, below which the average
 +         * (rms) relative change in coordinates would be smaller than the
 +         * machine precision
 +         */
 +        minstep = 0;
 +        for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +        {
 +            for (m = 0; m < DIM; m++)
 +            {
 +                tmp = fabs(s_min->s.x[i][m]);
 +                if (tmp < 1.0)
 +                {
 +                    tmp = 1.0;
 +                }
 +                tmp      = p[i][m]/tmp;
 +                minstep += tmp*tmp;
 +            }
 +        }
 +        /* Add up from all CPUs */
 +        if (PAR(cr))
 +        {
 +            gmx_sumd(1, &minstep, cr);
 +        }
 +
 +        minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
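 +        /* minstep is now eps divided by the rms of p_i/max(|x_i|,1) over the
 +         * 3*natoms degrees of freedom: a smaller step would change the
 +         * coordinates by less than their relative machine precision.
 +         */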
 +
 +        if (stepsize < minstep)
 +        {
 +            converged = TRUE;
 +            break;
 +        }
 +
 +        /* Write coordinates if necessary */
 +        do_x = do_per_step(step, inputrec->nstxout);
 +        do_f = do_per_step(step, inputrec->nstfout);
 +
 +        write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
 +                      top_global, inputrec, step,
 +                      s_min, state_global, f_global);
 +
 +        /* Take a step downhill.
 +         * In theory, we should minimize the function along this direction.
 +         * That is quite possible, but it turns out to take 5-10 function evaluations
 +         * for each line. However, we don't really need to find the exact minimum -
 +         * it is much better to start a new CG step in a modified direction as soon
 +         * as we are close to it. This will save a lot of energy evaluations.
 +         *
 +         * In practice, we just try to take a single step.
 +         * If it worked (i.e. lowered the energy), we increase the stepsize but
 +         * then continue straight to the next CG step without trying to find any minimum.
 +         * If it didn't work (higher energy), there must be a minimum somewhere between
 +         * the old position and the new one.
 +         *
 +         * Due to the finite numerical accuracy, it turns out that it is a good idea
 +         * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +         * This leads to lower final energies in the tests I've done. / Erik
 +         */
 +        s_a->epot = s_min->epot;
 +        a         = 0.0;
 +        c         = a + stepsize; /* reference position along line is zero */
 +
 +        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
 +        {
 +            em_dd_partition_system(fplog, step, cr, top_global, inputrec,
 +                                   s_min, top, mdatoms, fr, vsite, constr,
 +                                   nrnb, wcycle);
 +        }
 +
 +        /* Take a trial step (new coords in s_c) */
 +        do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
 +                   constr, top, nrnb, wcycle, -1);
 +
 +        neval++;
 +        /* Calculate energy for the trial step */
 +        evaluate_energy(fplog, cr,
 +                        top_global, s_c, top,
 +                        inputrec, nrnb, wcycle, gstat,
 +                        vsite, constr, fcd, graph, mdatoms, fr,
 +                        mu_tot, enerd, vir, pres, -1, FALSE);
 +
 +        /* Calc derivative along line */
 +        p   = s_c->s.cg_p;
 +        sf  = s_c->f;
 +        gpc = 0;
 +        for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +        {
 +            for (m = 0; m < DIM; m++)
 +            {
 +                gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
 +            }
 +        }
 +        /* Sum the gradient along the line across CPUs */
 +        if (PAR(cr))
 +        {
 +            gmx_sumd(1, &gpc, cr);
 +        }
 +
 +        /* This is the max amount of increase in energy we tolerate */
 +        tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
 +
 +        /* Accept the step if the energy is lower, or if it is not significantly higher
 +         * and the line derivative is still negative.
 +         */
 +        if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
 +        {
 +            foundlower = TRUE;
 +            /* Great, we found a better energy. Increase step for next iteration
 +             * if we are still going down, decrease it otherwise
 +             */
 +            if (gpc < 0)
 +            {
 +                stepsize *= 1.618034; /* The golden section */
 +            }
 +            else
 +            {
 +                stepsize *= 0.618034; /* 1/golden section */
 +            }
 +        }
 +        else
 +        {
 +            /* New energy is the same or higher. We will have to do some work
 +             * to find a smaller value in the interval. Take smaller step next time!
 +             */
 +            foundlower = FALSE;
 +            stepsize  *= 0.618034;
 +        }
 +
 +
 +
 +
 +        /* OK, if we didn't find a lower value we will have to locate one now - there must
 +         * be one in the interval [a=0,c].
 +         * The same thing is valid here, though: Don't spend dozens of iterations to find
 +         * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +         *
 +         * I also have a safeguard for potentially really pathological functions so we never
 +         * take more than 20 steps before we give up ...
 +         *
 +         * If we already found a lower value we just skip this step and continue to the update.
 +         */
 +        if (!foundlower)
 +        {
 +            nminstep = 0;
 +
 +            do
 +            {
 +                /* Select a new trial point.
 +                 * If the derivatives at points a & c have different sign we interpolate to zero,
 +                 * otherwise just do a bisection.
 +                 */
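 +                /* Linear interpolation of the line derivative: when gpa and
 +                 * gpc bracket zero, its root lies at
 +                 * b = a + gpa*(a-c)/(gpc-gpa).
 +                 */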
 +                if (gpa < 0 && gpc > 0)
 +                {
 +                    b = a + gpa*(a-c)/(gpc-gpa);
 +                }
 +                else
 +                {
 +                    b = 0.5*(a+c);
 +                }
 +
 +                /* safeguard if interpolation close to machine accuracy causes errors:
 +                 * never go outside the interval
 +                 */
 +                if (b <= a || b >= c)
 +                {
 +                    b = 0.5*(a+c);
 +                }
 +
 +                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
 +                {
 +                    /* Reload the old state */
 +                    em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
 +                                           s_min, top, mdatoms, fr, vsite, constr,
 +                                           nrnb, wcycle);
 +                }
 +
 +                /* Take a trial step to this new point - new coords in s_b */
 +                do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
 +                           constr, top, nrnb, wcycle, -1);
 +
 +                neval++;
 +                /* Calculate energy for the trial step */
 +                evaluate_energy(fplog, cr,
 +                                top_global, s_b, top,
 +                                inputrec, nrnb, wcycle, gstat,
 +                                vsite, constr, fcd, graph, mdatoms, fr,
 +                                mu_tot, enerd, vir, pres, -1, FALSE);
 +
 +                /* p does not change within a step, but since the domain decomposition
 +                 * might change, we have to use cg_p of s_b here.
 +                 */
 +                p   = s_b->s.cg_p;
 +                sf  = s_b->f;
 +                gpb = 0;
 +                for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +                {
 +                    for (m = 0; m < DIM; m++)
 +                    {
 +                        gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
 +                    }
 +                }
 +                /* Sum the gradient along the line across CPUs */
 +                if (PAR(cr))
 +                {
 +                    gmx_sumd(1, &gpb, cr);
 +                }
 +
 +                if (debug)
 +                {
 +                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
 +                            s_a->epot, s_b->epot, s_c->epot, gpb);
 +                }
 +
 +                epot_repl = s_b->epot;
 +
 +                /* Keep one of the intervals based on the value of the derivative at the new point */
 +                if (gpb > 0)
 +                {
 +                    /* Replace c endpoint with b */
 +                    swap_em_state(s_b, s_c);
 +                    c   = b;
 +                    gpc = gpb;
 +                }
 +                else
 +                {
 +                    /* Replace a endpoint with b */
 +                    swap_em_state(s_b, s_a);
 +                    a   = b;
 +                    gpa = gpb;
 +                }
 +
 +                /*
 +                 * Stop search as soon as we find a value smaller than the endpoints.
 +                 * Never run more than 20 steps, no matter what.
 +                 */
 +                nminstep++;
 +            }
 +            while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
 +                   (nminstep < 20));
 +
 +            if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
 +                nminstep >= 20)
 +            {
 +                /* OK. We couldn't find a significantly lower energy.
 +                 * If beta==0 this was steepest descent, and then we give up.
 +                 * If not, set beta=0 and restart with steepest descent before quitting.
 +                 */
 +                if (beta == 0.0)
 +                {
 +                    /* Converged */
 +                    converged = TRUE;
 +                    break;
 +                }
 +                else
 +                {
 +                    /* Reset memory before giving up */
 +                    beta = 0.0;
 +                    continue;
 +                }
 +            }
 +
 +            /* Select min energy state of A & C, put the best in B.
 +             */
 +            if (s_c->epot < s_a->epot)
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
 +                            s_c->epot, s_a->epot);
 +                }
 +                swap_em_state(s_b, s_c);
 +                gpb = gpc;
 +                b   = c;
 +            }
 +            else
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
 +                            s_a->epot, s_c->epot);
 +                }
 +                swap_em_state(s_b, s_a);
 +                gpb = gpa;
 +                b   = a;
 +            }
 +
 +        }
 +        else
 +        {
 +            if (debug)
 +            {
 +                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
 +                        s_c->epot);
 +            }
 +            swap_em_state(s_b, s_c);
 +            gpb = gpc;
 +            b   = c;
 +        }
 +
 +        /* new search direction */
 +        /* beta = 0 means forget all memory and restart with steepest descents. */
 +        if (nstcg && ((step % nstcg) == 0))
 +        {
 +            beta = 0.0;
 +        }
 +        else
 +        {
 +            /* s_min->fnorm cannot be zero, because then we would have converged
 +             * and broken out.
 +             */
 +
 +            /* Polak-Ribiere update.
 +             * Change to fnorm2/fnorm2_old for Fletcher-Reeves
 +             */
 +            beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
 +        }
 +        /* Limit beta to prevent oscillations */
 +        if (fabs(beta) > 5.0)
 +        {
 +            beta = 0.0;
 +        }
 +
 +
 +        /* update positions */
 +        swap_em_state(s_min, s_b);
 +        gpa = gpb;
 +
 +        /* Print it if necessary */
 +        if (MASTER(cr))
 +        {
 +            if (bVerbose)
 +            {
 +                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +                        step, s_min->epot, s_min->fnorm/sqrt(state_global->natoms),
 +                        s_min->fmax, s_min->a_fmax+1);
 +            }
 +            /* Store the new (lower) energies */
 +            upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 +                       mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
 +                       NULL, NULL, vir, pres, NULL, mu_tot, constr);
 +
 +            do_log = do_per_step(step, inputrec->nstlog);
 +            do_ene = do_per_step(step, inputrec->nstenergy);
 +            if (do_log)
 +            {
 +                print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +            }
 +            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
 +                       do_log ? fplog : NULL, step, step, eprNORMAL,
 +                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +        }
 +
 +        /* Stop when the maximum force lies below tolerance.
 +         * If we have reached machine precision, converged is already set to true.
 +         */
 +        converged = converged || (s_min->fmax < inputrec->em_tol);
 +
 +    } /* End of the loop */
 +
 +    if (converged)
 +    {
 +        step--; /* we never took that last step in this case */
 +
 +    }
 +    if (s_min->fmax > inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
 +            warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
 +        }
 +        converged = FALSE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        /* If we printed energy and/or logfile on the final step we don't
 +         * have to do it again, but otherwise print the final values.
 +         */
 +        if (!do_log)
 +        {
 +            /* Write final value to log since we didn't do anything the last step */
 +            print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +        }
 +        if (!do_ene || !do_log)
 +        {
 +            /* Write final energy file entries */
 +            print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
 +                       !do_log ? fplog : NULL, step, step, eprNORMAL,
 +                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +        }
 +    }
 +
 +    /* Print some stuff... */
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
 +    }
 +
 +    /* IMPORTANT!
 +     * For accurate normal mode calculation it is imperative that we
 +     * store the last conformation into the full precision binary trajectory.
 +     *
 +     * However, we should only do it if we did NOT already write this step
 +     * above (which we did if do_x or do_f was true).
 +     */
 +    do_x = !do_per_step(step, inputrec->nstxout);
 +    do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
 +
 +    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
 +                  top_global, inputrec, step,
 +                  s_min, state_global, f_global);
 +
 +    fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +    if (MASTER(cr))
 +    {
 +        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
 +                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
 +        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
 +                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
 +
 +        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
 +    }
 +
 +    finish_em(cr, outf, walltime_accounting, wcycle);
 +
 +    /* To print the actual number of steps we needed somewhere */
 +    walltime_accounting_set_nsteps_done(walltime_accounting, step);
 +
 +    return 0;
 +} /* That's all folks */
 +
 +
 +double do_lbfgs(FILE *fplog, t_commrec *cr,
 +                int nfile, const t_filenm fnm[],
 +                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +                int gmx_unused nstglobalcomm,
 +                gmx_vsite_t *vsite, gmx_constr_t constr,
 +                int gmx_unused stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global, t_fcdata *fcd,
 +                t_state *state,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                gmx_edsam_t gmx_unused ed,
 +                t_forcerec *fr,
 +                int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 +                gmx_membed_t gmx_unused membed,
 +                real gmx_unused cpt_period, real gmx_unused max_hours,
 +                const char gmx_unused *deviceOptions,
 +                unsigned long gmx_unused Flags,
 +                gmx_walltime_accounting_t walltime_accounting)
 +{
 +    static const char *LBFGS = "Low-Memory BFGS Minimizer";
 +    em_state_t         ems;
 +    gmx_localtop_t    *top;
 +    gmx_enerdata_t    *enerd;
 +    rvec              *f;
 +    gmx_global_stat_t  gstat;
 +    t_graph           *graph;
 +    rvec              *f_global;
 +    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
 +    double             stepsize, gpa, gpb, gpc, tmp, minstep;
 +    real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
 +    real              *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
 +    real               a, b, c, maxdelta, delta;
 +    real               diag, Epot0, Epot, EpotA, EpotB, EpotC;
 +    real               dgdx, dgdg, sq, yr, beta;
 +    t_mdebin          *mdebin;
 +    gmx_bool           converged, first;
 +    rvec               mu_tot;
 +    real               fnorm, fmax;
 +    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
 +    tensor             vir, pres;
 +    int                start, end, number_steps;
 +    gmx_mdoutf_t       outf;
 +    int                i, k, m, n, nfmax, gf, step;
 +    int                mdof_flags;
 +    /* not used */
 +    real               terminate;
 +
 +    if (PAR(cr))
 +    {
 +        gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
 +    }
 +
 +    if (NULL != constr)
 +    {
 +        gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
 +    }
 +
 +    n        = 3*state->natoms;
 +    nmaxcorr = inputrec->nbfgscorr;
 +
 +    /* Allocate memory */
 +    /* Use pointers to real so we don't have to loop over both atoms and
 +     * dimensions all the time...
 +     * x/f are allocated as rvec *, so make new pointers-to-real (xx/ff)
 +     * that point to the same memory.
 +     */
 +    snew(xa, n);
 +    snew(xb, n);
 +    snew(xc, n);
 +    snew(fa, n);
 +    snew(fb, n);
 +    snew(fc, n);
 +    snew(frozen, n);
 +
 +    snew(p, n);
 +    snew(lastx, n);
 +    snew(lastf, n);
 +    snew(rho, nmaxcorr);
 +    snew(alpha, nmaxcorr);
 +
 +    snew(dx, nmaxcorr);
 +    for (i = 0; i < nmaxcorr; i++)
 +    {
 +        snew(dx[i], n);
 +    }
 +
 +    snew(dg, nmaxcorr);
 +    for (i = 0; i < nmaxcorr; i++)
 +    {
 +        snew(dg[i], n);
 +    }
 +
 +    step  = 0;
 +    neval = 0;
 +
 +    /* Init em */
 +    init_em(fplog, LBFGS, cr, inputrec,
 +            state, top_global, &ems, &top, &f, &f_global,
 +            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
 +            nfile, fnm, &outf, &mdebin);
 +    /* Do_lbfgs is not completely updated like do_steep and do_cg,
 +     * so we free some memory again.
 +     */
 +    sfree(ems.s.x);
 +    sfree(ems.f);
 +
 +    xx = (real *)state->x;
 +    ff = (real *)f;
 +
 +    start = mdatoms->start;
 +    end   = mdatoms->homenr + start;
 +
 +    /* Print to log file */
 +    print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
 +
 +    do_log = do_ene = do_x = do_f = TRUE;
 +
 +    /* Max number of steps */
 +    number_steps = inputrec->nsteps;
 +
 +    /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
 +    gf = 0;
 +    for (i = start; i < end; i++)
 +    {
 +        if (mdatoms->cFREEZE)
 +        {
 +            gf = mdatoms->cFREEZE[i];
 +        }
 +        for (m = 0; m < DIM; m++)
 +        {
 +            frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
 +        }
 +    }
 +    if (MASTER(cr))
 +    {
 +        sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
 +    }
 +    if (fplog)
 +    {
 +        sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
 +    }
 +
 +    if (vsite)
 +    {
 +        construct_vsites(vsite, state->x, 1, NULL,
 +                         top->idef.iparams, top->idef.il,
 +                         fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +    }
 +
 +    /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole
 +     */
 +    neval++;
 +    ems.s.x = state->x;
 +    ems.f   = f;
 +    evaluate_energy(fplog, cr,
 +                    top_global, &ems, top,
 +                    inputrec, nrnb, wcycle, gstat,
 +                    vsite, constr, fcd, graph, mdatoms, fr,
 +                    mu_tot, enerd, vir, pres, -1, TRUE);
 +    where();
 +
 +    if (MASTER(cr))
 +    {
 +        /* Copy stuff to the energy bin for easy printing etc. */
 +        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 +                   mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
 +                   NULL, NULL, vir, pres, NULL, mu_tot, constr);
 +
 +        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
 +                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +    }
 +    where();
 +
 +    /* This is the starting energy */
 +    Epot = enerd->term[F_EPOT];
 +
 +    fnorm = ems.fnorm;
 +    fmax  = ems.fmax;
 +    nfmax = ems.a_fmax;
 +
 +    /* Set the initial step.
 +     * Since it will be multiplied by the non-normalized search direction
 +     * vector (force vector the first time), we scale it by the
 +     * norm of the force.
 +     */
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
 +        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
 +        fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrt(state->natoms));
 +        fprintf(stderr, "\n");
 +        /* and copy to the log file too... */
 +        fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
 +        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
 +        fprintf(fplog, "   F-Norm            = %12.5e\n", fnorm/sqrt(state->natoms));
 +        fprintf(fplog, "\n");
 +    }
 +
 +    point = 0;
 +    for (i = 0; i < n; i++)
 +    {
 +        if (!frozen[i])
 +        {
 +            dx[point][i] = ff[i]; /* Initial search direction */
 +        }
 +        else
 +        {
 +            dx[point][i] = 0;
 +        }
 +    }
 +
 +    stepsize  = 1.0/fnorm;
 +    converged = FALSE;
 +
 +    /* Start the loop over BFGS steps.
 +     * Each successful step is counted, and we continue until
 +     * we either converge or reach the max number of steps.
 +     */
 +
 +    ncorr = 0;
 +
 +    /* Set the gradient from the force */
 +    converged = FALSE;
 +    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
 +    {
 +
 +        /* Write coordinates if necessary */
 +        do_x = do_per_step(step, inputrec->nstxout);
 +        do_f = do_per_step(step, inputrec->nstfout);
 +
 +        mdof_flags = 0;
 +        if (do_x)
 +        {
 +            mdof_flags |= MDOF_X;
 +        }
 +
 +        if (do_f)
 +        {
 +            mdof_flags |= MDOF_F;
 +        }
 +
 +        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
 +                                         top_global, step, (real)step, state, state, f, f);
 +
 +        /* Do the linesearching in the direction dx[point][0..(n-1)] */
 +
 +        /* pointer to current direction - point=0 first time here */
 +        s = dx[point];
 +
 +        /* calculate line gradient */
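 +        /* gpa = -s.f = s.(dE/dx) is the directional derivative of the energy
 +         * along the search direction (f is the negative gradient).
 +         */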
 +        for (gpa = 0, i = 0; i < n; i++)
 +        {
 +            gpa -= s[i]*ff[i];
 +        }
 +
 +        /* Calculate the minimum allowed stepsize: below it, the average (norm)
 +         * relative change in the coordinates would be smaller than machine
 +         * precision.
 +         */
 +        for (minstep = 0, i = 0; i < n; i++)
 +        {
 +            tmp = fabs(xx[i]);
 +            if (tmp < 1.0)
 +            {
 +                tmp = 1.0;
 +            }
 +            tmp      = s[i]/tmp;
 +            minstep += tmp*tmp;
 +        }
 +        minstep = GMX_REAL_EPS/sqrt(minstep/n);
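 +        /* minstep is now the step length for which the RMS relative coordinate
 +         * change, stepsize*sqrt(sum_i (s_i/max(|x_i|,1))^2 / n), equals
 +         * GMX_REAL_EPS; shorter steps would be lost in rounding.
 +         */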
 +
 +        if (stepsize < minstep)
 +        {
 +            converged = TRUE;
 +            break;
 +        }
 +
 +        /* Store old forces and coordinates */
 +        for (i = 0; i < n; i++)
 +        {
 +            lastx[i] = xx[i];
 +            lastf[i] = ff[i];
 +        }
 +        Epot0 = Epot;
 +
 +        first = TRUE;
 +
 +        for (i = 0; i < n; i++)
 +        {
 +            xa[i] = xx[i];
 +        }
 +
 +        /* Take a step downhill.
 +         * In theory, we should minimize the function along this direction.
 +         * That is quite possible, but it turns out to take 5-10 function evaluations
 +         * for each line. However, we don't really need to find the exact minimum -
 +         * it is much better to start a new BFGS step in a modified direction as soon
 +         * as we are close to it. This will save a lot of energy evaluations.
 +         *
 +         * In practice, we just try to take a single step.
 +         * If it worked (i.e. lowered the energy), we increase the stepsize but
 +         * then continue straight to the next BFGS step without trying to find any minimum.
 +         * If it didn't work (higher energy), there must be a minimum somewhere between
 +         * the old position and the new one.
 +         *
 +         * Due to the finite numerical accuracy, it turns out that it is a good idea
 +         * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +         * This leads to lower final energies in the tests I've done. / Erik
 +         */
 +        foundlower = FALSE;
 +        EpotA      = Epot0;
 +        a          = 0.0;
 +        c          = a + stepsize; /* reference position along line is zero */
 +
 +        /* Check stepsize first. We do not allow displacements
 +         * larger than emstep.
 +         */
 +        do
 +        {
 +            c        = a + stepsize;
 +            maxdelta = 0;
 +            for (i = 0; i < n; i++)
 +            {
 +                delta = c*s[i];
 +                if (delta > maxdelta)
 +                {
 +                    maxdelta = delta;
 +                }
 +            }
 +            if (maxdelta > inputrec->em_stepsize)
 +            {
 +                stepsize *= 0.1;
 +            }
 +        }
 +        while (maxdelta > inputrec->em_stepsize);
 +
 +        /* Take a trial step */
 +        for (i = 0; i < n; i++)
 +        {
 +            xc[i] = lastx[i] + c*s[i];
 +        }
 +
 +        neval++;
 +        /* Calculate energy for the trial step */
 +        ems.s.x = (rvec *)xc;
 +        ems.f   = (rvec *)fc;
 +        evaluate_energy(fplog, cr,
 +                        top_global, &ems, top,
 +                        inputrec, nrnb, wcycle, gstat,
 +                        vsite, constr, fcd, graph, mdatoms, fr,
 +                        mu_tot, enerd, vir, pres, step, FALSE);
 +        EpotC = ems.epot;
 +
 +        /* Calc derivative along line */
 +        for (gpc = 0, i = 0; i < n; i++)
 +        {
 +            gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
 +        }
 +        /* Sum the gradient along the line across CPUs */
 +        if (PAR(cr))
 +        {
 +            gmx_sumd(1, &gpc, cr);
 +        }
 +
 +        /* This is the max amount of increase in energy we tolerate */
 +        tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
 +
 +        /* Accept the step if the energy is lower, or if it is not significantly higher
 +         * and the line derivative is still negative.
 +         */
 +        if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
 +        {
 +            foundlower = TRUE;
 +            /* Great, we found a better energy. Increase step for next iteration
 +             * if we are still going down, decrease it otherwise
 +             */
 +            if (gpc < 0)
 +            {
 +                stepsize *= 1.618034; /* The golden section */
 +            }
 +            else
 +            {
 +                stepsize *= 0.618034; /* 1/golden section */
 +            }
 +        }
 +        else
 +        {
 +            /* New energy is the same or higher. We will have to do some work
 +             * to find a smaller value in the interval. Take smaller step next time!
 +             */
 +            foundlower = FALSE;
 +            stepsize  *= 0.618034;
 +        }
 +
 +        /* OK, if we didn't find a lower value we will have to locate one now - there must
 +         * be one in the interval [a=0,c].
 +         * The same thing is valid here, though: Don't spend dozens of iterations to find
 +         * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +         *
 +         * I also have a safeguard for potentially really pathological functions so we never
 +         * take more than 20 steps before we give up ...
 +         *
 +         * If we already found a lower value we just skip this step and continue to the update.
 +         */
 +
 +        if (!foundlower)
 +        {
 +
 +            nminstep = 0;
 +            do
 +            {
 +                /* Select a new trial point.
 +                 * If the derivatives at points a & c have different sign we interpolate to zero,
 +                 * otherwise just do a bisection.
 +                 */
 +
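 +                /* If the line derivatives bracket a zero (gpa < 0 < gpc), the
 +                 * secant formula below places b where the linearly interpolated
 +                 * derivative vanishes; otherwise we simply bisect.
 +                 */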
 +                if (gpa < 0 && gpc > 0)
 +                {
 +                    b = a + gpa*(a-c)/(gpc-gpa);
 +                }
 +                else
 +                {
 +                    b = 0.5*(a+c);
 +                }
 +
 +                /* safeguard if interpolation close to machine accuracy causes errors:
 +                 * never go outside the interval
 +                 */
 +                if (b <= a || b >= c)
 +                {
 +                    b = 0.5*(a+c);
 +                }
 +
 +                /* Take a trial step */
 +                for (i = 0; i < n; i++)
 +                {
 +                    xb[i] = lastx[i] + b*s[i];
 +                }
 +
 +                neval++;
 +                /* Calculate energy for the trial step */
 +                ems.s.x = (rvec *)xb;
 +                ems.f   = (rvec *)fb;
 +                evaluate_energy(fplog, cr,
 +                                top_global, &ems, top,
 +                                inputrec, nrnb, wcycle, gstat,
 +                                vsite, constr, fcd, graph, mdatoms, fr,
 +                                mu_tot, enerd, vir, pres, step, FALSE);
 +                EpotB = ems.epot;
 +
 +                fnorm = ems.fnorm;
 +
 +                for (gpb = 0, i = 0; i < n; i++)
 +                {
 +                    gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
 +
 +                }
 +                /* Sum the gradient along the line across CPUs */
 +                if (PAR(cr))
 +                {
 +                    gmx_sumd(1, &gpb, cr);
 +                }
 +
 +                /* Keep one of the intervals based on the value of the derivative at the new point */
 +                if (gpb > 0)
 +                {
 +                    /* Replace c endpoint with b */
 +                    EpotC = EpotB;
 +                    c     = b;
 +                    gpc   = gpb;
 +                    /* swap coord pointers b/c */
 +                    xtmp = xb;
 +                    ftmp = fb;
 +                    xb   = xc;
 +                    fb   = fc;
 +                    xc   = xtmp;
 +                    fc   = ftmp;
 +                }
 +                else
 +                {
 +                    /* Replace a endpoint with b */
 +                    EpotA = EpotB;
 +                    a     = b;
 +                    gpa   = gpb;
 +                    /* swap coord pointers a/b */
 +                    xtmp = xb;
 +                    ftmp = fb;
 +                    xb   = xa;
 +                    fb   = fa;
 +                    xa   = xtmp;
 +                    fa   = ftmp;
 +                }
 +
 +                /*
 +                 * Stop search as soon as we find a value smaller than the endpoints,
 +                 * or if the tolerance is below machine precision.
 +                 * Never run more than 20 steps, no matter what.
 +                 */
 +                nminstep++;
 +            }
 +            while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
 +
 +            if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
 +            {
 +                /* OK. We couldn't find a significantly lower energy.
 +                 * If ncorr==0 this was steepest descent, and then we give up.
 +                 * If not, reset memory to restart as steepest descent before quitting.
 +                 */
 +                if (ncorr == 0)
 +                {
 +                    /* Converged */
 +                    converged = TRUE;
 +                    break;
 +                }
 +                else
 +                {
 +                    /* Reset memory */
 +                    ncorr = 0;
 +                    /* Search in gradient direction */
 +                    for (i = 0; i < n; i++)
 +                    {
 +                        dx[point][i] = ff[i];
 +                    }
 +                    /* Reset stepsize */
 +                    stepsize = 1.0/fnorm;
 +                    continue;
 +                }
 +            }
 +
 +            /* Select min energy state of A & C, put the best in xx/ff/Epot
 +             */
 +            if (EpotC < EpotA)
 +            {
 +                Epot = EpotC;
 +                /* Use state C */
 +                for (i = 0; i < n; i++)
 +                {
 +                    xx[i] = xc[i];
 +                    ff[i] = fc[i];
 +                }
 +                stepsize = c;
 +            }
 +            else
 +            {
 +                Epot = EpotA;
 +                /* Use state A */
 +                for (i = 0; i < n; i++)
 +                {
 +                    xx[i] = xa[i];
 +                    ff[i] = fa[i];
 +                }
 +                stepsize = a;
 +            }
 +
 +        }
 +        else
 +        {
 +            /* found lower */
 +            Epot = EpotC;
 +            /* Use state C */
 +            for (i = 0; i < n; i++)
 +            {
 +                xx[i] = xc[i];
 +                ff[i] = fc[i];
 +            }
 +            stepsize = c;
 +        }
 +
 +        /* Update the memory information, and calculate a new
 +         * approximation of the inverse hessian
 +         */
 +
 +        /* Have new data in Epot, xx, ff */
 +        if (ncorr < nmaxcorr)
 +        {
 +            ncorr++;
 +        }
 +
 +        for (i = 0; i < n; i++)
 +        {
 +            dg[point][i]  = lastf[i]-ff[i];
 +            dx[point][i] *= stepsize;
 +        }
 +
 +        dgdg = 0;
 +        dgdx = 0;
 +        for (i = 0; i < n; i++)
 +        {
 +            dgdg += dg[point][i]*dg[point][i];
 +            dgdx += dg[point][i]*dx[point][i];
 +        }
 +
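 +        /* With s = dx (position change) and y = dg (gradient change; f is the
 +         * negative gradient), diag = (s.y)/(y.y) is the usual scaling of the
 +         * initial inverse Hessian and rho = 1/(s.y), as in standard L-BFGS.
 +         */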
 +        diag = dgdx/dgdg;
 +
 +        rho[point] = 1.0/dgdx;
 +        point++;
 +
 +        if (point >= nmaxcorr)
 +        {
 +            point = 0;
 +        }
 +
 +        /* Update */
 +        for (i = 0; i < n; i++)
 +        {
 +            p[i] = ff[i];
 +        }
 +
 +        cp = point;
 +
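 +        /* Standard L-BFGS two-loop recursion: going backwards over the stored
 +         * pairs we compute alpha_k = rho_k*(s_k.p) and subtract alpha_k*y_k from
 +         * p; after scaling by diag we go forwards again, adding
 +         * (alpha_k - rho_k*(y_k.p))*s_k. Since p starts from the force (the
 +         * negative gradient), the result is directly the new search direction.
 +         */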
 +        /* Recursive update. First go back over the memory points */
 +        for (k = 0; k < ncorr; k++)
 +        {
 +            cp--;
 +            if (cp < 0)
 +            {
 +                cp = ncorr-1;
 +            }
 +
 +            sq = 0;
 +            for (i = 0; i < n; i++)
 +            {
 +                sq += dx[cp][i]*p[i];
 +            }
 +
 +            alpha[cp] = rho[cp]*sq;
 +
 +            for (i = 0; i < n; i++)
 +            {
 +                p[i] -= alpha[cp]*dg[cp][i];
 +            }
 +        }
 +
 +        for (i = 0; i < n; i++)
 +        {
 +            p[i] *= diag;
 +        }
 +
 +        /* And then go forward again */
 +        for (k = 0; k < ncorr; k++)
 +        {
 +            yr = 0;
 +            for (i = 0; i < n; i++)
 +            {
 +                yr += p[i]*dg[cp][i];
 +            }
 +
 +            beta = rho[cp]*yr;
 +            beta = alpha[cp]-beta;
 +
 +            for (i = 0; i < n; i++)
 +            {
 +                p[i] += beta*dx[cp][i];
 +            }
 +
 +            cp++;
 +            if (cp >= ncorr)
 +            {
 +                cp = 0;
 +            }
 +        }
 +
 +        for (i = 0; i < n; i++)
 +        {
 +            if (!frozen[i])
 +            {
 +                dx[point][i] = p[i];
 +            }
 +            else
 +            {
 +                dx[point][i] = 0;
 +            }
 +        }
 +
 +        stepsize = 1.0;
 +
 +        /* Test whether the convergence criterion is met */
 +        get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
 +
 +        /* Print it if necessary */
 +        if (MASTER(cr))
 +        {
 +            if (bVerbose)
 +            {
 +                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +                        step, Epot, fnorm/sqrt(state->natoms), fmax, nfmax+1);
 +            }
 +            /* Store the new (lower) energies */
 +            upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 +                       mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
 +                       NULL, NULL, vir, pres, NULL, mu_tot, constr);
 +            do_log = do_per_step(step, inputrec->nstlog);
 +            do_ene = do_per_step(step, inputrec->nstenergy);
 +            if (do_log)
 +            {
 +                print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +            }
 +            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
 +                       do_log ? fplog : NULL, step, step, eprNORMAL,
 +                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +        }
 +
 +        /* Stop when the maximum force lies below tolerance.
 +         * If we have reached machine precision, converged is already set to true.
 +         */
 +
 +        converged = converged || (fmax < inputrec->em_tol);
 +
 +    } /* End of the loop */
 +
 +    if (converged)
 +    {
 +        step--; /* we never took that last step in this case */
 +
 +    }
 +    if (fmax > inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
 +            warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
 +        }
 +        converged = FALSE;
 +    }
 +
 +    /* If we printed energy and/or logfile last step (which was the last step)
 +     * we don't have to do it again, but otherwise print the final values.
 +     */
 +    if (!do_log) /* Write final value to log since we didn't do anything last step */
 +    {
 +        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +    }
 +    if (!do_ene || !do_log) /* Write final energy file entries */
 +    {
 +        print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
 +                   !do_log ? fplog : NULL, step, step, eprNORMAL,
 +                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +    }
 +
 +    /* Print some stuff... */
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
 +    }
 +
 +    /* IMPORTANT!
 +     * For accurate normal mode calculation it is imperative that we
 +     * store the last conformation into the full precision binary trajectory.
 +     *
 +     * However, we should only do it if we did NOT already write this step
 +     * above (which we did if do_x or do_f was true).
 +     */
 +    do_x = !do_per_step(step, inputrec->nstxout);
 +    do_f = !do_per_step(step, inputrec->nstfout);
 +    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
 +                  top_global, inputrec, step,
 +                  &ems, state, f);
 +
 +    if (MASTER(cr))
 +    {
 +        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
 +                        number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
 +        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
 +                        number_steps, Epot, fmax, nfmax, fnorm/sqrt(state->natoms));
 +
 +        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
 +    }
 +
 +    finish_em(cr, outf, walltime_accounting, wcycle);
 +
 +    /* To print the actual number of steps we needed somewhere */
 +    walltime_accounting_set_nsteps_done(walltime_accounting, step);
 +
 +    return 0;
 +} /* That's all folks */
 +
 +
 +double do_steep(FILE *fplog, t_commrec *cr,
 +                int nfile, const t_filenm fnm[],
 +                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +                int gmx_unused nstglobalcomm,
 +                gmx_vsite_t *vsite, gmx_constr_t constr,
 +                int gmx_unused stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global, t_fcdata *fcd,
 +                t_state *state_global,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                gmx_edsam_t gmx_unused  ed,
 +                t_forcerec *fr,
 +                int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 +                gmx_membed_t gmx_unused membed,
 +                real gmx_unused cpt_period, real gmx_unused max_hours,
 +                const char  gmx_unused *deviceOptions,
 +                unsigned long gmx_unused Flags,
 +                gmx_walltime_accounting_t walltime_accounting)
 +{
 +    const char       *SD = "Steepest Descents";
 +    em_state_t       *s_min, *s_try;
 +    rvec             *f_global;
 +    gmx_localtop_t   *top;
 +    gmx_enerdata_t   *enerd;
 +    rvec             *f;
 +    gmx_global_stat_t gstat;
 +    t_graph          *graph;
 +    real              stepsize, constepsize;
 +    real              ustep, fnormn;
 +    gmx_mdoutf_t      outf;
 +    t_mdebin         *mdebin;
 +    gmx_bool          bDone, bAbort, do_x, do_f;
 +    tensor            vir, pres;
 +    rvec              mu_tot;
 +    int               nsteps;
 +    int               count          = 0;
 +    int               steps_accepted = 0;
 +    /* not used */
 +    real              terminate = 0;
 +
 +    s_min = init_em_state();
 +    s_try = init_em_state();
 +
 +    /* Init em and store the local state in s_try */
 +    init_em(fplog, SD, cr, inputrec,
 +            state_global, top_global, s_try, &top, &f, &f_global,
 +            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
 +            nfile, fnm, &outf, &mdebin);
 +
 +    /* Print to log file  */
 +    print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
 +
 +    /* Set variables for stepsize (in nm). This is the largest
 +     * step that we are going to make in any direction.
 +     */
 +    ustep    = inputrec->em_stepsize;
 +    stepsize = 0;
 +
 +    /* Max number of steps  */
 +    nsteps = inputrec->nsteps;
 +
 +    if (MASTER(cr))
 +    {
 +        /* Print to the screen  */
 +        sp_header(stderr, SD, inputrec->em_tol, nsteps);
 +    }
 +    if (fplog)
 +    {
 +        sp_header(fplog, SD, inputrec->em_tol, nsteps);
 +    }
 +
 +    /**** HERE STARTS THE LOOP ****
 +     * count is the counter for the number of steps
 +     * bDone will be TRUE when the minimization has converged
 +     * bAbort will be TRUE when nsteps steps have been performed or when
 +     * the stepsize becomes smaller than is reasonable for machine precision
 +     */
 +    count  = 0;
 +    bDone  = FALSE;
 +    bAbort = FALSE;
 +    while (!bDone && !bAbort)
 +    {
 +        bAbort = (nsteps >= 0) && (count == nsteps);
 +
 +        /* set new coordinates, except for first step */
 +        if (count > 0)
 +        {
 +            do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
 +                       s_min, stepsize, s_min->f, s_try,
 +                       constr, top, nrnb, wcycle, count);
 +        }
 +
 +        evaluate_energy(fplog, cr,
 +                        top_global, s_try, top,
 +                        inputrec, nrnb, wcycle, gstat,
 +                        vsite, constr, fcd, graph, mdatoms, fr,
 +                        mu_tot, enerd, vir, pres, count, count == 0);
 +
 +        if (MASTER(cr))
 +        {
 +            print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
 +        }
 +
 +        if (count == 0)
 +        {
 +            s_min->epot = s_try->epot + 1;
 +        }
 +
 +        /* Print it if necessary  */
 +        if (MASTER(cr))
 +        {
 +            if (bVerbose)
 +            {
 +                fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
 +                        count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
 +                        (s_try->epot < s_min->epot) ? '\n' : '\r');
 +            }
 +
 +            if (s_try->epot < s_min->epot)
 +            {
 +                /* Store the new (lower) energies  */
 +                upd_mdebin(mdebin, FALSE, FALSE, (double)count,
 +                           mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
 +                           s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
 +                print_ebin(mdoutf_get_fp_ene(outf), TRUE,
 +                           do_per_step(steps_accepted, inputrec->nstdisreout),
 +                           do_per_step(steps_accepted, inputrec->nstorireout),
 +                           fplog, count, count, eprNORMAL, TRUE,
 +                           mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                fflush(fplog);
 +            }
 +        }
 +
 +        /* Accept the step if the new energy is smaller than the previous one,
 +         * or if this is the first step.
 +         */
 +
 +        if ( (count == 0) || (s_try->epot < s_min->epot) )
 +        {
 +            steps_accepted++;
 +
 +            /* Test whether the convergence criterion is met...  */
 +            bDone = (s_try->fmax < inputrec->em_tol);
 +
 +            /* Copy the arrays for force, positions and energy  */
 +            /* The 'Min' array always holds the coords and forces of the minimal
 +               sampled energy  */
 +            swap_em_state(s_min, s_try);
 +            if (count > 0)
 +            {
 +                ustep *= 1.2;
 +            }
 +
 +            /* Write to trn, if necessary */
 +            do_x = do_per_step(steps_accepted, inputrec->nstxout);
 +            do_f = do_per_step(steps_accepted, inputrec->nstfout);
 +            write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
 +                          top_global, inputrec, count,
 +                          s_min, state_global, f_global);
 +        }
 +        else
 +        {
 +            /* If energy is not smaller make the step smaller...  */
 +            ustep *= 0.5;
 +
 +            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
 +            {
 +                /* Reload the old state */
 +                em_dd_partition_system(fplog, count, cr, top_global, inputrec,
 +                                       s_min, top, mdatoms, fr, vsite, constr,
 +                                       nrnb, wcycle);
 +            }
 +        }
 +
 +        /* Determine new step  */
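 +        /* Scaling by 1/fmax means the atom feeling the largest force is
 +         * displaced by at most ustep in the next step.
 +         */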
 +        stepsize = ustep/s_min->fmax;
 +
 +        /* Check if stepsize is too small, with 1 nm as a characteristic length */
 +#ifdef GMX_DOUBLE
 +        if (count == nsteps || ustep < 1e-12)
 +#else
 +        if (count == nsteps || ustep < 1e-6)
 +#endif
 +        {
 +            if (MASTER(cr))
 +            {
 +                warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
 +                warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
 +            }
 +            bAbort = TRUE;
 +        }
 +
 +        count++;
 +    } /* End of the loop  */
 +
 +    /* Print some stuff...  */
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
 +    }
 +    write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
 +                  top_global, inputrec, count,
 +                  s_min, state_global, f_global);
 +
 +    fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +    if (MASTER(cr))
 +    {
 +        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
 +                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
 +        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
 +                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
 +    }
 +
 +    finish_em(cr, outf, walltime_accounting, wcycle);
 +
 +    /* To print the actual number of steps we needed somewhere */
 +    inputrec->nsteps = count;
 +
 +    walltime_accounting_set_nsteps_done(walltime_accounting, count);
 +
 +    return 0;
 +} /* That's all folks */
 +
 +
 +double do_nm(FILE *fplog, t_commrec *cr,
 +             int nfile, const t_filenm fnm[],
 +             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused  bCompact,
 +             int gmx_unused nstglobalcomm,
 +             gmx_vsite_t *vsite, gmx_constr_t constr,
 +             int gmx_unused stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global, t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +             gmx_edsam_t  gmx_unused ed,
 +             t_forcerec *fr,
 +             int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 +             gmx_membed_t gmx_unused membed,
 +             real gmx_unused cpt_period, real gmx_unused max_hours,
 +             const char gmx_unused *deviceOptions,
 +             unsigned long gmx_unused Flags,
 +             gmx_walltime_accounting_t walltime_accounting)
 +{
 +    const char          *NM = "Normal Mode Analysis";
 +    gmx_mdoutf_t         outf;
 +    int                  natoms, atom, d;
 +    int                  nnodes, node;
 +    rvec                *f_global;
 +    gmx_localtop_t      *top;
 +    gmx_enerdata_t      *enerd;
 +    rvec                *f;
 +    gmx_global_stat_t    gstat;
 +    t_graph             *graph;
 +    real                 t, t0, lambda, lam0;
 +    gmx_bool             bNS;
 +    tensor               vir, pres;
 +    rvec                 mu_tot;
 +    rvec                *fneg, *dfdx;
 +    gmx_bool             bSparse; /* use sparse matrix storage format */
 +    size_t               sz = 0;
 +    gmx_sparsematrix_t * sparse_matrix           = NULL;
 +    real           *     full_matrix             = NULL;
 +    em_state_t       *   state_work;
 +
 +    /* added with respect to mdrun */
 +    int        i, j, k, row, col;
 +    real       der_range = 10.0*sqrt(GMX_REAL_EPS);
 +    real       x_min;
 +    real       fnorm, fmax;
 +
 +    if (constr != NULL)
 +    {
 +        gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis; this combination is not supported");
 +    }
 +
 +    state_work = init_em_state();
 +
 +    /* Init em and store the local state in state_minimum */
 +    init_em(fplog, NM, cr, inputrec,
 +            state_global, top_global, state_work, &top,
 +            &f, &f_global,
 +            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
 +            nfile, fnm, &outf, NULL);
 +
 +    natoms = top_global->natoms;
 +    snew(fneg, natoms);
 +    snew(dfdx, natoms);
 +
 +#ifndef GMX_DOUBLE
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,
 +                "NOTE: This version of Gromacs has been compiled in single precision,\n"
 +                "      which MIGHT not be accurate enough for normal mode analysis.\n"
 +                "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
 +                "      are fairly modest even if you recompile in double precision.\n\n");
 +    }
 +#endif
 +
 +    /* Check if we can/should use sparse storage format.
 +     *
 +     * Sparse format is only useful when the Hessian itself is sparse, which it
 +     * will be when we use a cutoff.
 +     * For small systems (n<1000) it is easier to always use full matrix format, though.
 +     */
 +    if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
 +    {
 +        md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
 +        bSparse = FALSE;
 +    }
 +    else if (top_global->natoms < 1000)
 +    {
 +        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
 +        bSparse = FALSE;
 +    }
 +    else
 +    {
 +        md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n");
 +        bSparse = TRUE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        sz = DIM*top_global->natoms;
 +
 +        fprintf(stderr, "Allocating Hessian memory...\n\n");
 +
 +        if (bSparse)
 +        {
 +            sparse_matrix = gmx_sparsematrix_init(sz);
 +            sparse_matrix->compressed_symmetric = TRUE;
 +        }
 +        else
 +        {
 +            snew(full_matrix, sz*sz);
 +        }
 +    }
 +
 +    /* Initial values */
 +    t0           = inputrec->init_t;
 +    lam0         = inputrec->fepvals->init_lambda;
 +    t            = t0;
 +    lambda       = lam0;
 +
 +    init_nrnb(nrnb);
 +
 +    where();
 +
 +    /* Write start time and temperature */
 +    print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
 +
 +    /* fudge the number of steps to twice the number of atoms */
 +    inputrec->nsteps = natoms*2;
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
 +                *(top_global->name), (int)inputrec->nsteps);
 +    }
 +
 +    nnodes = cr->nnodes;
 +
 +    /* Make evaluate_energy do a single node force calculation */
 +    cr->nnodes = 1;
 +    evaluate_energy(fplog, cr,
 +                    top_global, state_work, top,
 +                    inputrec, nrnb, wcycle, gstat,
 +                    vsite, constr, fcd, graph, mdatoms, fr,
 +                    mu_tot, enerd, vir, pres, -1, TRUE);
 +    cr->nnodes = nnodes;
 +
 +    /* if forces are not small, warn user */
 +    get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
 +
 +    md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax);
 +    if (state_work->fmax > 1.0e-3)
 +    {
 +        md_print_info(cr, fplog,
 +                      "The force is probably not small enough to "
 +                      "ensure that you are at a minimum.\n"
 +                      "Be aware that negative eigenvalues may occur\n"
 +                      "when the resulting matrix is diagonalized.\n\n");
 +    }
 +
 +    /***********************************************************
 +     *
 +     *      Loop over all pairs in matrix
 +     *
 +     *      do_force called twice. Once with positive and
 +     *      once with negative displacement
 +     *
 +     ************************************************************/
 +
 +    /* The displacement steps are distributed one atom at a time over the nodes */
 +    for (atom = cr->nodeid; atom < natoms; atom += nnodes)
 +    {
 +
 +        for (d = 0; d < DIM; d++)
 +        {
 +            x_min = state_work->s.x[atom][d];
 +
 +            state_work->s.x[atom][d] = x_min - der_range;
 +
 +            /* Make evaluate_energy do a single node force calculation */
 +            cr->nnodes = 1;
 +            evaluate_energy(fplog, cr,
 +                            top_global, state_work, top,
 +                            inputrec, nrnb, wcycle, gstat,
 +                            vsite, constr, fcd, graph, mdatoms, fr,
 +                            mu_tot, enerd, vir, pres, atom*2, FALSE);
 +
 +            for (i = 0; i < natoms; i++)
 +            {
 +                copy_rvec(state_work->f[i], fneg[i]);
 +            }
 +
 +            state_work->s.x[atom][d] = x_min + der_range;
 +
 +            evaluate_energy(fplog, cr,
 +                            top_global, state_work, top,
 +                            inputrec, nrnb, wcycle, gstat,
 +                            vsite, constr, fcd, graph, mdatoms, fr,
 +                            mu_tot, enerd, vir, pres, atom*2+1, FALSE);
 +            cr->nnodes = nnodes;
 +
 +            /* x is restored to original */
 +            state_work->s.x[atom][d] = x_min;
 +
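 +            /* One row of the Hessian by central differences: with f = -dE/dx,
 +             * d2E/(dx_(atom,d) dx_(j,k)) ~ -(f_jk(x+h) - f_jk(x-h))/(2h),
 +             * where h = der_range.
 +             */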
 +            for (j = 0; j < natoms; j++)
 +            {
 +                for (k = 0; (k < DIM); k++)
 +                {
 +                    dfdx[j][k] =
 +                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                }
 +            }
 +
 +            if (!MASTER(cr))
 +            {
 +#ifdef GMX_MPI
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +                MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
 +                         cr->mpi_comm_mygroup);
 +#endif
 +            }
 +            else
 +            {
 +                for (node = 0; (node < nnodes && atom+node < natoms); node++)
 +                {
 +                    if (node > 0)
 +                    {
 +#ifdef GMX_MPI
 +                        MPI_Status stat;
 +                        MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
 +                                 cr->mpi_comm_mygroup, &stat);
 +#undef mpi_type
 +#endif
 +                    }
 +
 +                    row = (atom + node)*DIM + d;
 +
 +                    for (j = 0; j < natoms; j++)
 +                    {
 +                        for (k = 0; k < DIM; k++)
 +                        {
 +                            col = j*DIM + k;
 +
 +                            if (bSparse)
 +                            {
 +                                if (col >= row && dfdx[j][k] != 0.0)
 +                                {
 +                                    gmx_sparsematrix_increment_value(sparse_matrix,
 +                                                                     row, col, dfdx[j][k]);
 +                                }
 +                            }
 +                            else
 +                            {
 +                                full_matrix[row*sz+col] = dfdx[j][k];
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +
 +            if (bVerbose && fplog)
 +            {
 +                fflush(fplog);
 +            }
 +        }
 +        /* write progress */
 +        if (MASTER(cr) && bVerbose)
 +        {
 +            fprintf(stderr, "\rFinished step %d out of %d",
 +                    min(atom+nnodes, natoms), natoms);
 +            fflush(stderr);
 +        }
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr, "\n\nWriting Hessian...\n");
 +        gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
 +    }
 +
 +    finish_em(cr, outf, walltime_accounting, wcycle);
 +
 +    walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2);
 +
 +    return 0;
 +}
index a06fae500db41631d1317db9fd62a16f6747c625,0000000000000000000000000000000000000000..73ba243b6b1641515b1dcbed19603851725462f4
mode 100644,000000..100644
--- /dev/null
@@@ -1,2779 -1,0 +1,2789 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "sim_util.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "copyrite.h"
 +#include "gmx_random.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "genborn.h"
 +#include "nbnxn_atomdata.h"
 +#include "nbnxn_search.h"
 +#include "nbnxn_kernels/nbnxn_kernel_ref.h"
 +#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
 +#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
 +#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
 +
 +#include "gromacs/timing/wallcycle.h"
 +#include "gromacs/timing/walltime_accounting.h"
 +#include "gromacs/utility/gmxmpi.h"
 +#include "gromacs/essentialdynamics/edsam.h"
 +#include "gromacs/pulling/pull.h"
 +#include "gromacs/pulling/pull_rotation.h"
 +
 +#include "adress.h"
 +#include "qmmm.h"
 +
 +#include "nbnxn_cuda_data_mgmt.h"
 +#include "nbnxn_cuda/nbnxn_cuda.h"
 +
 +void print_time(FILE                     *out,
 +                gmx_walltime_accounting_t walltime_accounting,
 +                gmx_int64_t               step,
 +                t_inputrec               *ir,
 +                t_commrec gmx_unused     *cr)
 +{
 +    time_t finish;
 +    char   timebuf[STRLEN];
 +    double dt, elapsed_seconds, time_per_step;
 +    char   buf[48];
 +
 +#ifndef GMX_THREAD_MPI
 +    if (!PAR(cr))
 +#endif
 +    {
 +        fprintf(out, "\r");
 +    }
 +    fprintf(out, "step %s", gmx_step_str(step, buf));
 +    if ((step >= ir->nstlist))
 +    {
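 +        /* Estimate the remaining wall time from the average wall time per step
 +         * so far.
 +         */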
 +        double seconds_since_epoch = gmx_gettime();
 +        elapsed_seconds = seconds_since_epoch - walltime_accounting_get_start_time_stamp(walltime_accounting);
 +        time_per_step   = elapsed_seconds/(step - ir->init_step + 1);
 +        dt              = (ir->nsteps + ir->init_step - step) * time_per_step;
 +
 +        if (ir->nsteps >= 0)
 +        {
 +            if (dt >= 300)
 +            {
 +                finish = (time_t) (seconds_since_epoch + dt);
 +                gmx_ctime_r(&finish, timebuf, STRLEN);
 +                sprintf(buf, "%s", timebuf);
 +                buf[strlen(buf)-1] = '\0';
 +                fprintf(out, ", will finish %s", buf);
 +            }
 +            else
 +            {
 +                fprintf(out, ", remaining wall clock time: %5d s          ", (int)dt);
 +            }
 +        }
 +        else
 +        {
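 +            /* delta_t is in ps per step: dividing by 1000 gives ns per step,
 +             * and multiplying by 86400 s/day over the wall seconds per step
 +             * gives ns/day.
 +             */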
 +            fprintf(out, " performance: %.1f ns/day    ",
 +                    ir->delta_t/1000*24*60*60/time_per_step);
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        fprintf(out, "\n");
 +    }
 +#endif
 +
 +    fflush(out);
 +}
 +
 +void print_date_and_time(FILE *fplog, int nodeid, const char *title,
 +                         const gmx_walltime_accounting_t walltime_accounting)
 +{
 +    int    i;
 +    char   timebuf[STRLEN];
 +    char   time_string[STRLEN];
 +    time_t tmptime;
 +
 +    if (fplog)
 +    {
 +        if (walltime_accounting != NULL)
 +        {
 +            tmptime = (time_t) walltime_accounting_get_start_time_stamp(walltime_accounting);
 +            gmx_ctime_r(&tmptime, timebuf, STRLEN);
 +        }
 +        else
 +        {
 +            tmptime = (time_t) gmx_gettime();
 +            gmx_ctime_r(&tmptime, timebuf, STRLEN);
 +        }
 +        for (i = 0; timebuf[i] >= ' '; i++)
 +        {
 +            time_string[i] = timebuf[i];
 +        }
 +        time_string[i] = '\0';
 +
 +        fprintf(fplog, "%s on node %d %s\n", title, nodeid, time_string);
 +    }
 +}
 +
++void print_start(FILE *fplog, t_commrec *cr,
++                 gmx_walltime_accounting_t walltime_accounting,
++                 const char *name)
++{
++    char buf[STRLEN];
++
++    sprintf(buf, "Started %s", name);
++    print_date_and_time(fplog, cr->nodeid, buf, walltime_accounting);
++}
++
 +static void sum_forces(int start, int end, rvec f[], rvec flr[])
 +{
 +    int i;
 +
 +    if (gmx_debug_at)
 +    {
 +        pr_rvecs(debug, 0, "fsr", f+start, end-start);
 +        pr_rvecs(debug, 0, "flr", flr+start, end-start);
 +    }
 +    for (i = start; (i < end); i++)
 +    {
 +        rvec_inc(f[i], flr[i]);
 +    }
 +}
 +
 +/*
 + * calc_f_el calculates forces due to an electric field.
 + *
 + * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
 + *
 + * Et[] contains the parameters for the time dependent
 + * part of the field.
 + * Ex[] contains the parameters for
 + * the spatial dependent part of the field. You can have cool periodic
 + * fields in principle, but only a constant field is supported
 + * now.
 + * The function should return the energy due to the electric field
 + * (if any) but for now returns 0.
 + *
 + * WARNING:
 + * There can be problems with the virial.
 + * Since the field is not self-consistent this is unavoidable.
 + * For neutral molecules the virial is correct within this approximation.
 + * For neutral systems with many charged molecules the error is small.
 + * But for systems with a net charge or a few charged molecules
 + * the error can be significant when the field is high.
 + * Solution: implement a self-consistent electric field into PME.
 + */
 +static void calc_f_el(FILE *fp, int  start, int homenr,
 +                      real charge[], rvec f[],
 +                      t_cosines Ex[], t_cosines Et[], double t)
 +{
 +    rvec Ext;
 +    real t0;
 +    int  i, m;
 +
 +    for (m = 0; (m < DIM); m++)
 +    {
 +        if (Et[m].n > 0)
 +        {
 +            if (Et[m].n == 3)
 +            {
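 +                /* Gaussian-enveloped cosine pulse: a[0] is the angular
 +                 * frequency, a[1] the pulse centre t0 and a[2] its width sigma,
 +                 * i.e. cos(a[0]*(t-t0))*exp(-(t-t0)^2/(2*sigma^2)).
 +                 */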
 +                t0     = Et[m].a[1];
 +                Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
 +            }
 +            else
 +            {
 +                Ext[m] = cos(Et[m].a[0]*t);
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 1.0;
 +        }
 +        if (Ex[m].n > 0)
 +        {
 +            /* Convert the field strength from V/nm to MD-units */
 +            Ext[m] *= Ex[m].a[0]*FIELDFAC;
 +            for (i = start; (i < start+homenr); i++)
 +            {
 +                f[i][m] += charge[i]*Ext[m];
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 0;
 +        }
 +    }
 +    if (fp != NULL)
 +    {
 +        fprintf(fp, "%10g  %10g  %10g  %10g #FIELD\n", t,
 +                Ext[XX]/FIELDFAC, Ext[YY]/FIELDFAC, Ext[ZZ]/FIELDFAC);
 +    }
 +}
 +
 +static void calc_virial(int start, int homenr, rvec x[], rvec f[],
 +                        tensor vir_part, t_graph *graph, matrix box,
 +                        t_nrnb *nrnb, const t_forcerec *fr, int ePBC)
 +{
 +    int    i, j;
 +    tensor virtest;
 +
 +    /* The short-range virial from surrounding boxes */
 +    clear_mat(vir_part);
 +    calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box);
 +    inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);
 +
 +    /* Calculate partial virial, for local atoms only, based on short range.
 +     * Total virial is computed in global_stat, called from do_md
 +     */
 +    f_calc_vir(start, start+homenr, x, f, vir_part, graph, box);
 +    inc_nrnb(nrnb, eNR_VIRIAL, homenr);
 +
 +    /* Add position restraint contribution */
 +    for (i = 0; i < DIM; i++)
 +    {
 +        vir_part[i][i] += fr->vir_diag_posres[i];
 +    }
 +
 +    /* Add wall contribution */
 +    for (i = 0; i < DIM; i++)
 +    {
 +        vir_part[i][ZZ] += fr->vir_wall_z[i];
 +    }
 +
 +    if (debug)
 +    {
 +        pr_rvecs(debug, 0, "vir_part", vir_part, DIM);
 +    }
 +}
 +
 +static void posres_wrapper(FILE *fplog,
 +                           int flags,
 +                           gmx_bool bSepDVDL,
 +                           t_inputrec *ir,
 +                           t_nrnb *nrnb,
 +                           gmx_localtop_t *top,
 +                           matrix box, rvec x[],
 +                           gmx_enerdata_t *enerd,
 +                           real *lambda,
 +                           t_forcerec *fr)
 +{
 +    t_pbc pbc;
 +    real  v, dvdl;
 +    int   i;
 +
 +    /* Position restraints always require full pbc */
 +    set_pbc(&pbc, ir->ePBC, box);
 +    dvdl = 0;
 +    v    = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms,
 +                  top->idef.iparams_posres,
 +                  (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres,
 +                  ir->ePBC == epbcNONE ? NULL : &pbc,
 +                  lambda[efptRESTRAINT], &dvdl,
 +                  fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
 +    if (bSepDVDL)
 +    {
 +        gmx_print_sepdvdl(fplog, interaction_function[F_POSRES].longname, v, dvdl);
 +    }
 +    enerd->term[F_POSRES] += v;
 +    /* If just the force constant changes, the FEP term is linear,
 +     * but if k changes, it is not.
 +     */
 +    enerd->dvdl_nonlin[efptRESTRAINT] += dvdl;
 +    inc_nrnb(nrnb, eNR_POSRES, top->idef.il[F_POSRES].nr/2);
 +
 +    if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
 +    {
 +        for (i = 0; i < enerd->n_lambda; i++)
 +        {
 +            real dvdl_dum, lambda_dum;
 +
 +            lambda_dum = (i == 0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]);
 +            v          = posres(top->idef.il[F_POSRES].nr, top->idef.il[F_POSRES].iatoms,
 +                                top->idef.iparams_posres,
 +                                (const rvec*)x, NULL, NULL,
 +                                ir->ePBC == epbcNONE ? NULL : &pbc, lambda_dum, &dvdl,
 +                                fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
 +            enerd->enerpart_lambda[i] += v;
 +        }
 +    }
 +}
 +
 +static void fbposres_wrapper(t_inputrec *ir,
 +                             t_nrnb *nrnb,
 +                             gmx_localtop_t *top,
 +                             matrix box, rvec x[],
 +                             gmx_enerdata_t *enerd,
 +                             t_forcerec *fr)
 +{
 +    t_pbc pbc;
 +    real  v;
 +
 +    /* Flat-bottomed position restraints always require full pbc */
 +    set_pbc(&pbc, ir->ePBC, box);
 +    v = fbposres(top->idef.il[F_FBPOSRES].nr, top->idef.il[F_FBPOSRES].iatoms,
 +                 top->idef.iparams_fbposres,
 +                 (const rvec*)x, fr->f_novirsum, fr->vir_diag_posres,
 +                 ir->ePBC == epbcNONE ? NULL : &pbc,
 +                 fr->rc_scaling, fr->ePBC, fr->posres_com);
 +    enerd->term[F_FBPOSRES] += v;
 +    inc_nrnb(nrnb, eNR_FBPOSRES, top->idef.il[F_FBPOSRES].nr/2);
 +}
 +
 +static void pull_potential_wrapper(FILE *fplog,
 +                                   gmx_bool bSepDVDL,
 +                                   t_commrec *cr,
 +                                   t_inputrec *ir,
 +                                   matrix box, rvec x[],
 +                                   rvec f[],
 +                                   tensor vir_force,
 +                                   t_mdatoms *mdatoms,
 +                                   gmx_enerdata_t *enerd,
 +                                   real *lambda,
 +                                   double t)
 +{
 +    t_pbc  pbc;
 +    real   dvdl;
 +
 +    /* Calculate the center of mass forces, this requires communication,
 +     * which is why pull_potential is called close to other communication.
 +     * The virial contribution is calculated directly,
 +     * which is why we call pull_potential after calc_virial.
 +     */
 +    set_pbc(&pbc, ir->ePBC, box);
 +    dvdl                     = 0;
 +    enerd->term[F_COM_PULL] +=
 +        pull_potential(ir->ePull, ir->pull, mdatoms, &pbc,
 +                       cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl);
 +    if (bSepDVDL)
 +    {
 +        gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl);
 +    }
 +    enerd->dvdl_lin[efptRESTRAINT] += dvdl;
 +}
 +
 +static void pme_receive_force_ener(FILE           *fplog,
 +                                   gmx_bool        bSepDVDL,
 +                                   t_commrec      *cr,
 +                                   gmx_wallcycle_t wcycle,
 +                                   gmx_enerdata_t *enerd,
 +                                   t_forcerec     *fr)
 +{
 +    real   e_q, e_lj, v, dvdl_q, dvdl_lj;
 +    float  cycles_ppdpme, cycles_seppme;
 +
 +    cycles_ppdpme = wallcycle_stop(wcycle, ewcPPDURINGPME);
 +    dd_cycles_add(cr->dd, cycles_ppdpme, ddCyclPPduringPME);
 +
 +    /* In case of node-splitting, the PP nodes receive the long-range
 +     * forces, virial and energy from the PME nodes here.
 +     */
 +    wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
 +    dvdl_q  = 0;
 +    dvdl_lj = 0;
 +    gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q,
 +                      fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj,
 +                      &cycles_seppme);
 +    if (bSepDVDL)
 +    {
 +        gmx_print_sepdvdl(fplog, "Electrostatic PME mesh", e_q, dvdl_q);
 +        gmx_print_sepdvdl(fplog, "Lennard-Jones PME mesh", e_lj, dvdl_lj);
 +    }
 +    enerd->term[F_COUL_RECIP] += e_q;
 +    enerd->term[F_LJ_RECIP]   += e_lj;
 +    enerd->dvdl_lin[efptCOUL] += dvdl_q;
 +    enerd->dvdl_lin[efptVDW]  += dvdl_lj;
 +
 +    if (wcycle)
 +    {
 +        dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
 +    }
 +    wallcycle_stop(wcycle, ewcPP_PMEWAITRECVF);
 +}
 +
 +static void print_large_forces(FILE *fp, t_mdatoms *md, t_commrec *cr,
 +                               gmx_int64_t step, real pforce, rvec *x, rvec *f)
 +{
 +    int  i;
 +    real pf2, fn2;
 +    char buf[STEPSTRSIZE];
 +
 +    pf2 = sqr(pforce);
 +    for (i = md->start; i < md->start+md->homenr; i++)
 +    {
 +        fn2 = norm2(f[i]);
 +        /* We also catch NAN, if the compiler does not optimize this away. */
 +        if (fn2 >= pf2 || fn2 != fn2)
 +        {
 +            fprintf(fp, "step %s  atom %6d  x %8.3f %8.3f %8.3f  force %12.5e\n",
 +                    gmx_step_str(step, buf),
 +                    ddglatnr(cr->dd, i), x[i][XX], x[i][YY], x[i][ZZ], sqrt(fn2));
 +        }
 +    }
 +}
 +
 +static void post_process_forces(t_commrec *cr,
 +                                gmx_int64_t step,
 +                                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                                gmx_localtop_t *top,
 +                                matrix box, rvec x[],
 +                                rvec f[],
 +                                tensor vir_force,
 +                                t_mdatoms *mdatoms,
 +                                t_graph *graph,
 +                                t_forcerec *fr, gmx_vsite_t *vsite,
 +                                int flags)
 +{
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite)
 +        {
 +            /* Spread the mesh force on virtual sites to the other particles...
 +             * This is parallelized. MPI communication is performed
 +             * if the constructing atoms aren't local.
 +             */
 +            wallcycle_start(wcycle, ewcVSITESPREAD);
 +            spread_vsite_f(vsite, x, fr->f_novirsum, NULL,
 +                           (flags & GMX_FORCE_VIRIAL), fr->vir_el_recip,
 +                           nrnb,
 +                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
 +            wallcycle_stop(wcycle, ewcVSITESPREAD);
 +        }
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Now add the forces, this is local */
 +            if (fr->bDomDec)
 +            {
 +                sum_forces(0, fr->f_novirsum_n, f, fr->f_novirsum);
 +            }
 +            else
 +            {
 +                sum_forces(mdatoms->start, mdatoms->start+mdatoms->homenr,
 +                           f, fr->f_novirsum);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force, fr->vir_el_recip, vir_force);
 +            }
 +            if (EVDW_PME(fr->vdwtype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force, fr->vir_lj_recip, vir_force);
 +            }
 +            if (debug)
 +            {
 +                pr_rvecs(debug, 0, "vir_force", vir_force, DIM);
 +            }
 +        }
 +    }
 +
 +    if (fr->print_force >= 0)
 +    {
 +        print_large_forces(stderr, mdatoms, cr, step, fr->print_force, x, f);
 +    }
 +}
 +
 +static void do_nb_verlet(t_forcerec *fr,
 +                         interaction_const_t *ic,
 +                         gmx_enerdata_t *enerd,
 +                         int flags, int ilocality,
 +                         int clearF,
 +                         t_nrnb *nrnb,
 +                         gmx_wallcycle_t wcycle)
 +{
 +    int                        nnbl, kernel_type, enr_nbnxn_kernel_ljc, enr_nbnxn_kernel_lj;
 +    char                      *env;
 +    nonbonded_verlet_group_t  *nbvg;
 +    gmx_bool                   bCUDA;
 +
 +    if (!(flags & GMX_FORCE_NONBONDED))
 +    {
 +        /* skip non-bonded calculation */
 +        return;
 +    }
 +
 +    nbvg = &fr->nbv->grp[ilocality];
 +
 +    /* CUDA kernel launch overhead is already timed separately */
 +    if (fr->cutoff_scheme != ecutsVERLET)
 +    {
 +        gmx_incons("Invalid cut-off scheme passed!");
 +    }
 +
 +    bCUDA = (nbvg->kernel_type == nbnxnk8x8x8_CUDA);
 +
 +    if (!bCUDA)
 +    {
 +        wallcycle_sub_start(wcycle, ewcsNONBONDED);
 +    }
 +    switch (nbvg->kernel_type)
 +    {
 +        case nbnxnk4x4_PlainC:
 +            nbnxn_kernel_ref(&nbvg->nbl_lists,
 +                             nbvg->nbat, ic,
 +                             fr->shift_vec,
 +                             flags,
 +                             clearF,
 +                             fr->fshift[0],
 +                             enerd->grpp.ener[egCOULSR],
 +                             fr->bBHAM ?
 +                             enerd->grpp.ener[egBHAMSR] :
 +                             enerd->grpp.ener[egLJSR]);
 +            break;
 +
 +        case nbnxnk4xN_SIMD_4xN:
 +            nbnxn_kernel_simd_4xn(&nbvg->nbl_lists,
 +                                  nbvg->nbat, ic,
 +                                  nbvg->ewald_excl,
 +                                  fr->shift_vec,
 +                                  flags,
 +                                  clearF,
 +                                  fr->fshift[0],
 +                                  enerd->grpp.ener[egCOULSR],
 +                                  fr->bBHAM ?
 +                                  enerd->grpp.ener[egBHAMSR] :
 +                                  enerd->grpp.ener[egLJSR]);
 +            break;
 +        case nbnxnk4xN_SIMD_2xNN:
 +            nbnxn_kernel_simd_2xnn(&nbvg->nbl_lists,
 +                                   nbvg->nbat, ic,
 +                                   nbvg->ewald_excl,
 +                                   fr->shift_vec,
 +                                   flags,
 +                                   clearF,
 +                                   fr->fshift[0],
 +                                   enerd->grpp.ener[egCOULSR],
 +                                   fr->bBHAM ?
 +                                   enerd->grpp.ener[egBHAMSR] :
 +                                   enerd->grpp.ener[egLJSR]);
 +            break;
 +
 +        case nbnxnk8x8x8_CUDA:
 +            nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, nbvg->nbat, flags, ilocality);
 +            break;
 +
 +        case nbnxnk8x8x8_PlainC:
 +            nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
 +                                 nbvg->nbat, ic,
 +                                 fr->shift_vec,
 +                                 flags,
 +                                 clearF,
 +                                 nbvg->nbat->out[0].f,
 +                                 fr->fshift[0],
 +                                 enerd->grpp.ener[egCOULSR],
 +                                 fr->bBHAM ?
 +                                 enerd->grpp.ener[egBHAMSR] :
 +                                 enerd->grpp.ener[egLJSR]);
 +            break;
 +
 +        default:
 +            gmx_incons("Invalid nonbonded kernel type passed!");
 +
 +    }
 +    if (!bCUDA)
 +    {
 +        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 +    }
 +
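 +    /* Flop accounting: select the eNR counter that matches the
 +     * electrostatics treatment of the kernel that was just called
 +     * (reaction-field/plain cut-off, analytical Ewald or tabulated Ewald).
 +     */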
 +    if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
 +    {
 +        enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_RF;
 +    }
 +    else if ((!bCUDA && nbvg->ewald_excl == ewaldexclAnalytical) ||
 +             (bCUDA && nbnxn_cuda_is_kernel_ewald_analytical(fr->nbv->cu_nbv)))
 +    {
 +        enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_EWALD;
 +    }
 +    else
 +    {
 +        enr_nbnxn_kernel_ljc = eNR_NBNXN_LJ_TAB;
 +    }
 +    enr_nbnxn_kernel_lj = eNR_NBNXN_LJ;
 +    if (flags & GMX_FORCE_ENERGY)
 +    {
 +        /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
 +        enr_nbnxn_kernel_ljc += 1;
 +        enr_nbnxn_kernel_lj  += 1;
 +    }
 +
 +    inc_nrnb(nrnb, enr_nbnxn_kernel_ljc,
 +             nbvg->nbl_lists.natpair_ljq);
 +    inc_nrnb(nrnb, enr_nbnxn_kernel_lj,
 +             nbvg->nbl_lists.natpair_lj);
 +    inc_nrnb(nrnb, enr_nbnxn_kernel_ljc-eNR_NBNXN_LJ_RF+eNR_NBNXN_RF,
 +             nbvg->nbl_lists.natpair_q);
 +}
 +
 +void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
 +                         t_inputrec *inputrec,
 +                         gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                         gmx_localtop_t *top,
 +                         gmx_groups_t gmx_unused *groups,
 +                         matrix box, rvec x[], history_t *hist,
 +                         rvec f[],
 +                         tensor vir_force,
 +                         t_mdatoms *mdatoms,
 +                         gmx_enerdata_t *enerd, t_fcdata *fcd,
 +                         real *lambda, t_graph *graph,
 +                         t_forcerec *fr, interaction_const_t *ic,
 +                         gmx_vsite_t *vsite, rvec mu_tot,
 +                         double t, FILE *field, gmx_edsam_t ed,
 +                         gmx_bool bBornRadii,
 +                         int flags)
 +{
 +    int                 cg0, cg1, i, j;
 +    int                 start, homenr;
 +    int                 nb_kernel_type;
 +    double              mu[2*DIM];
 +    gmx_bool            bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
 +    gmx_bool            bDoLongRange, bDoForces, bSepLRF, bUseGPU, bUseOrEmulGPU;
 +    gmx_bool            bDiffKernels = FALSE;
 +    matrix              boxs;
 +    rvec                vzero, box_diag;
 +    real                e, v, dvdl;
 +    float               cycles_pme, cycles_force, cycles_wait_gpu;
 +    nonbonded_verlet_t *nbv;
 +
 +    cycles_force    = 0;
 +    cycles_wait_gpu = 0;
 +    nbv             = fr->nbv;
 +    nb_kernel_type  = fr->nbv->grp[0].kernel_type;
 +
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    cg0 = 0;
 +    if (DOMAINDECOMP(cr))
 +    {
 +        cg1 = cr->dd->ncg_tot;
 +    }
 +    else
 +    {
 +        cg1 = top->cgs.nr;
 +    }
 +    if (fr->n_tpi > 0)
 +    {
 +        cg1--;
 +    }
 +
 +    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
 +    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
 +    bFillGrid     = (bNS && bStateChanged);
 +    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR));
 +    bDoForces     = (flags & GMX_FORCE_FORCES);
 +    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
 +    bUseGPU       = fr->nbv->bUseGPU;
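 +    /* The plain-C 8x8x8 kernel follows the GPU kernel layout and is used
 +     * to emulate the GPU path on the CPU, hence "OrEmul" below. */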
 +    bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbnxnk8x8x8_PlainC);
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fr, box);
 +
 +        if (NEED_MUTOT(*inputrec))
 +        {
 +            /* Calculate total (local) dipole moment in a temporary common array.
 +             * This makes it possible to sum them over nodes faster.
 +             */
 +            calc_mu(start, homenr,
 +                    x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
 +                    mu, mu+DIM);
 +        }
 +    }
 +
 +    if (fr->ePBC != epbcNONE)
 +    {
 +        /* Compute shift vectors every step,
 +         * because of pressure coupling or box deformation!
 +         */
 +        if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +        {
 +            calc_shifts(box, fr->shift_vec);
 +        }
 +
 +        if (bCalcCGCM)
 +        {
 +            put_atoms_in_box_omp(fr->ePBC, box, homenr, x);
 +            inc_nrnb(nrnb, eNR_SHIFTX, homenr);
 +        }
 +        else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
 +        {
 +            unshift_self(graph, box, x);
 +        }
 +    }
 +
 +    nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
 +                                 fr->shift_vec, nbv->grp[0].nbat);
 +
 +#ifdef GMX_MPI
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Send particle coordinates to the pme nodes.
 +         * Since this is only implemented for domain decomposition
 +         * and domain decomposition does not use the graph,
 +         * we do not need to worry about shifting.
 +         */
 +
 +        int pme_flags = 0;
 +
 +        wallcycle_start(wcycle, ewcPP_PMESENDX);
 +
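 +        /* With two walls the Ewald sum is done in a box elongated along z
 +         * by wall_ewald_zfac, so a correspondingly scaled copy of the box
 +         * is sent to the PME nodes.
 +         */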
 +        bBS = (inputrec->nwall == 2);
 +        if (bBS)
 +        {
 +            copy_mat(box, boxs);
 +            svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            pme_flags |= GMX_PME_DO_COULOMB;
 +        }
 +
 +        if (EVDW_PME(fr->vdwtype))
 +        {
 +            pme_flags |= GMX_PME_DO_LJ;
 +            if (fr->ljpme_combination_rule == eljpmeLB)
 +            {
 +                pme_flags |= GMX_PME_LJ_LB;
 +            }
 +        }
 +
 +        gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
 +                                 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
 +                                 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
 +                                 pme_flags, step);
 +
 +        wallcycle_stop(wcycle, ewcPP_PMESENDX);
 +    }
 +#endif /* GMX_MPI */
 +
 +    /* do gridding for pair search */
 +    if (bNS)
 +    {
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog, graph, fr->ePBC, box, x);
 +        }
 +
 +        clear_rvec(vzero);
 +        box_diag[XX] = box[XX][XX];
 +        box_diag[YY] = box[YY][YY];
 +        box_diag[ZZ] = box[ZZ][ZZ];
 +
 +        wallcycle_start(wcycle, ewcNS);
 +        if (!fr->bDomDec)
 +        {
 +            wallcycle_sub_start(wcycle, ewcsNBS_GRID_LOCAL);
 +            nbnxn_put_on_grid(nbv->nbs, fr->ePBC, box,
 +                              0, vzero, box_diag,
 +                              0, mdatoms->homenr, -1, fr->cginfo, x,
 +                              0, NULL,
 +                              nbv->grp[eintLocal].kernel_type,
 +                              nbv->grp[eintLocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNBS_GRID_LOCAL);
 +        }
 +        else
 +        {
 +            wallcycle_sub_start(wcycle, ewcsNBS_GRID_NONLOCAL);
 +            nbnxn_put_on_grid_nonlocal(nbv->nbs, domdec_zones(cr->dd),
 +                                       fr->cginfo, x,
 +                                       nbv->grp[eintNonlocal].kernel_type,
 +                                       nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
 +        }
 +
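 +        /* When there is a single group, or the local and non-local groups
 +         * share the same atom-data structure, one call covering all atoms
 +         * suffices; otherwise set the local and non-local data separately.
 +         */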
 +        if (nbv->ngrp == 1 ||
 +            nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
 +        {
 +            nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatAll,
 +                               nbv->nbs, mdatoms, fr->cginfo);
 +        }
 +        else
 +        {
 +            nbnxn_atomdata_set(nbv->grp[eintLocal].nbat, eatLocal,
 +                               nbv->nbs, mdatoms, fr->cginfo);
 +            nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat, eatAll,
 +                               nbv->nbs, mdatoms, fr->cginfo);
 +        }
 +        wallcycle_stop(wcycle, ewcNS);
 +    }
 +
 +    /* initialize the GPU atom data and copy shift vector */
 +    if (bUseGPU)
 +    {
 +        if (bNS)
 +        {
 +            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +            nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
 +            wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +        }
 +
 +        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +        nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
 +        wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +    }
 +
 +    /* do local pair search */
 +    if (bNS)
 +    {
 +        wallcycle_start_nocount(wcycle, ewcNS);
 +        wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
 +        nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintLocal].nbat,
 +                            &top->excls,
 +                            ic->rlist,
 +                            nbv->min_ci_balanced,
 +                            &nbv->grp[eintLocal].nbl_lists,
 +                            eintLocal,
 +                            nbv->grp[eintLocal].kernel_type,
 +                            nrnb);
 +        wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
 +
 +        if (bUseGPU)
 +        {
 +            /* initialize local pair-list on the GPU */
 +            nbnxn_cuda_init_pairlist(nbv->cu_nbv,
 +                                     nbv->grp[eintLocal].nbl_lists.nbl[0],
 +                                     eintLocal);
 +        }
 +        wallcycle_stop(wcycle, ewcNS);
 +    }
 +    else
 +    {
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +        nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, FALSE, x,
 +                                        nbv->grp[eintLocal].nbat);
 +        wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +    }
 +
 +    if (bUseGPU)
 +    {
 +        wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
 +        /* launch local nonbonded F on GPU */
 +        do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
 +                     nrnb, wcycle);
 +        wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +    }
 +
 +    /* Communicate coordinates and sum dipole if necessary +
 +       do non-local pair search */
 +    if (DOMAINDECOMP(cr))
 +    {
 +        bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
 +                        nbv->grp[eintLocal].kernel_type);
 +
 +        if (bDiffKernels)
 +        {
 +            /* With GPU+CPU non-bonded calculations we need to copy
 +             * the local coordinates to the non-local nbat struct
 +             * (in CPU format) as the non-local kernel call also
 +             * calculates the local - non-local interactions.
 +             */
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +            nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatLocal, TRUE, x,
 +                                            nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +            wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +
 +        if (bNS)
 +        {
 +            wallcycle_start_nocount(wcycle, ewcNS);
 +            wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
 +
 +            if (bDiffKernels)
 +            {
 +                nbnxn_grid_add_simple(nbv->nbs, nbv->grp[eintNonlocal].nbat);
 +            }
 +
 +            nbnxn_make_pairlist(nbv->nbs, nbv->grp[eintNonlocal].nbat,
 +                                &top->excls,
 +                                ic->rlist,
 +                                nbv->min_ci_balanced,
 +                                &nbv->grp[eintNonlocal].nbl_lists,
 +                                eintNonlocal,
 +                                nbv->grp[eintNonlocal].kernel_type,
 +                                nrnb);
 +
 +            wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
 +
 +            if (nbv->grp[eintNonlocal].kernel_type == nbnxnk8x8x8_CUDA)
 +            {
 +                /* initialize non-local pair-list on the GPU */
 +                nbnxn_cuda_init_pairlist(nbv->cu_nbv,
 +                                         nbv->grp[eintNonlocal].nbl_lists.nbl[0],
 +                                         eintNonlocal);
 +            }
 +            wallcycle_stop(wcycle, ewcNS);
 +        }
 +        else
 +        {
 +            wallcycle_start(wcycle, ewcMOVEX);
 +            dd_move_x(cr->dd, box, x);
 +
 +            /* When we don't need the total dipole we sum it in global_stat */
 +            if (bStateChanged && NEED_MUTOT(*inputrec))
 +            {
 +                gmx_sumd(2*DIM, mu, cr);
 +            }
 +            wallcycle_stop(wcycle, ewcMOVEX);
 +
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
 +            nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs, eatNonlocal, FALSE, x,
 +                                            nbv->grp[eintNonlocal].nbat);
 +            wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
 +            cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +
 +        if (bUseGPU && !bDiffKernels)
 +        {
 +            wallcycle_start(wcycle, ewcLAUNCH_GPU_NB);
 +            /* launch non-local nonbonded F on GPU */
 +            do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
 +                         nrnb, wcycle);
 +            cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +        }
 +    }
 +
 +    if (bUseGPU)
 +    {
 +        /* launch D2H copy-back F */
 +        wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +        if (DOMAINDECOMP(cr) && !bDiffKernels)
 +        {
 +            nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat,
 +                                      flags, eatNonlocal);
 +        }
 +        nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat,
 +                                  flags, eatLocal);
 +        cycles_force += wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +    }
 +
 +    if (bStateChanged && NEED_MUTOT(*inputrec))
 +    {
 +        if (PAR(cr))
 +        {
 +            gmx_sumd(2*DIM, mu, cr);
 +        }
 +
 +        for (i = 0; i < 2; i++)
 +        {
 +            for (j = 0; j < DIM; j++)
 +            {
 +                fr->mu_tot[i][j] = mu[i*DIM + j];
 +            }
 +        }
 +    }
 +    if (fr->efep == efepNO)
 +    {
 +        copy_rvec(fr->mu_tot[0], mu_tot);
 +    }
 +    else
 +    {
 +        for (j = 0; j < DIM; j++)
 +        {
 +            mu_tot[j] =
 +                (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
 +                lambda[efptCOUL]*fr->mu_tot[1][j];
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(fr, bNS, enerd, MASTER(cr));
 +    clear_rvecs(SHIFTS, fr->fshift);
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle, ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd, nrnb);
 +        }
 +    }
 +
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle, ewcROT);
 +        do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
 +        wallcycle_stop(wcycle, ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_force_lowlevel.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle, ewcFORCE);
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum)
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr, fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        /* Clear the short- and long-range forces */
 +        clear_rvecs(fr->natoms_force_constr, f);
 +        if (bSepLRF && do_per_step(step, inputrec->nstcalclr))
 +        {
 +            clear_rvecs(fr->natoms_force_constr, fr->f_twin);
 +        }
 +
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* When the non-bonded forces are computed on the CPU, we calculate them here.
 +     * We do this before calling do_force_lowlevel, since there the bonded
 +     * forces are calculated before PME, which does communication.
 +     * With this order, imbalance between the non-bonded and bonded force
 +     * calculations can be balanced out by the domain decomposition load balancing.
 +     */
 +
 +    if (!bUseOrEmulGPU)
 +    {
 +        /* Maybe we should move this into do_force_lowlevel */
 +        do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
 +                     nrnb, wcycle);
 +    }
 +
 +    if (!bUseOrEmulGPU || bDiffKernels)
 +    {
 +        int aloc;
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
 +                         bDiffKernels ? enbvClearFYes : enbvClearFNo,
 +                         nrnb, wcycle);
 +        }
 +
 +        if (!bUseOrEmulGPU)
 +        {
 +            aloc = eintLocal;
 +        }
 +        else
 +        {
 +            aloc = eintNonlocal;
 +        }
 +
 +        /* Add all the non-bonded forces to the normal force array.
 +         * With domain decomposition this can be split into a local and
 +         * a non-local part to overlap communication with calculation.
 +         */
 +        cycles_force += wallcycle_stop(wcycle, ewcFORCE);
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +        nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatAll, nbv->grp[aloc].nbat, f);
 +        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +        cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_start_nocount(wcycle, ewcFORCE);
 +
 +        /* if there are multiple fshift output buffers reduce them */
 +        if ((flags & GMX_FORCE_VIRIAL) &&
 +            nbv->grp[aloc].nbl_lists.nnbl > 1)
 +        {
 +            nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
 +                                                     fr->fshift);
 +        }
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if (fr->bQMMM)
 +    {
 +        update_QMMMrec(cr, fr, x, mdatoms, box, top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
 +                       enerd, lambda, fr);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
 +    {
 +        fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */
 +    do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
 +                      cr, nrnb, wcycle, mdatoms,
 +                      x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
 +                      &(top->atomtypes), bBornRadii, box,
 +                      inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
 +                      flags, &cycles_pme);
 +
 +    if (bSepLRF)
 +    {
 +        if (do_per_step(step, inputrec->nstcalclr))
 +        {
 +            /* Add the long range forces to the short range forces */
 +            for (i = 0; i < fr->natoms_force_constr; i++)
 +            {
 +                rvec_add(fr->f_twin[i], f[i], f[i]);
 +            }
 +        }
 +    }
 +
 +    cycles_force += wallcycle_stop(wcycle, ewcFORCE);
 +
 +    if (ed)
 +    {
 +        do_flood(cr, inputrec, x, f, ed, box, step, bNS);
 +    }
 +
 +    if (bUseOrEmulGPU && !bDiffKernels)
 +    {
 +        /* wait for non-local forces (or calculate in emulation mode) */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            if (bUseGPU)
 +            {
 +                float cycles_tmp;
 +
 +                wallcycle_start(wcycle, ewcWAIT_GPU_NB_NL);
 +                nbnxn_cuda_wait_gpu(nbv->cu_nbv,
 +                                    nbv->grp[eintNonlocal].nbat,
 +                                    flags, eatNonlocal,
 +                                    enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
 +                                    fr->fshift);
 +                cycles_tmp       = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
 +                cycles_wait_gpu += cycles_tmp;
 +                cycles_force    += cycles_tmp;
 +            }
 +            else
 +            {
 +                wallcycle_start_nocount(wcycle, ewcFORCE);
 +                do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
 +                             nrnb, wcycle);
 +                cycles_force += wallcycle_stop(wcycle, ewcFORCE);
 +            }
 +            wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +            wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +            /* skip the reduction if there was no non-local work to do */
 +            if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
 +            {
 +                nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatNonlocal,
 +                                               nbv->grp[eintNonlocal].nbat, f);
 +            }
 +            wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +            cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle, ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd, f, fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd, fr->f_novirsum, NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd, fr->f_twin, NULL);
 +                }
 +            }
 +            wallcycle_stop(wcycle, ewcMOVEF);
 +        }
 +    }
 +
 +    if (bUseOrEmulGPU)
 +    {
 +        /* wait for local forces (or calculate in emulation mode) */
 +        if (bUseGPU)
 +        {
 +            wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
 +            nbnxn_cuda_wait_gpu(nbv->cu_nbv,
 +                                nbv->grp[eintLocal].nbat,
 +                                flags, eatLocal,
 +                                enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
 +                                fr->fshift);
 +            cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
 +
 +            /* now clear the GPU outputs while we finish the step on the CPU */
 +
 +            wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
 +            nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags);
 +            wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
 +        }
 +        else
 +        {
 +            wallcycle_start_nocount(wcycle, ewcFORCE);
 +            do_nb_verlet(fr, ic, enerd, flags, eintLocal,
 +                         DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
 +                         nrnb, wcycle);
 +            wallcycle_stop(wcycle, ewcFORCE);
 +        }
 +        wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
 +        wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
 +        if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
 +        {
 +            /* skip the reduction if there was no local work to do */
 +            nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs, eatLocal,
 +                                           nbv->grp[eintLocal].nbat, f);
 +        }
 +        wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
 +        wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd, nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
 +            if (bUseGPU)
 +            {
 +                dd_cycles_add(cr->dd, cycles_wait_gpu, ddCyclWaitGPU);
 +            }
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start, homenr, mdatoms->chargeA, fr->f_novirsum,
 +                      inputrec->ex, inputrec->et, t);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirsum = f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle, ewcVSITESPREAD);
 +            spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb,
 +                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
 +            wallcycle_stop(wcycle, ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle, ewcVSITESPREAD);
 +                spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL,
 +                               nrnb,
 +                               &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
 +                wallcycle_stop(wcycle, ewcVSITESPREAD);
 +            }
 +        }
 +
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(mdatoms->start, mdatoms->homenr, x, f,
 +                        vir_force, graph, box, nrnb, fr, inputrec->ePBC);
 +        }
 +    }
 +
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
 +                               f, vir_force, mdatoms, enerd, lambda, t);
 +    }
 +
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle, ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
 +        wallcycle_stop(wcycle, ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        /* In case of node-splitting, the PP nodes receive the long-range
 +         * forces, virial and energy from the PME nodes here.
 +         */
 +        pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
 +    }
 +
 +    if (bDoForces)
 +    {
 +        post_process_forces(cr, step, nrnb, wcycle,
 +                            top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
 +                            flags);
 +    }
 +
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(enerd->grpp), enerd->term);
 +}
 +
 +void do_force_cutsGROUP(FILE *fplog, t_commrec *cr,
 +                        t_inputrec *inputrec,
 +                        gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                        gmx_localtop_t *top,
 +                        gmx_groups_t *groups,
 +                        matrix box, rvec x[], history_t *hist,
 +                        rvec f[],
 +                        tensor vir_force,
 +                        t_mdatoms *mdatoms,
 +                        gmx_enerdata_t *enerd, t_fcdata *fcd,
 +                        real *lambda, t_graph *graph,
 +                        t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot,
 +                        double t, FILE *field, gmx_edsam_t ed,
 +                        gmx_bool bBornRadii,
 +                        int flags)
 +{
 +    int        cg0, cg1, i, j;
 +    int        start, homenr;
 +    double     mu[2*DIM];
 +    gmx_bool   bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
 +    gmx_bool   bDoLongRangeNS, bDoForces, bDoPotential, bSepLRF;
 +    gmx_bool   bDoAdressWF;
 +    matrix     boxs;
 +    rvec       vzero, box_diag;
 +    real       e, v, dvdlambda[efptNR];
 +    t_pbc      pbc;
 +    float      cycles_pme, cycles_force;
 +
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        pd_cg_range(cr, &cg0, &cg1);
 +    }
 +    else
 +    {
 +        cg0 = 0;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            cg1 = cr->dd->ncg_tot;
 +        }
 +        else
 +        {
 +            cg1 = top->cgs.nr;
 +        }
 +        if (fr->n_tpi > 0)
 +        {
 +            cg1--;
 +        }
 +    }
 +
 +    bStateChanged  = (flags & GMX_FORCE_STATECHANGED);
 +    bNS            = (flags & GMX_FORCE_NS) && (fr->bAllvsAll == FALSE);
 +    /* Should we update the long-range neighborlists at this step? */
 +    bDoLongRangeNS = fr->bTwinRange && bNS;
 +    /* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */
 +    bFillGrid      = (bNS && bStateChanged);
 +    bCalcCGCM      = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoForces      = (flags & GMX_FORCE_FORCES);
 +    bDoPotential   = (flags & GMX_FORCE_ENERGY);
 +    bSepLRF        = ((inputrec->nstcalclr > 1) && bDoForces &&
 +                      (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR));
 +
 +    /* should probably move this to the forcerec since it doesn't change */
 +    bDoAdressWF   = ((fr->adress_type != eAdressOff));
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fr, box);
 +
 +        if (NEED_MUTOT(*inputrec))
 +        {
 +            /* Calculate total (local) dipole moment in a temporary common array.
 +             * This makes it possible to sum them over nodes faster.
 +             */
 +            calc_mu(start, homenr,
 +                    x, mdatoms->chargeA, mdatoms->chargeB, mdatoms->nChargePerturbed,
 +                    mu, mu+DIM);
 +        }
 +    }
 +
 +    if (fr->ePBC != epbcNONE)
 +    {
 +        /* Compute shift vectors every step,
 +         * because of pressure coupling or box deformation!
 +         */
 +        if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +        {
 +            calc_shifts(box, fr->shift_vec);
 +        }
 +
 +        if (bCalcCGCM)
 +        {
 +            put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, box,
 +                                     &(top->cgs), x, fr->cg_cm);
 +            inc_nrnb(nrnb, eNR_CGCM, homenr);
 +            inc_nrnb(nrnb, eNR_RESETX, cg1-cg0);
 +        }
 +        else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
 +        {
 +            unshift_self(graph, box, x);
 +        }
 +    }
 +    else if (bCalcCGCM)
 +    {
 +        calc_cgcm(fplog, cg0, cg1, &(top->cgs), x, fr->cg_cm);
 +        inc_nrnb(nrnb, eNR_CGCM, homenr);
 +    }
 +
 +    if (bCalcCGCM)
 +    {
 +        if (PAR(cr))
 +        {
 +            move_cgcm(fplog, cr, fr->cg_cm);
 +        }
 +        if (gmx_debug_at)
 +        {
 +            pr_rvecs(debug, 0, "cgcm", fr->cg_cm, top->cgs.nr);
 +        }
 +    }
 +
 +#ifdef GMX_MPI
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Send particle coordinates to the pme nodes.
 +         * Since this is only implemented for domain decomposition
 +         * and domain decomposition does not use the graph,
 +         * we do not need to worry about shifting.
 +         */
 +
 +        int pme_flags = 0;
 +
 +        wallcycle_start(wcycle, ewcPP_PMESENDX);
 +
 +        bBS = (inputrec->nwall == 2);
 +        if (bBS)
 +        {
 +            copy_mat(box, boxs);
 +            svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            pme_flags |= GMX_PME_DO_COULOMB;
 +        }
 +
 +        if (EVDW_PME(fr->vdwtype))
 +        {
 +            pme_flags |= GMX_PME_DO_LJ;
 +            if (fr->ljpme_combination_rule == eljpmeLB)
 +            {
 +                pme_flags |= GMX_PME_LJ_LB;
 +            }
 +        }
 +
 +        gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
 +                                 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
 +                                 (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
 +                                 pme_flags, step);
 +
 +        wallcycle_stop(wcycle, ewcPP_PMESENDX);
 +    }
 +#endif /* GMX_MPI */
 +
 +    /* Communicate coordinates and sum dipole if necessary */
 +    if (PAR(cr))
 +    {
 +        wallcycle_start(wcycle, ewcMOVEX);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_move_x(cr->dd, box, x);
 +        }
 +        else
 +        {
 +            move_x(cr, x, nrnb);
 +        }
 +        wallcycle_stop(wcycle, ewcMOVEX);
 +    }
 +
 +    /* update the AdResS weights beforehand */
 +    if (bStateChanged && bDoAdressWF)
 +    {
 +        /* need pbc for adress weight calculation with pbc_dx */
 +        set_pbc(&pbc, inputrec->ePBC, box);
 +        if (fr->adress_site == eAdressSITEcog)
 +        {
 +            update_adress_weights_cog(top->idef.iparams, top->idef.il, x, fr, mdatoms,
 +                                      inputrec->ePBC == epbcNONE ? NULL : &pbc);
 +        }
 +        else if (fr->adress_site == eAdressSITEcom)
 +        {
 +            update_adress_weights_com(fplog, cg0, cg1, &(top->cgs), x, fr, mdatoms,
 +                                      inputrec->ePBC == epbcNONE ? NULL : &pbc);
 +        }
 +        else if (fr->adress_site == eAdressSITEatomatom)
 +        {
 +            update_adress_weights_atom_per_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms,
 +                                                inputrec->ePBC == epbcNONE ? NULL : &pbc);
 +        }
 +        else
 +        {
 +            update_adress_weights_atom(cg0, cg1, &(top->cgs), x, fr, mdatoms,
 +                                       inputrec->ePBC == epbcNONE ? NULL : &pbc);
 +        }
 +    }
 +
 +    if (NEED_MUTOT(*inputrec))
 +    {
 +
 +        if (bStateChanged)
 +        {
 +            if (PAR(cr))
 +            {
 +                gmx_sumd(2*DIM, mu, cr);
 +            }
 +            for (i = 0; i < 2; i++)
 +            {
 +                for (j = 0; j < DIM; j++)
 +                {
 +                    fr->mu_tot[i][j] = mu[i*DIM + j];
 +                }
 +            }
 +        }
 +        if (fr->efep == efepNO)
 +        {
 +            copy_rvec(fr->mu_tot[0], mu_tot);
 +        }
 +        else
 +        {
 +            for (j = 0; j < DIM; j++)
 +            {
 +                mu_tot[j] =
 +                    (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
 +            }
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(fr, bNS, enerd, MASTER(cr));
 +    clear_rvecs(SHIFTS, fr->fshift);
 +
 +    if (bNS)
 +    {
 +        wallcycle_start(wcycle, ewcNS);
 +
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog, graph, fr->ePBC, box, x);
 +        }
 +
 +        /* Do the actual neighbour searching */
 +        ns(fplog, fr, box,
 +           groups, top, mdatoms,
 +           cr, nrnb, bFillGrid,
 +           bDoLongRangeNS);
 +
 +        wallcycle_stop(wcycle, ewcNS);
 +    }
 +
 +    if (inputrec->implicit_solvent && bNS)
 +    {
 +        make_gb_nblist(cr, inputrec->gb_algorithm,
 +                       x, box, fr, &top->idef, graph, fr->born);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle, ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd, nrnb);
 +        }
 +    }
 +
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle, ewcROT);
 +        do_rotation(cr, inputrec, box, x, t, step, wcycle, bNS);
 +        wallcycle_stop(wcycle, ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_force_lowlevel.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle, ewcFORCE);
 +
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum)
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr, fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        /* Clear the short- and long-range forces */
 +        clear_rvecs(fr->natoms_force_constr, f);
 +        if (bSepLRF && do_per_step(step, inputrec->nstcalclr))
 +        {
 +            clear_rvecs(fr->natoms_force_constr, fr->f_twin);
 +        }
 +
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if (fr->bQMMM)
 +    {
 +        update_QMMMrec(cr, fr, x, mdatoms, box, top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
 +                       enerd, lambda, fr);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
 +    {
 +        fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */
 +    do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
 +                      cr, nrnb, wcycle, mdatoms,
 +                      x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
 +                      &(top->atomtypes), bBornRadii, box,
 +                      inputrec->fepvals, lambda,
 +                      graph, &(top->excls), fr->mu_tot,
 +                      flags,
 +                      &cycles_pme);
 +
 +    if (bSepLRF)
 +    {
 +        if (do_per_step(step, inputrec->nstcalclr))
 +        {
 +            /* Add the long range forces to the short range forces */
 +            for (i = 0; i < fr->natoms_force_constr; i++)
 +            {
 +                rvec_add(fr->f_twin[i], f[i], f[i]);
 +            }
 +        }
 +    }
 +
 +    cycles_force = wallcycle_stop(wcycle, ewcFORCE);
 +
 +    if (ed)
 +    {
 +        do_flood(cr, inputrec, x, f, ed, box, step, bNS);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd, nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd, cycles_force-cycles_pme, ddCyclF);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start, homenr, mdatoms->chargeA, fr->f_novirsum,
 +                      inputrec->ex, inputrec->et, t);
 +        }
 +
 +        if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
 +        {
 +            /* Compute thermodynamic force in hybrid AdResS region */
 +            adress_thermo_force(start, homenr, &(top->cgs), x, fr->f_novirsum, fr, mdatoms,
 +                                inputrec->ePBC == epbcNONE ? NULL : &pbc);
 +        }
 +
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle, ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd, f, fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd, fr->f_novirsum, NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd, fr->f_twin, NULL);
 +                }
 +            }
 +            else
 +            {
 +                pd_move_f(cr, f, nrnb);
 +                if (bSepLRF)
 +                {
 +                    pd_move_f(cr, fr->f_twin, nrnb);
 +                }
 +            }
 +            wallcycle_stop(wcycle, ewcMOVEF);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirsum = f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle, ewcVSITESPREAD);
 +            spread_vsite_f(vsite, x, f, fr->fshift, FALSE, NULL, nrnb,
 +                           &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
 +            wallcycle_stop(wcycle, ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle, ewcVSITESPREAD);
 +                spread_vsite_f(vsite, x, fr->f_twin, NULL, FALSE, NULL,
 +                               nrnb,
 +                               &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
 +                wallcycle_stop(wcycle, ewcVSITESPREAD);
 +            }
 +        }
 +
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(mdatoms->start, mdatoms->homenr, x, f,
 +                        vir_force, graph, box, nrnb, fr, inputrec->ePBC);
 +        }
 +    }
 +
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
 +                               f, vir_force, mdatoms, enerd, lambda, t);
 +    }
 +
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle, ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr, step, t);
 +        wallcycle_stop(wcycle, ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        /* In case of node-splitting, the PP nodes receive the long-range
 +         * forces, virial and energy from the PME nodes here.
 +         */
 +        pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
 +    }
 +
 +    if (bDoForces)
 +    {
 +        post_process_forces(cr, step, nrnb, wcycle,
 +                            top, box, x, f, vir_force, mdatoms, graph, fr, vsite,
 +                            flags);
 +    }
 +
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(enerd->grpp), enerd->term);
 +}
 +
 +void do_force(FILE *fplog, t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_groups_t *groups,
 +              matrix box, rvec x[], history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd, t_fcdata *fcd,
 +              real *lambda, t_graph *graph,
 +              t_forcerec *fr,
 +              gmx_vsite_t *vsite, rvec mu_tot,
 +              double t, FILE *field, gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
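 +    /* This is a thin dispatcher: the actual work is done in the
 +     * cut-off-scheme specific functions above.
 +     */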
 +    /* modify force flag if not doing nonbonded */
 +    if (!fr->bNonbonded)
 +    {
 +        flags &= ~GMX_FORCE_NONBONDED;
 +    }
 +
 +    switch (inputrec->cutoff_scheme)
 +    {
 +        case ecutsVERLET:
 +            do_force_cutsVERLET(fplog, cr, inputrec,
 +                                step, nrnb, wcycle,
 +                                top,
 +                                groups,
 +                                box, x, hist,
 +                                f, vir_force,
 +                                mdatoms,
 +                                enerd, fcd,
 +                                lambda, graph,
 +                                fr, fr->ic,
 +                                vsite, mu_tot,
 +                                t, field, ed,
 +                                bBornRadii,
 +                                flags);
 +            break;
 +        case ecutsGROUP:
 +            do_force_cutsGROUP(fplog, cr, inputrec,
 +                               step, nrnb, wcycle,
 +                               top,
 +                               groups,
 +                               box, x, hist,
 +                               f, vir_force,
 +                               mdatoms,
 +                               enerd, fcd,
 +                               lambda, graph,
 +                               fr, vsite, mu_tot,
 +                               t, field, ed,
 +                               bBornRadii,
 +                               flags);
 +            break;
 +        default:
 +            gmx_incons("Invalid cut-off scheme passed!");
 +    }
 +}
 +
 +
 +void do_constrain_first(FILE *fplog, gmx_constr_t constr,
 +                        t_inputrec *ir, t_mdatoms *md,
 +                        t_state *state, t_commrec *cr, t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top)
 +{
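 +    /* This constrains the starting coordinates and, for integrators that
 +     * store velocities at t - dt/2, also produces constrained starting
 +     * velocities by constraining a reversed step and flipping the
 +     * velocities back afterwards.
 +     */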
 +    int             i, m, start, end;
 +    gmx_int64_t     step;
 +    real            dt = ir->delta_t;
 +    real            dvdl_dum;
 +    rvec           *savex;
 +
 +    snew(savex, state->natoms);
 +
 +    start = md->start;
 +    end   = md->homenr + start;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "vcm: start=%d, homenr=%d, end=%d\n",
 +                start, md->homenr, end);
 +    }
 +    /* Do a first constraining run to reset the particles... */
 +    step = ir->init_step;
 +    if (fplog)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(fplog, "\nConstraining the starting coordinates (step %s)\n",
 +                gmx_step_str(step, buf));
 +    }
 +    dvdl_dum = 0;
 +
 +    /* constrain the current position */
 +    constrain(NULL, TRUE, FALSE, constr, &(top->idef),
 +              ir, NULL, cr, step, 0, md,
 +              state->x, state->x, NULL,
 +              fr->bMolPBC, state->box,
 +              state->lambda[efptBONDED], &dvdl_dum,
 +              NULL, NULL, nrnb, econqCoord,
 +              ir->epc == epcMTTK, state->veta, state->veta);
 +    if (EI_VV(ir->eI))
 +    {
 +        /* constrain the initial velocity, and save it */
 +        /* may also be useful if we need the ekin from the half step for velocity verlet */
 +        /* might not yet treat veta correctly */
 +        constrain(NULL, TRUE, FALSE, constr, &(top->idef),
 +                  ir, NULL, cr, step, 0, md,
 +                  state->x, state->v, state->v,
 +                  fr->bMolPBC, state->box,
 +                  state->lambda[efptBONDED], &dvdl_dum,
 +                  NULL, NULL, nrnb, econqVeloc,
 +                  ir->epc == epcMTTK, state->veta, state->veta);
 +    }
 +    /* constrain the initial velocities at t-dt/2 */
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->eI != eiVV)
 +    {
 +        for (i = start; (i < end); i++)
 +        {
 +            for (m = 0; (m < DIM); m++)
 +            {
 +                /* Reverse the velocity */
 +                state->v[i][m] = -state->v[i][m];
 +                /* Store the position at t-dt in savex */
 +                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
 +            }
 +        }
 +        /* Shake the positions at t=-dt with the positions at t=0
 +         * as reference coordinates.
 +         */
 +        if (fplog)
 +        {
 +            char buf[STEPSTRSIZE];
 +            fprintf(fplog, "\nConstraining the coordinates at t0-dt (step %s)\n",
 +                    gmx_step_str(step, buf));
 +        }
 +        dvdl_dum = 0;
 +        constrain(NULL, TRUE, FALSE, constr, &(top->idef),
 +                  ir, NULL, cr, step, -1, md,
 +                  state->x, savex, NULL,
 +                  fr->bMolPBC, state->box,
 +                  state->lambda[efptBONDED], &dvdl_dum,
 +                  state->v, NULL, nrnb, econqCoord,
 +                  ir->epc == epcMTTK, state->veta, state->veta);
 +
 +        for (i = start; i < end; i++)
 +        {
 +            for (m = 0; m < DIM; m++)
 +            {
 +                /* Re-reverse the velocities */
 +                state->v[i][m] = -state->v[i][m];
 +            }
 +        }
 +    }
 +    sfree(savex);
 +}
 +
 +
 +static void
 +integrate_table(real vdwtab[], real scale, int offstart, int rstart, int rend,
 +                double *enerout, double *virout)
 +{
 +    double enersum, virsum;
 +    double invscale, invscale2, invscale3;
 +    double r, ea, eb, ec, pa, pb, pc, pd;
 +    double y0, f, g, h;
 +    int    ri, offset, tabfactor;
 +
 +    invscale  = 1.0/scale;
 +    invscale2 = invscale*invscale;
 +    invscale3 = invscale*invscale2;
 +
 +    /* The following summation is derived from the cubic spline definition,
 +     * Numerical Recipes in C, second edition, p. 113-116.  Exact for
 +     * the cubic spline.  We first calculate the negative of the
 +     * energy from rvdw to rvdw_switch, assuming that g(r)=1, and then
 +     * add the more standard, abrupt cutoff correction to that result,
 +     * yielding the long-range correction for a switched function.  We
 +     * perform both the pressure and energy loops at the same time for
 +     * simplicity, as the computational cost is low. */
 +
 +    if (offstart == 0)
 +    {
 +        /* Since the dispersion table has been scaled down by a factor
 +         * of 6.0 and the repulsion by a factor of 12.0 to compensate for the
 +         * c6/c12 parameters inside nbfp[] being scaled up (to save
 +         * flops in kernels), we need to correct for this.
 +         */
 +        tabfactor = 6.0;
 +    }
 +    else
 +    {
 +        tabfactor = 12.0;
 +    }
 +
 +    enersum = 0.0;
 +    virsum  = 0.0;
 +    for (ri = rstart; ri < rend; ++ri)
 +    {
 +        r  = ri*invscale;
 +        ea = invscale3;
 +        eb = 2.0*invscale2*r;
 +        ec = invscale*r*r;
 +
 +        pa = invscale3;
 +        pb = 3.0*invscale2*r;
 +        pc = 3.0*invscale*r*r;
 +        pd = r*r*r;
 +
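 +        /* ea..ec and pa..pd are the coefficients of (r + t/scale)^2/scale and
 +         * (r + t/scale)^3 expanded in the fractional bin coordinate t, which
 +         * lets the energy and virial integrals of the cubic spline over each
 +         * table bin be evaluated analytically below.
 +         */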
 +        /* This "8" is the stride of the vdwtab array: four spline
 +           coefficients (Y, F, G, H) each for dispersion and repulsion
 +           per table point - perhaps it should be a named constant? */
 +
 +        offset = 8*ri + offstart;
 +        y0     = vdwtab[offset];
 +        f      = vdwtab[offset+1];
 +        g      = vdwtab[offset+2];
 +        h      = vdwtab[offset+3];
 +
 +        enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
 +        virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
 +    }
 +    *enerout = 4.0*M_PI*enersum*tabfactor;
 +    *virout  = 4.0*M_PI*virsum*tabfactor;
 +}
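
In the loop above, ea, eb and ec collect the expansion of (r_i + s)^2 over one table bin, so each bin contributes a closed-form integral of r^2 times the cubic spline (and the pa..pd terms do the same for the virial). The standalone program below is an editor's cross-check, not GROMACS code: it compares that closed form against a direct midpoint-rule quadrature for one bin with arbitrary coefficients.

/*
 * Standalone check of the per-bin closed form used in integrate_table():
 * for one bin starting at r = ri/scale, with spline value
 * y(u) = y0 + f*u + g*u^2 + h*u^3 in the normalized coordinate u in [0,1],
 * the integral of (r + u/scale)^2 * y(u) over the bin equals
 *   y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)
 * + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4),
 * with ea = 1/scale^3, eb = 2r/scale^2, ec = r^2/scale.
 */
#include <stdio.h>

int main(void)
{
    double scale = 500.0, ri = 400.0;             /* arbitrary bin            */
    double y0 = 1.3, f = -0.7, g = 0.2, h = 0.05; /* arbitrary coefficients   */
    double invscale = 1.0/scale, r = ri*invscale;
    double ea = invscale*invscale*invscale;
    double eb = 2.0*invscale*invscale*r;
    double ec = invscale*r*r;
    double closed, numeric = 0.0;
    int    i, n = 100000;

    closed = y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)
           + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);

    for (i = 0; i < n; i++)                       /* midpoint rule in u       */
    {
        double u  = (i + 0.5)/n;
        double rr = r + u*invscale;
        numeric  += rr*rr*(y0 + u*(f + u*(g + u*h)))*invscale/n;
    }
    printf("closed %.12e  numeric %.12e\n", closed, numeric);
    return 0;
}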
 +
 +void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr)
 +{
 +    double eners[2], virs[2], enersum, virsum, y0, f, g, h;
 +    double r0, r1, r, rc3, rc9, ea, eb, ec, pa, pb, pc, pd;
 +    double invscale, invscale2, invscale3;
 +    int    ri0, ri1, ri, i, offstart, offset;
 +    real   scale, *vdwtab, tabfactor, tmp;
 +
 +    fr->enershiftsix    = 0;
 +    fr->enershifttwelve = 0;
 +    fr->enerdiffsix     = 0;
 +    fr->enerdifftwelve  = 0;
 +    fr->virdiffsix      = 0;
 +    fr->virdifftwelve   = 0;
 +
 +    if (eDispCorr != edispcNO)
 +    {
 +        for (i = 0; i < 2; i++)
 +        {
 +            eners[i] = 0;
 +            virs[i]  = 0;
 +        }
 +        if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT))
 +        {
 +            if (fr->rvdw_switch == 0)
 +            {
 +                gmx_fatal(FARGS,
 +                          "With dispersion correction rvdw-switch can not be zero "
 +                          "for vdw-type = %s", evdw_names[fr->vdwtype]);
 +            }
 +
 +            scale  = fr->nblists[0].table_elec_vdw.scale;
 +            vdwtab = fr->nblists[0].table_vdw.data;
 +
 +            /* Round the cut-offs to exact table values for precision */
 +            ri0  = floor(fr->rvdw_switch*scale);
 +            ri1  = ceil(fr->rvdw*scale);
 +            r0   = ri0/scale;
 +            r1   = ri1/scale;
 +            rc3  = r0*r0*r0;
 +            rc9  = rc3*rc3*rc3;
 +
 +            if (fr->vdwtype == evdwSHIFT)
 +            {
 +                /* Determine the constant energy shift below rvdw_switch.
 +                 * Table has a scale factor since we have scaled it down to compensate
 +                 * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
 +                 */
 +                fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
 +                fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
 +            }
 +            /* Add the constant part from 0 to rvdw_switch.
 +             * This integration from 0 to rvdw_switch overcounts the number
 +             * of interactions by 1, as it also counts the self interaction.
 +             * We will correct for this later.
 +             */
 +            eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
 +            eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
 +            for (i = 0; i < 2; i++)
 +            {
 +                enersum = 0;
 +                virsum  = 0;
 +                integrate_table(vdwtab, scale, (i == 0 ? 0 : 4), ri0, ri1, &enersum, &virsum);
 +                eners[i] -= enersum;
 +                virs[i]  -= virsum;
 +            }
 +
 +            /* now add the correction for rvdw_switch to infinity */
 +            eners[0] += -4.0*M_PI/(3.0*rc3);
 +            eners[1] +=  4.0*M_PI/(9.0*rc9);
 +            virs[0]  +=  8.0*M_PI/rc3;
 +            virs[1]  += -16.0*M_PI/(3.0*rc9);
 +        }
 +        else if (EVDW_PME(fr->vdwtype))
 +        {
 +            if (EVDW_SWITCHED(fr->vdwtype) && fr->rvdw_switch == 0)
 +            {
 +                gmx_fatal(FARGS,
 +                          "With dispersion correction rvdw-switch can not be zero "
 +                          "for vdw-type = %s", evdw_names[fr->vdwtype]);
 +            }
 +
 +            scale  = fr->nblists[0].table_vdw.scale;
 +            vdwtab = fr->nblists[0].table_vdw.data;
 +
 +            ri0  = floor(fr->rvdw_switch*scale);
 +            ri1  = ceil(fr->rvdw*scale);
 +            r0   = ri0/scale;
 +            r1   = ri1/scale;
 +            rc3  = r0*r0*r0;
 +            rc9  = rc3*rc3*rc3;
 +
 +            /* Calculate self-interaction coefficient (assuming that
 +             * the reciprocal-space contribution is constant in the
 +             * region that contributes to the self-interaction).
 +             */
 +            fr->enershiftsix = pow(fr->ewaldcoeff_lj, 6) / 6.0;
 +
 +            /* Calculate C12 values as without PME. */
 +            if (EVDW_SWITCHED(fr->vdwtype))
 +            {
 +                enersum = 0;
 +                virsum  = 0;
 +                integrate_table(vdwtab, scale, 4, ri0, ri1, &enersum, &virsum);
 +                eners[1] -= enersum;
 +                virs[1]  -= virsum;
 +            }
 +            /* Add analytical corrections, C6 for the whole range, C12
 +             * from rvdw_switch to infinity.
 +             */
 +
 +            eners[0] += -pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3)/3.0;
 +            eners[1] +=  4.0*M_PI/(9.0*rc9);
 +            virs[0]  +=  pow(sqrt(M_PI)*fr->ewaldcoeff_lj, 3);
 +            virs[1]  += -16.0*M_PI/(3.0*rc9);
 +        }
 +        else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER))
 +        {
 +            if (fr->vdwtype == evdwUSER && fplog)
 +            {
 +                fprintf(fplog,
 +                        "WARNING: using dispersion correction with user tables\n");
 +            }
 +            rc3  = fr->rvdw*fr->rvdw*fr->rvdw;
 +            rc9  = rc3*rc3*rc3;
 +            /* Contribution beyond the cut-off */
 +            eners[0] += -4.0*M_PI/(3.0*rc3);
 +            eners[1] +=  4.0*M_PI/(9.0*rc9);
 +            if (fr->vdw_modifier == eintmodPOTSHIFT)
 +            {
 +                /* Contribution within the cut-off */
 +                eners[0] += -4.0*M_PI/(3.0*rc3);
 +                eners[1] +=  4.0*M_PI/(3.0*rc9);
 +            }
 +            /* Contribution beyond the cut-off */
 +            virs[0]  +=  8.0*M_PI/rc3;
 +            virs[1]  += -16.0*M_PI/(3.0*rc9);
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS,
 +                      "Dispersion correction is not implemented for vdw-type = %s",
 +                      evdw_names[fr->vdwtype]);
 +        }
 +        fr->enerdiffsix    = eners[0];
 +        fr->enerdifftwelve = eners[1];
 +        /* The 0.5 is due to the Gromacs definition of the virial */
 +        fr->virdiffsix     = 0.5*virs[0];
 +        fr->virdifftwelve  = 0.5*virs[1];
 +    }
 +}
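
The "contribution beyond the cut-off" terms added above are the analytic tail integrals of 4*pi*r^2 against r^-6 and r^-12, namely -4*pi/(3*rc^3) and +4*pi/(9*rc^9) per unit C6/C12. The short standalone program below is only an editor's numerical cross-check of those two expressions over a finite range; it is not GROMACS code.

/*
 * Cross-check of the analytic dispersion/repulsion tail integrals used in
 * calc_enervirdiff(), by midpoint-rule quadrature from rc to a large rmax.
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
    double rc = 1.0, rmax = 50.0;    /* nm, arbitrary                        */
    double e6 = 0.0, e12 = 0.0, r, dr = 1e-4;

    for (r = rc + 0.5*dr; r < rmax; r += dr)
    {
        e6  += 4.0*M_PI*r*r*(-1.0/pow(r, 6))*dr;   /* C6 (dispersion) tail   */
        e12 += 4.0*M_PI*r*r*(+1.0/pow(r, 12))*dr;  /* C12 (repulsion) tail   */
    }
    printf("C6  tail: numeric %.6e  analytic %.6e\n",
           e6,  -4.0*M_PI/3.0*(1.0/pow(rc, 3) - 1.0/pow(rmax, 3)));
    printf("C12 tail: numeric %.6e  analytic %.6e\n",
           e12,  4.0*M_PI/9.0*(1.0/pow(rc, 9) - 1.0/pow(rmax, 9)));
    return 0;
}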
 +
 +void calc_dispcorr(FILE *fplog, t_inputrec *ir, t_forcerec *fr,
 +                   gmx_int64_t step, int natoms,
 +                   matrix box, real lambda, tensor pres, tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr)
 +{
 +    gmx_bool bCorrAll, bCorrPres;
 +    real     dvdlambda, invvol, dens, ninter, avcsix, avctwelve, enerdiff, svir = 0, spres = 0;
 +    int      m;
 +
 +    *prescorr = 0;
 +    *enercorr = 0;
 +    *dvdlcorr = 0;
 +
 +    clear_mat(virial);
 +    clear_mat(pres);
 +
 +    if (ir->eDispCorr != edispcNO)
 +    {
 +        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +
 +        invvol = 1/det(box);
 +        if (fr->n_tpi)
 +        {
 +            /* Only correct for the interactions with the inserted molecule */
 +            dens   = (natoms - fr->n_tpi)*invvol;
 +            ninter = fr->n_tpi;
 +        }
 +        else
 +        {
 +            dens   = natoms*invvol;
 +            ninter = 0.5*natoms;
 +        }
 +
 +        if (ir->efep == efepNO)
 +        {
 +            avcsix    = fr->avcsix[0];
 +            avctwelve = fr->avctwelve[0];
 +        }
 +        else
 +        {
 +            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
 +            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
 +        }
 +
 +        enerdiff   = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
 +        *enercorr += avcsix*enerdiff;
 +        dvdlambda  = 0.0;
 +        if (ir->efep != efepNO)
 +        {
 +            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
 +        }
 +        if (bCorrAll)
 +        {
 +            enerdiff   = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
 +            *enercorr += avctwelve*enerdiff;
 +            if (fr->efep != efepNO)
 +            {
 +                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
 +            }
 +        }
 +
 +        if (bCorrPres)
 +        {
 +            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
 +            if (ir->eDispCorr == edispcAllEnerPres)
 +            {
 +                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
 +            }
 +            /* The factor 2 is because of the Gromacs virial definition */
 +            spres = -2.0*invvol*svir*PRESFAC;
 +
 +            for (m = 0; m < DIM; m++)
 +            {
 +                virial[m][m] += svir;
 +                pres[m][m]   += spres;
 +            }
 +            *prescorr += spres;
 +        }
 +
 +        /* Can't currently control when it prints; for now, just print when debugging */
 +        if (debug)
 +        {
 +            if (bCorrAll)
 +            {
 +                fprintf(debug, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                        avcsix, avctwelve);
 +            }
 +            if (bCorrPres)
 +            {
 +                fprintf(debug,
 +                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
 +                        *enercorr, spres, svir);
 +            }
 +            else
 +            {
 +                fprintf(debug, "Long Range LJ corr.: Epot %10g\n", *enercorr);
 +            }
 +        }
 +
 +        if (fr->bSepDVDL && do_per_step(step, ir->nstlog))
 +        {
 +            gmx_print_sepdvdl(fplog, "Dispersion correction", *enercorr, dvdlambda);
 +        }
 +        if (fr->efep != efepNO)
 +        {
 +            *dvdlcorr += dvdlambda;
 +        }
 +    }
 +}
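
For a homogeneous system with a plain cut-off (zero energy shift), the energy correction assembled above reduces to ninter*dens*<C6>*enerdiffsix = (N/2)*(N/V)*<C6>*(-4*pi/(3*rc^3)), i.e. the familiar -(2/3)*pi*N*rho*<C6>/rc^3 long-range LJ term. Below is a minimal standalone sketch of that reduction with hypothetical numbers; it is illustrative only and does not call any GROMACS functions.

/*
 * Editor's sketch: plain cut-off dispersion correction for a homogeneous
 * fluid, using the same quantities as calc_dispcorr() but with made-up
 * values for the box, atom count and average C6.
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
    double natoms = 3000.0, vol = 30.0;   /* hypothetical box, nm^3          */
    double avcsix = 6.0e-3;               /* hypothetical <C6>, kJ mol^-1 nm^6 */
    double rc     = 1.0;                  /* cut-off, nm                     */
    double dens   = natoms/vol;           /* number density                  */
    double ninter = 0.5*natoms;           /* each pair counted once          */
    double enerdiffsix = -4.0*M_PI/(3.0*rc*rc*rc);
    double ecorr  = ninter*dens*avcsix*enerdiffsix;

    printf("dispersion correction: %.3f kJ/mol\n", ecorr);
    return 0;
}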
 +
 +void do_pbc_first(FILE *fplog, matrix box, t_forcerec *fr,
 +                  t_graph *graph, rvec x[])
 +{
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Removing pbc first time\n");
 +    }
 +    calc_shifts(box, fr->shift_vec);
 +    if (graph)
 +    {
 +        mk_mshift(fplog, graph, fr->ePBC, box, x);
 +        if (gmx_debug_at)
 +        {
 +            p_graph(debug, "do_pbc_first 1", graph);
 +        }
 +        shift_self(graph, box, x);
 +        /* By doing an extra mk_mshift the molecules that are broken
 +         * because they were e.g. imported from another software
 +         * will be made whole again. Such are the healing powers
 +         * of GROMACS.
 +         */
 +        mk_mshift(fplog, graph, fr->ePBC, box, x);
 +        if (gmx_debug_at)
 +        {
 +            p_graph(debug, "do_pbc_first 2", graph);
 +        }
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Done rmpbc\n");
 +    }
 +}
 +
 +static void low_do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
 +                            gmx_mtop_t *mtop, rvec x[],
 +                            gmx_bool bFirst)
 +{
 +    t_graph        *graph;
 +    int             mb, as, mol;
 +    gmx_molblock_t *molb;
 +
 +    if (bFirst && fplog)
 +    {
 +        fprintf(fplog, "Removing pbc first time\n");
 +    }
 +
 +    snew(graph, 1);
 +    as = 0;
 +    for (mb = 0; mb < mtop->nmolblock; mb++)
 +    {
 +        molb = &mtop->molblock[mb];
 +        if (molb->natoms_mol == 1 ||
 +            (!bFirst && mtop->moltype[molb->type].cgs.nr == 1))
 +        {
 +            /* Just one atom or charge group in the molecule, no PBC required */
 +            as += molb->nmol*molb->natoms_mol;
 +        }
 +        else
 +        {
 +            /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
 +            mk_graph_ilist(NULL, mtop->moltype[molb->type].ilist,
 +                           0, molb->natoms_mol, FALSE, FALSE, graph);
 +
 +            for (mol = 0; mol < molb->nmol; mol++)
 +            {
 +                mk_mshift(fplog, graph, ePBC, box, x+as);
 +
 +                shift_self(graph, box, x+as);
 +                /* The molecule is whole now.
 +                 * We don't need the second mk_mshift call as in do_pbc_first,
 +                 * since we no longer need this graph.
 +                 */
 +
 +                as += molb->natoms_mol;
 +            }
 +            done_graph(graph);
 +        }
 +    }
 +    sfree(graph);
 +}
 +
 +void do_pbc_first_mtop(FILE *fplog, int ePBC, matrix box,
 +                       gmx_mtop_t *mtop, rvec x[])
 +{
 +    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, TRUE);
 +}
 +
 +void do_pbc_mtop(FILE *fplog, int ePBC, matrix box,
 +                 gmx_mtop_t *mtop, rvec x[])
 +{
 +    low_do_pbc_mtop(fplog, ePBC, box, mtop, x, FALSE);
 +}
 +
 +void finish_run(FILE *fplog, t_commrec *cr,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[], gmx_wallcycle_t wcycle,
 +                gmx_walltime_accounting_t walltime_accounting,
 +                wallclock_gpu_t *gputimes,
 +                gmx_bool bWriteStat)
 +{
 +    int     i, j;
 +    t_nrnb *nrnb_tot = NULL;
 +    real    delta_t;
 +    double  nbfs, mflop;
 +    double  elapsed_time,
 +            elapsed_time_over_all_ranks,
 +            elapsed_time_over_all_threads,
 +            elapsed_time_over_all_threads_over_all_ranks;
 +    wallcycle_sum(cr, wcycle);
 +
 +    if (cr->nnodes > 1)
 +    {
 +        snew(nrnb_tot, 1);
 +#ifdef GMX_MPI
 +        MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +#endif
 +    }
 +    else
 +    {
 +        nrnb_tot = nrnb;
 +    }
 +
 +    elapsed_time                                 = walltime_accounting_get_elapsed_time(walltime_accounting);
 +    elapsed_time_over_all_ranks                  = elapsed_time;
 +    elapsed_time_over_all_threads                = walltime_accounting_get_elapsed_time_over_all_threads(walltime_accounting);
 +    elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
 +#ifdef GMX_MPI
 +    if (cr->nnodes > 1)
 +    {
 +        /* reduce elapsed_time over all MPI ranks in the current simulation */
 +        MPI_Allreduce(&elapsed_time,
 +                      &elapsed_time_over_all_ranks,
 +                      1, MPI_DOUBLE, MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        elapsed_time_over_all_ranks /= cr->nnodes;
 +        /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
 +         * current simulation. */
 +        MPI_Allreduce(&elapsed_time_over_all_threads,
 +                      &elapsed_time_over_all_threads_over_all_ranks,
 +                      1, MPI_DOUBLE, MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +    }
 +#endif
 +
 +    if (SIMMASTER(cr))
 +    {
 +        print_flop(fplog, nrnb_tot, &nbfs, &mflop);
 +    }
 +    if (cr->nnodes > 1)
 +    {
 +        sfree(nrnb_tot);
 +    }
 +
 +    if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
 +    {
 +        print_dd_statistics(cr, inputrec, fplog);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (PARTDECOMP(cr))
 +    {
 +        if (MASTER(cr))
 +        {
 +            t_nrnb     *nrnb_all;
 +            int         s;
 +            MPI_Status  stat;
 +
 +            snew(nrnb_all, cr->nnodes);
 +            nrnb_all[0] = *nrnb;
 +            for (s = 1; s < cr->nnodes; s++)
 +            {
 +                MPI_Recv(nrnb_all[s].n, eNRNB, MPI_DOUBLE, s, 0,
 +                         cr->mpi_comm_mysim, &stat);
 +            }
 +            pr_load(fplog, cr, nrnb_all);
 +            sfree(nrnb_all);
 +        }
 +        else
 +        {
 +            MPI_Send(nrnb->n, eNRNB, MPI_DOUBLE, MASTERRANK(cr), 0,
 +                     cr->mpi_comm_mysim);
 +        }
 +    }
 +#endif
 +
 +    if (SIMMASTER(cr))
 +    {
 +        wallcycle_print(fplog, cr->nnodes, cr->npmenodes,
 +                        elapsed_time_over_all_ranks,
 +                        wcycle, gputimes);
 +
 +        if (EI_DYNAMICS(inputrec->eI))
 +        {
 +            delta_t = inputrec->delta_t;
 +        }
 +        else
 +        {
 +            delta_t = 0;
 +        }
 +
 +        if (fplog)
 +        {
 +            print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks,
 +                       elapsed_time_over_all_ranks,
 +                       walltime_accounting_get_nsteps_done(walltime_accounting),
 +                       delta_t, nbfs, mflop);
 +        }
 +        if (bWriteStat)
 +        {
 +            print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks,
 +                       elapsed_time_over_all_ranks,
 +                       walltime_accounting_get_nsteps_done(walltime_accounting),
 +                       delta_t, nbfs, mflop);
 +        }
 +    }
 +}
 +
 +extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, real *lambda, double *lam0)
 +{
 +    /* this function works, but could probably use a logic rewrite to keep all the different
 +       types of efep straight. */
 +
 +    int       i;
 +    t_lambda *fep = ir->fepvals;
 +
 +    if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
 +    {
 +        for (i = 0; i < efptNR; i++)
 +        {
 +            lambda[i] = 0.0;
 +            if (lam0)
 +            {
 +                lam0[i] = 0.0;
 +            }
 +        }
 +        return;
 +    }
 +    else
 +    {
 +        *fep_state = fep->init_fep_state; /* this might overwrite the value read from
 +                                             a checkpoint if one is set -- a kludge is in
 +                                             place for now to prevent this. */
 +        for (i = 0; i < efptNR; i++)
 +        {
 +            /* overwrite lambda state with init_lambda for now for backwards compatibility */
 +            if (fep->init_lambda >= 0) /* if it's -1, it was never initialized */
 +            {
 +                lambda[i] = fep->init_lambda;
 +                if (lam0)
 +                {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +            else
 +            {
 +                lambda[i] = fep->all_lambda[i][*fep_state];
 +                if (lam0)
 +                {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +        }
 +        if (ir->bSimTemp)
 +        {
 +            /* need to rescale control temperatures to match current state */
 +            for (i = 0; i < ir->opts.ngtc; i++)
 +            {
 +                if (ir->opts.ref_t[i] > 0)
 +                {
 +                    ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Send to the log the information on the current lambdas */
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog, "Initial vector of lambda components:[ ");
 +        for (i = 0; i < efptNR; i++)
 +        {
 +            fprintf(fplog, "%10.4f ", lambda[i]);
 +        }
 +        fprintf(fplog, "]\n");
 +    }
 +    return;
 +}
 +
 +
 +void init_md(FILE *fplog,
 +             t_commrec *cr, t_inputrec *ir, const output_env_t oenv,
 +             double *t, double *t0,
 +             real *lambda, int *fep_state, double *lam0,
 +             t_nrnb *nrnb, gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile, const t_filenm fnm[],
 +             gmx_mdoutf_t *outf, t_mdebin **mdebin,
 +             tensor force_vir, tensor shake_vir, rvec mu_tot,
 +             gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags)
 +{
 +    int  i, j, n;
 +    real tmpt, mod;
 +
 +    /* Initial values */
 +    *t = *t0       = ir->init_t;
 +
 +    *bSimAnn = FALSE;
 +    for (i = 0; i < ir->opts.ngtc; i++)
 +    {
 +        /* set bSimAnn if any group is being annealed */
 +        if (ir->opts.annealing[i] != eannNO)
 +        {
 +            *bSimAnn = TRUE;
 +        }
 +    }
 +    if (*bSimAnn)
 +    {
 +        update_annealing_target_temp(&(ir->opts), ir->init_t);
 +    }
 +
 +    /* Initialize lambda variables */
 +    initialize_lambdas(fplog, ir, fep_state, lambda, lam0);
 +
 +    if (upd)
 +    {
 +        *upd = init_update(ir);
 +    }
 +
 +
 +    if (vcm != NULL)
 +    {
 +        *vcm = init_vcm(fplog, &mtop->groups, ir);
 +    }
 +
 +    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
 +    {
 +        if (ir->etc == etcBERENDSEN)
 +        {
 +            please_cite(fplog, "Berendsen84a");
 +        }
 +        if (ir->etc == etcVRESCALE)
 +        {
 +            please_cite(fplog, "Bussi2007a");
 +        }
 +    }
 +
 +    init_nrnb(nrnb);
 +
 +    if (nfile != -1)
 +    {
 +        *outf = init_mdoutf(nfile, fnm, Flags, cr, ir, mtop, oenv);
 +
 +        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : mdoutf_get_fp_ene(*outf),
 +                              mtop, ir, mdoutf_get_fp_dhdl(*outf));
 +    }
 +
 +    if (ir->bAdress)
 +    {
 +        please_cite(fplog, "Fritsch12");
 +        please_cite(fplog, "Junghans10");
 +    }
 +    /* Initialize variables */
 +    clear_mat(force_vir);
 +    clear_mat(shake_vir);
 +    clear_rvec(mu_tot);
 +
 +    debug_gmx();
 +}
index aa111f03df02cadd3654b9c4986327d40e12b951,0000000000000000000000000000000000000000..b104b3d390ebae7ef369f159bf132da6e9a0bbae
mode 100644,000000..100644
--- /dev/null
@@@ -1,843 -1,0 +1,841 @@@
-  * Copyright (c) 2013, by the GROMACS development team, led by
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
-     print_date_and_time(fplog, cr->nodeid,
-                         "Started Test Particle Insertion",
-                         walltime_accounting);
++ * Copyright (c) 2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "network.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "chargegroup.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "random.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "update.h"
 +#include "random.h"
 +#include "constr.h"
 +#include "vec.h"
 +#include "tgroup.h"
 +#include "mdebin.h"
 +#include "vsite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_random.h"
 +#include "physics.h"
 +#include "xvgr.h"
 +#include "mdatoms.h"
 +#include "ns.h"
 +#include "mtop_util.h"
 +#include "pme.h"
 +#include "gbutil.h"
 +
 +#include "gromacs/fileio/confio.h"
 +#include "gromacs/fileio/gmxfio.h"
 +#include "gromacs/fileio/trxio.h"
 +#include "gromacs/timing/wallcycle.h"
 +#include "gromacs/timing/walltime_accounting.h"
 +
 +#ifdef GMX_X86_SSE2
 +#include "gromacs/simd/general_x86_sse2.h"
 +#endif
 +
 +
 +static void global_max(t_commrec *cr, int *n)
 +{
 +    int *sum, i;
 +
 +    snew(sum, cr->nnodes);
 +    sum[cr->nodeid] = *n;
 +    gmx_sumi(cr->nnodes, sum, cr);
 +    for (i = 0; i < cr->nnodes; i++)
 +    {
 +        *n = max(*n, sum[i]);
 +    }
 +
 +    sfree(sum);
 +}
 +
 +static void realloc_bins(double **bin, int *nbin, int nbin_new)
 +{
 +    int i;
 +
 +    if (nbin_new != *nbin)
 +    {
 +        srenew(*bin, nbin_new);
 +        for (i = *nbin; i < nbin_new; i++)
 +        {
 +            (*bin)[i] = 0;
 +        }
 +        *nbin = nbin_new;
 +    }
 +}
 +
 +double do_tpi(FILE *fplog, t_commrec *cr,
 +              int nfile, const t_filenm fnm[],
 +              const output_env_t oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +              int gmx_unused nstglobalcomm,
 +              gmx_vsite_t gmx_unused *vsite, gmx_constr_t gmx_unused constr,
 +              int gmx_unused stepout,
 +              t_inputrec *inputrec,
 +              gmx_mtop_t *top_global, t_fcdata *fcd,
 +              t_state *state,
 +              t_mdatoms *mdatoms,
 +              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +              gmx_edsam_t gmx_unused ed,
 +              t_forcerec *fr,
 +              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 +              gmx_membed_t gmx_unused membed,
 +              real gmx_unused cpt_period, real gmx_unused max_hours,
 +              const char gmx_unused *deviceOptions,
 +              unsigned long gmx_unused Flags,
 +              gmx_walltime_accounting_t walltime_accounting)
 +{
 +    const char     *TPI = "Test Particle Insertion";
 +    gmx_localtop_t *top;
 +    gmx_groups_t   *groups;
 +    gmx_enerdata_t *enerd;
 +    rvec           *f;
 +    real            lambda, t, temp, beta, drmax, epot;
 +    double          embU, sum_embU, *sum_UgembU, V, V_all, VembU_all;
 +    t_trxstatus    *status;
 +    t_trxframe      rerun_fr;
 +    gmx_bool        bDispCorr, bCharge, bRFExcl, bNotLastFrame, bStateChanged, bNS, bOurStep;
 +    tensor          force_vir, shake_vir, vir, pres;
 +    int             cg_tp, a_tp0, a_tp1, ngid, gid_tp, nener, e;
 +    rvec           *x_mol;
 +    rvec            mu_tot, x_init, dx, x_tp;
 +    int             nnodes, frame, nsteps, step;
 +    int             i, start, end;
 +    gmx_rng_t       tpi_rand;
 +    FILE           *fp_tpi = NULL;
 +    char           *ptr, *dump_pdb, **leg, str[STRLEN], str2[STRLEN];
 +    double          dbl, dump_ener;
 +    gmx_bool        bCavity;
 +    int             nat_cavity  = 0, d;
 +    real           *mass_cavity = NULL, mass_tot;
 +    int             nbin;
 +    double          invbinw, *bin, refvolshift, logV, bUlogV;
 +    real            dvdl, prescorr, enercorr, dvdlcorr;
 +    gmx_bool        bEnergyOutOfBounds;
 +    const char     *tpid_leg[2] = {"direct", "reweighted"};
 +
 +    /* Since there is no upper limit to the insertion energies,
 +     * we need to set an upper limit for the distribution output.
 +     */
 +    real bU_bin_limit      = 50;
 +    real bU_logV_bin_limit = bU_bin_limit + 10;
 +
 +    nnodes = cr->nnodes;
 +
 +    top = gmx_mtop_generate_local_top(top_global, inputrec);
 +
 +    groups = &top_global->groups;
 +
 +    bCavity = (inputrec->eI == eiTPIC);
 +    if (bCavity)
 +    {
 +        ptr = getenv("GMX_TPIC_MASSES");
 +        if (ptr == NULL)
 +        {
 +            nat_cavity = 1;
 +        }
 +        else
 +        {
 +            /* Read (multiple) masses from the environment variable GMX_TPIC_MASSES.
 +             * The center of mass of the last atoms is then used for TPIC.
 +             */
 +            nat_cavity = 0;
 +            while (sscanf(ptr, "%lf%n", &dbl, &i) > 0)
 +            {
 +                srenew(mass_cavity, nat_cavity+1);
 +                mass_cavity[nat_cavity] = dbl;
 +                fprintf(fplog, "mass[%d] = %f\n",
 +                        nat_cavity+1, mass_cavity[nat_cavity]);
 +                nat_cavity++;
 +                ptr += i;
 +            }
 +            if (nat_cavity == 0)
 +            {
 +                gmx_fatal(FARGS, "Found %d masses in GMX_TPIC_MASSES", nat_cavity);
 +            }
 +        }
 +    }
 +
 +    /*
 +       init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
 +       state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
 +    /* We never need full pbc for TPI */
 +    fr->ePBC = epbcXYZ;
 +    /* Determine the temperature for the Boltzmann weighting */
 +    temp = inputrec->opts.ref_t[0];
 +    if (fplog)
 +    {
 +        for (i = 1; (i < inputrec->opts.ngtc); i++)
 +        {
 +            if (inputrec->opts.ref_t[i] != temp)
 +            {
 +                fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
 +                fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
 +            }
 +        }
 +        fprintf(fplog,
 +                "\n  The temperature for test particle insertion is %.3f K\n\n",
 +                temp);
 +    }
 +    beta = 1.0/(BOLTZ*temp);
 +
 +    /* Number of insertions per frame */
 +    nsteps = inputrec->nsteps;
 +
 +    /* Use the same neighborlist with more insertion points
 +     * in a sphere of radius drmax around the initial point
 +     */
 +    /* This should be a proper mdp parameter */
 +    drmax = inputrec->rtpi;
 +
 +    /* An environment variable can be set to dump all configurations
 +     * to pdb with an insertion energy <= this value.
 +     */
 +    dump_pdb  = getenv("GMX_TPI_DUMP");
 +    dump_ener = 0;
 +    if (dump_pdb)
 +    {
 +        sscanf(dump_pdb, "%lf", &dump_ener);
 +    }
 +
 +    atoms2md(top_global, inputrec, 0, NULL, 0, top_global->natoms, mdatoms);
 +    update_mdatoms(mdatoms, inputrec->fepvals->init_lambda);
 +
 +    snew(enerd, 1);
 +    init_enerdata(groups->grps[egcENER].nr, inputrec->fepvals->n_lambda, enerd);
 +    snew(f, top_global->natoms);
 +
 +    /* Print to log file  */
 +    walltime_accounting_start(walltime_accounting);
 +    wallcycle_start(wcycle, ewcRUN);
++    print_start(fplog, cr, walltime_accounting, "Test Particle Insertion");
 +
 +    /* The last charge group is the group to be inserted */
 +    cg_tp = top->cgs.nr - 1;
 +    a_tp0 = top->cgs.index[cg_tp];
 +    a_tp1 = top->cgs.index[cg_tp+1];
 +    if (debug)
 +    {
 +        fprintf(debug, "TPI cg %d, atoms %d-%d\n", cg_tp, a_tp0, a_tp1);
 +    }
 +    if (a_tp1 - a_tp0 > 1 &&
 +        (inputrec->rlist < inputrec->rcoulomb ||
 +         inputrec->rlist < inputrec->rvdw))
 +    {
 +        gmx_fatal(FARGS, "Can not do TPI for multi-atom molecule with a twin-range cut-off");
 +    }
 +    snew(x_mol, a_tp1-a_tp0);
 +
 +    bDispCorr = (inputrec->eDispCorr != edispcNO);
 +    bCharge   = FALSE;
 +    for (i = a_tp0; i < a_tp1; i++)
 +    {
 +        /* Copy the coordinates of the molecule to be inserted */
 +        copy_rvec(state->x[i], x_mol[i-a_tp0]);
 +        /* Check if we need to print electrostatic energies */
 +        bCharge |= (mdatoms->chargeA[i] != 0 ||
 +                    (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
 +    }
 +    bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype != eelRF_NEC);
 +
 +    calc_cgcm(fplog, cg_tp, cg_tp+1, &(top->cgs), state->x, fr->cg_cm);
 +    if (bCavity)
 +    {
 +        if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog)
 +        {
 +            fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
 +            fprintf(stderr, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
 +        }
 +    }
 +    else
 +    {
 +        /* Center the molecule to be inserted at zero */
 +        for (i = 0; i < a_tp1-a_tp0; i++)
 +        {
 +            rvec_dec(x_mol[i], fr->cg_cm[cg_tp]);
 +        }
 +    }
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog, "\nWill insert %d atoms %s partial charges\n",
 +                a_tp1-a_tp0, bCharge ? "with" : "without");
 +
 +        fprintf(fplog, "\nWill insert %d times in each frame of %s\n",
 +                nsteps, opt2fn("-rerun", nfile, fnm));
 +    }
 +
 +    if (!bCavity)
 +    {
 +        if (inputrec->nstlist > 1)
 +        {
 +            if (drmax == 0 && a_tp1-a_tp0 == 1)
 +            {
 +                gmx_fatal(FARGS, "Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense", inputrec->nstlist, drmax);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog, "Will use the same neighborlist for %d insertions in a sphere of radius %f\n", inputrec->nstlist, drmax);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog, "Will insert randomly in a sphere of radius %f around the center of the cavity\n", drmax);
 +        }
 +    }
 +
 +    ngid   = groups->grps[egcENER].nr;
 +    gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
 +    nener  = 1 + ngid;
 +    if (bDispCorr)
 +    {
 +        nener += 1;
 +    }
 +    if (bCharge)
 +    {
 +        nener += ngid;
 +        if (bRFExcl)
 +        {
 +            nener += 1;
 +        }
 +        if (EEL_FULL(fr->eeltype))
 +        {
 +            nener += 1;
 +        }
 +    }
 +    snew(sum_UgembU, nener);
 +
 +    /* Initialize random generator */
 +    tpi_rand = gmx_rng_init(inputrec->ld_seed);
 +
 +    if (MASTER(cr))
 +    {
 +        fp_tpi = xvgropen(opt2fn("-tpi", nfile, fnm),
 +                          "TPI energies", "Time (ps)",
 +                          "(kJ mol\\S-1\\N) / (nm\\S3\\N)", oenv);
 +        xvgr_subtitle(fp_tpi, "f. are averages over one frame", oenv);
 +        snew(leg, 4+nener);
 +        e = 0;
 +        sprintf(str, "-kT log(<Ve\\S-\\betaU\\N>/<V>)");
 +        leg[e++] = strdup(str);
 +        sprintf(str, "f. -kT log<e\\S-\\betaU\\N>");
 +        leg[e++] = strdup(str);
 +        sprintf(str, "f. <e\\S-\\betaU\\N>");
 +        leg[e++] = strdup(str);
 +        sprintf(str, "f. V");
 +        leg[e++] = strdup(str);
 +        sprintf(str, "f. <Ue\\S-\\betaU\\N>");
 +        leg[e++] = strdup(str);
 +        for (i = 0; i < ngid; i++)
 +        {
 +            sprintf(str, "f. <U\\sVdW %s\\Ne\\S-\\betaU\\N>",
 +                    *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
 +            leg[e++] = strdup(str);
 +        }
 +        if (bDispCorr)
 +        {
 +            sprintf(str, "f. <U\\sdisp c\\Ne\\S-\\betaU\\N>");
 +            leg[e++] = strdup(str);
 +        }
 +        if (bCharge)
 +        {
 +            for (i = 0; i < ngid; i++)
 +            {
 +                sprintf(str, "f. <U\\sCoul %s\\Ne\\S-\\betaU\\N>",
 +                        *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
 +                leg[e++] = strdup(str);
 +            }
 +            if (bRFExcl)
 +            {
 +                sprintf(str, "f. <U\\sRF excl\\Ne\\S-\\betaU\\N>");
 +                leg[e++] = strdup(str);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                sprintf(str, "f. <U\\sCoul recip\\Ne\\S-\\betaU\\N>");
 +                leg[e++] = strdup(str);
 +            }
 +        }
 +        xvgr_legend(fp_tpi, 4+nener, (const char**)leg, oenv);
 +        for (i = 0; i < 4+nener; i++)
 +        {
 +            sfree(leg[i]);
 +        }
 +        sfree(leg);
 +    }
 +    clear_rvec(x_init);
 +    V_all     = 0;
 +    VembU_all = 0;
 +
 +    invbinw = 10;
 +    nbin    = 10;
 +    snew(bin, nbin);
 +
 +    bNotLastFrame = read_first_frame(oenv, &status, opt2fn("-rerun", nfile, fnm),
 +                                     &rerun_fr, TRX_NEED_X);
 +    frame = 0;
 +
 +    if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
 +        mdatoms->nr - (a_tp1 - a_tp0))
 +    {
 +        gmx_fatal(FARGS, "Number of atoms in trajectory (%d)%s "
 +                  "is not equal the number in the run input file (%d) "
 +                  "minus the number of atoms to insert (%d)\n",
 +                  rerun_fr.natoms, bCavity ? " minus one" : "",
 +                  mdatoms->nr, a_tp1-a_tp0);
 +    }
 +
 +    refvolshift = log(det(rerun_fr.box));
 +
 +#ifdef GMX_X86_SSE2
 +    /* Make sure we don't detect SSE overflow generated before this point */
 +    gmx_mm_check_and_reset_overflow();
 +#endif
 +
 +    while (bNotLastFrame)
 +    {
 +        lambda = rerun_fr.lambda;
 +        t      = rerun_fr.time;
 +
 +        sum_embU = 0;
 +        for (e = 0; e < nener; e++)
 +        {
 +            sum_UgembU[e] = 0;
 +        }
 +
 +        /* Copy the coordinates from the input trajectory */
 +        for (i = 0; i < rerun_fr.natoms; i++)
 +        {
 +            copy_rvec(rerun_fr.x[i], state->x[i]);
 +        }
 +        copy_mat(rerun_fr.box, state->box);
 +
 +        V    = det(state->box);
 +        logV = log(V);
 +
 +        bStateChanged = TRUE;
 +        bNS           = TRUE;
 +        for (step = 0; step < nsteps; step++)
 +        {
 +            /* In parallel, all nodes generate all random configurations,
 +             * so that the result is identical to a single-CPU TPI run.
 +             */
 +            if (!bCavity)
 +            {
 +                /* Random insertion in the whole volume */
 +                bNS = (step % inputrec->nstlist == 0);
 +                if (bNS)
 +                {
 +                    /* Generate a random position in the box */
 +                    x_init[XX] = gmx_rng_uniform_real(tpi_rand)*state->box[XX][XX];
 +                    x_init[YY] = gmx_rng_uniform_real(tpi_rand)*state->box[YY][YY];
 +                    x_init[ZZ] = gmx_rng_uniform_real(tpi_rand)*state->box[ZZ][ZZ];
 +                }
 +                if (inputrec->nstlist == 1)
 +                {
 +                    copy_rvec(x_init, x_tp);
 +                }
 +                else
 +                {
 +                    /* Generate coordinates within |dx|=drmax of x_init */
 +                    do
 +                    {
 +                        dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
 +                        dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
 +                        dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
 +                    }
 +                    while (norm2(dx) > drmax*drmax);
 +                    rvec_add(x_init, dx, x_tp);
 +                }
 +            }
 +            else
 +            {
 +                /* Random insertion around a cavity location
 +                 * given by the last coordinate of the trajectory.
 +                 */
 +                if (step == 0)
 +                {
 +                    if (nat_cavity == 1)
 +                    {
 +                        /* Copy the location of the cavity */
 +                        copy_rvec(rerun_fr.x[rerun_fr.natoms-1], x_init);
 +                    }
 +                    else
 +                    {
 +                        /* Determine the center of mass of the last molecule */
 +                        clear_rvec(x_init);
 +                        mass_tot = 0;
 +                        for (i = 0; i < nat_cavity; i++)
 +                        {
 +                            for (d = 0; d < DIM; d++)
 +                            {
 +                                x_init[d] +=
 +                                    mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
 +                            }
 +                            mass_tot += mass_cavity[i];
 +                        }
 +                        for (d = 0; d < DIM; d++)
 +                        {
 +                            x_init[d] /= mass_tot;
 +                        }
 +                    }
 +                }
 +                /* Generate coordinates within |dx|=drmax of x_init */
 +                do
 +                {
 +                    dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
 +                    dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
 +                    dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
 +                }
 +                while (norm2(dx) > drmax*drmax);
 +                rvec_add(x_init, dx, x_tp);
 +            }
 +
 +            if (a_tp1 - a_tp0 == 1)
 +            {
 +                /* Insert a single atom, just copy the insertion location */
 +                copy_rvec(x_tp, state->x[a_tp0]);
 +            }
 +            else
 +            {
 +                /* Copy the coordinates from the top file */
 +                for (i = a_tp0; i < a_tp1; i++)
 +                {
 +                    copy_rvec(x_mol[i-a_tp0], state->x[i]);
 +                }
 +                /* Rotate the molecule randomly */
 +                rotate_conf(a_tp1-a_tp0, state->x+a_tp0, NULL,
 +                            2*M_PI*gmx_rng_uniform_real(tpi_rand),
 +                            2*M_PI*gmx_rng_uniform_real(tpi_rand),
 +                            2*M_PI*gmx_rng_uniform_real(tpi_rand));
 +                /* Shift to the insertion location */
 +                for (i = a_tp0; i < a_tp1; i++)
 +                {
 +                    rvec_inc(state->x[i], x_tp);
 +                }
 +            }
 +
 +            /* Check if this insertion belongs to this node */
 +            bOurStep = TRUE;
 +            if (PAR(cr))
 +            {
 +                switch (inputrec->eI)
 +                {
 +                    case eiTPI:
 +                        bOurStep = ((step / inputrec->nstlist) % nnodes == cr->nodeid);
 +                        break;
 +                    case eiTPIC:
 +                        bOurStep = (step % nnodes == cr->nodeid);
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS, "Unknown integrator %s", ei_names[inputrec->eI]);
 +                }
 +            }
 +            if (bOurStep)
 +            {
 +                /* Clear some matrix variables  */
 +                clear_mat(force_vir);
 +                clear_mat(shake_vir);
 +                clear_mat(vir);
 +                clear_mat(pres);
 +
 +                /* Set the charge group center of mass of the test particle */
 +                copy_rvec(x_init, fr->cg_cm[top->cgs.nr-1]);
 +
 +                /* Calc energy (no forces) on new positions.
 +                 * Since we only need the intermolecular energy,
 +                 * and the RF exclusion terms of the inserted molecule occur
 +                 * within a single charge group, we can pass NULL for the graph.
 +                 * This also avoids shifts that would move charge groups
 +                 * out of the box.
 +                 *
 +                 * Some checks above ensure that we cannot have
 +                 * twin-range interactions together with nstlist > 1,
 +                 * therefore we do not need to remember the LR energies.
 +                 */
 +                /* Make do_force do a single node force calculation */
 +                cr->nnodes = 1;
 +                do_force(fplog, cr, inputrec,
 +                         step, nrnb, wcycle, top, &top_global->groups,
 +                         state->box, state->x, &state->hist,
 +                         f, force_vir, mdatoms, enerd, fcd,
 +                         state->lambda,
 +                         NULL, fr, NULL, mu_tot, t, NULL, NULL, FALSE,
 +                         GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
 +                         (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) |
 +                         (bStateChanged ? GMX_FORCE_STATECHANGED : 0));
 +                cr->nnodes    = nnodes;
 +                bStateChanged = FALSE;
 +                bNS           = FALSE;
 +
 +                /* Calculate long range corrections to pressure and energy */
 +                calc_dispcorr(fplog, inputrec, fr, step, top_global->natoms, state->box,
 +                              lambda, pres, vir, &prescorr, &enercorr, &dvdlcorr);
 +                /* figure out how to rearrange the next 4 lines MRS 8/4/2009 */
 +                enerd->term[F_DISPCORR]  = enercorr;
 +                enerd->term[F_EPOT]     += enercorr;
 +                enerd->term[F_PRES]     += prescorr;
 +                enerd->term[F_DVDL_VDW] += dvdlcorr;
 +
 +                epot               = enerd->term[F_EPOT];
 +                bEnergyOutOfBounds = FALSE;
 +#ifdef GMX_X86_SSE2
 +                /* With SSE the energy can overflow, check for this */
 +                if (gmx_mm_check_and_reset_overflow())
 +                {
 +                    if (debug)
 +                    {
 +                        fprintf(debug, "Found an SSE overflow, assuming the energy is out of bounds\n");
 +                    }
 +                    bEnergyOutOfBounds = TRUE;
 +                }
 +#endif
 +                /* If the compiler doesn't optimize this check away
 +                 * we catch the NAN energies.
 +                 * The epot>GMX_REAL_MAX check catches inf values,
 +                 * which should nicely result in embU=0 through the exp below,
 +                 * but it does not hurt to check anyhow.
 +                 */
 +                /* Non-bonded interactions usually diverge at r=0.
 +                 * With tabulated interaction functions the first few entries
 +                 * should be capped in a consistent fashion between
 +                 * repulsion, dispersion and Coulomb to avoid accidental
 +                 * negative values in the total energy.
 +                 * The table generation code in tables.c does this.
 +                 * With user tables the user should take care of this.
 +                 */
 +                if (epot != epot || epot > GMX_REAL_MAX)
 +                {
 +                    bEnergyOutOfBounds = TRUE;
 +                }
 +                if (bEnergyOutOfBounds)
 +                {
 +                    if (debug)
 +                    {
 +                        fprintf(debug, "\n  time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n", t, step, epot);
 +                    }
 +                    embU = 0;
 +                }
 +                else
 +                {
 +                    embU      = exp(-beta*epot);
 +                    sum_embU += embU;
 +                    /* Determine the weighted energy contributions of each energy group */
 +                    e                = 0;
 +                    sum_UgembU[e++] += epot*embU;
 +                    if (fr->bBHAM)
 +                    {
 +                        for (i = 0; i < ngid; i++)
 +                        {
 +                            sum_UgembU[e++] +=
 +                                (enerd->grpp.ener[egBHAMSR][GID(i, gid_tp, ngid)] +
 +                                 enerd->grpp.ener[egBHAMLR][GID(i, gid_tp, ngid)])*embU;
 +                        }
 +                    }
 +                    else
 +                    {
 +                        for (i = 0; i < ngid; i++)
 +                        {
 +                            sum_UgembU[e++] +=
 +                                (enerd->grpp.ener[egLJSR][GID(i, gid_tp, ngid)] +
 +                                 enerd->grpp.ener[egLJLR][GID(i, gid_tp, ngid)])*embU;
 +                        }
 +                    }
 +                    if (bDispCorr)
 +                    {
 +                        sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
 +                    }
 +                    if (bCharge)
 +                    {
 +                        for (i = 0; i < ngid; i++)
 +                        {
 +                            sum_UgembU[e++] +=
 +                                (enerd->grpp.ener[egCOULSR][GID(i, gid_tp, ngid)] +
 +                                 enerd->grpp.ener[egCOULLR][GID(i, gid_tp, ngid)])*embU;
 +                        }
 +                        if (bRFExcl)
 +                        {
 +                            sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
 +                        }
 +                        if (EEL_FULL(fr->eeltype))
 +                        {
 +                            sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU;
 +                        }
 +                    }
 +                }
 +
 +                if (embU == 0 || beta*epot > bU_bin_limit)
 +                {
 +                    bin[0]++;
 +                }
 +                else
 +                {
 +                    i = (int)((bU_logV_bin_limit
 +                               - (beta*epot - logV + refvolshift))*invbinw
 +                              + 0.5);
 +                    if (i < 0)
 +                    {
 +                        i = 0;
 +                    }
 +                    if (i >= nbin)
 +                    {
 +                        realloc_bins(&bin, &nbin, i+10);
 +                    }
 +                    bin[i]++;
 +                }
 +
 +                if (debug)
 +                {
 +                    fprintf(debug, "TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
 +                            step, epot, x_tp[XX], x_tp[YY], x_tp[ZZ]);
 +                }
 +
 +                if (dump_pdb && epot <= dump_ener)
 +                {
 +                    sprintf(str, "t%g_step%d.pdb", t, step);
 +                    sprintf(str2, "t: %f step %d ener: %f", t, step, epot);
 +                    write_sto_conf_mtop(str, str2, top_global, state->x, state->v,
 +                                        inputrec->ePBC, state->box);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            /* When running in parallel sum the energies over the processes */
 +            gmx_sumd(1,    &sum_embU, cr);
 +            gmx_sumd(nener, sum_UgembU, cr);
 +        }
 +
 +        frame++;
 +        V_all     += V;
 +        VembU_all += V*sum_embU/nsteps;
 +
 +        if (fp_tpi)
 +        {
 +            if (bVerbose || frame%10 == 0 || frame < 10)
 +            {
 +                fprintf(stderr, "mu %10.3e <mu> %10.3e\n",
 +                        -log(sum_embU/nsteps)/beta, -log(VembU_all/V_all)/beta);
 +            }
 +
 +            fprintf(fp_tpi, "%10.3f %12.5e %12.5e %12.5e %12.5e",
 +                    t,
 +                    VembU_all == 0 ? 20/beta : -log(VembU_all/V_all)/beta,
 +                    sum_embU == 0  ? 20/beta : -log(sum_embU/nsteps)/beta,
 +                    sum_embU/nsteps, V);
 +            for (e = 0; e < nener; e++)
 +            {
 +                fprintf(fp_tpi, " %12.5e", sum_UgembU[e]/nsteps);
 +            }
 +            fprintf(fp_tpi, "\n");
 +            fflush(fp_tpi);
 +        }
 +
 +        bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
 +    } /* End of the loop  */
 +    walltime_accounting_end(walltime_accounting);
 +
 +    close_trj(status);
 +
 +    if (fp_tpi != NULL)
 +    {
 +        gmx_fio_fclose(fp_tpi);
 +    }
 +
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog, "\n");
 +        fprintf(fplog, "  <V>  = %12.5e nm^3\n", V_all/frame);
 +        fprintf(fplog, "  <mu> = %12.5e kJ/mol\n", -log(VembU_all/V_all)/beta);
 +    }
 +
 +    /* Write the Boltzmann factor histogram */
 +    if (PAR(cr))
 +    {
 +        /* When running in parallel sum the bins over the processes */
 +        i = nbin;
 +        global_max(cr, &i);
 +        realloc_bins(&bin, &nbin, i);
 +        gmx_sumd(nbin, bin, cr);
 +    }
 +    if (MASTER(cr))
 +    {
 +        fp_tpi = xvgropen(opt2fn("-tpid", nfile, fnm),
 +                          "TPI energy distribution",
 +                          "\\betaU - log(V/<V>)", "count", oenv);
 +        sprintf(str, "number \\betaU > %g: %9.3e", bU_bin_limit, bin[0]);
 +        xvgr_subtitle(fp_tpi, str, oenv);
 +        xvgr_legend(fp_tpi, 2, (const char **)tpid_leg, oenv);
 +        for (i = nbin-1; i > 0; i--)
 +        {
 +            bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
 +            fprintf(fp_tpi, "%6.2f %10d %12.5e\n",
 +                    bUlogV,
 +                    (int)(bin[i]+0.5),
 +                    bin[i]*exp(-bUlogV)*V_all/VembU_all);
 +        }
 +        gmx_fio_fclose(fp_tpi);
 +    }
 +    sfree(bin);
 +
 +    sfree(sum_UgembU);
 +
 +    walltime_accounting_set_nsteps_done(walltime_accounting, frame*inputrec->nsteps);
 +
 +    return 0;
 +}
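
The "<mu>" value reported above is the Widom test-particle estimate of the excess chemical potential, mu_ex = -kT*ln(<V*exp(-beta*U)>/<V>), accumulated per frame in V_all and VembU_all. The standalone program below is an editor's sketch of only that final reduction, with made-up per-frame data; the BOLTZ value is the Boltzmann constant in kJ mol^-1 K^-1, approximately as used for beta above.

/*
 * Editor's sketch of the Widom estimate printed at the end of do_tpi():
 * mu_excess = -kT * ln( <V exp(-beta U)> / <V> ).
 */
#include <math.h>
#include <stdio.h>

#define BOLTZ 0.0083144621   /* kJ mol^-1 K^-1, approximate */

int main(void)
{
    double V[3]        = { 26.1, 26.3, 26.2 };       /* frame volumes, nm^3  */
    double embU_avg[3] = { 3.1e-3, 2.8e-3, 3.3e-3 }; /* per-frame <exp(-beta U)> */
    double temp = 300.0, beta = 1.0/(BOLTZ*temp);
    double V_all = 0.0, VembU_all = 0.0;
    int    f;

    for (f = 0; f < 3; f++)
    {
        V_all     += V[f];
        VembU_all += V[f]*embU_avg[f];
    }
    printf("<mu> = %.3f kJ/mol\n", -log(VembU_all/V_all)/beta);
    return 0;
}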
index 3173bea653ece43cfd347bb8d32976ca7ac79ee4,0000000000000000000000000000000000000000..1ab75d2ddc5d770df9f14ce5e2d5bd52fa078e18
mode 100644,000000..100644
--- /dev/null
@@@ -1,2011 -1,0 +1,2006 @@@
-     /* Set and write start time */
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "mdrun.h"
 +#include "md_support.h"
 +#include "md_logging.h"
 +#include "network.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "force.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "pme_loadbal.h"
 +#include "bondf.h"
 +#include "membed.h"
 +#include "types/nlistheuristics.h"
 +#include "types/iteratedconstraints.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +#include "gromacs/utility/gmxmpi.h"
 +#include "gromacs/fileio/confio.h"
 +#include "gromacs/fileio/trajectory_writing.h"
 +#include "gromacs/fileio/trnio.h"
 +#include "gromacs/fileio/trxio.h"
 +#include "gromacs/fileio/xtcio.h"
 +#include "gromacs/timing/wallcycle.h"
 +#include "gromacs/timing/walltime_accounting.h"
 +#include "gromacs/pulling/pull.h"
 +#include "gromacs/swap/swapcoords.h"
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +static void reset_all_counters(FILE *fplog, t_commrec *cr,
 +                               gmx_int64_t step,
 +                               gmx_int64_t *step_rel, t_inputrec *ir,
 +                               gmx_wallcycle_t wcycle, t_nrnb *nrnb,
 +                               gmx_walltime_accounting_t walltime_accounting,
 +                               nbnxn_cuda_ptr_t cu_nbv)
 +{
 +    char sbuf[STEPSTRSIZE];
 +
 +    /* Reset all the counters related to performance over the run */
 +    md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
 +                  gmx_step_str(step, sbuf));
 +
 +    if (cu_nbv)
 +    {
 +        nbnxn_cuda_reset_timings(cu_nbv);
 +    }
 +
 +    wallcycle_stop(wcycle, ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        reset_dd_statistics_counters(cr->dd);
 +    }
 +    init_nrnb(nrnb);
 +    ir->init_step += *step_rel;
 +    ir->nsteps    -= *step_rel;
 +    *step_rel      = 0;
 +    wallcycle_start(wcycle, ewcRUN);
 +    walltime_accounting_start(walltime_accounting);
 +    print_date_and_time(fplog, cr->nodeid, "Restarted time", walltime_accounting);
 +}
 +
 +double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite, gmx_constr_t constr,
 +             int stepout, t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed, t_forcerec *fr,
 +             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed,
 +             real cpt_period, real max_hours,
 +             const char gmx_unused *deviceOptions,
 +             unsigned long Flags,
 +             gmx_walltime_accounting_t walltime_accounting)
 +{
 +    gmx_mdoutf_t    outf = NULL;
 +    gmx_int64_t     step, step_rel;
 +    double          elapsed_time;
 +    double          t, t0, lam0[efptNR];
 +    gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEner;
 +    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
 +                    bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep,
 +                    bBornRadii, bStartingFromCpt;
 +    gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
 +    gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
 +                      bForceUpdate = FALSE, bCPT;
 +    gmx_bool          bMasterState;
 +    int               force_flags, cglo_flags;
 +    tensor            force_vir, shake_vir, total_vir, tmp_vir, pres;
 +    int               i, m;
 +    t_trxstatus      *status;
 +    rvec              mu_tot;
 +    t_vcm            *vcm;
 +    t_state          *bufstate = NULL;
 +    matrix           *scale_tot, pcoupl_mu, M, ebox;
 +    gmx_nlheur_t      nlh;
 +    t_trxframe        rerun_fr;
 +    gmx_repl_ex_t     repl_ex = NULL;
 +    int               nchkpt  = 1;
 +    gmx_localtop_t   *top;
 +    t_mdebin         *mdebin   = NULL;
 +    t_state          *state    = NULL;
 +    rvec             *f_global = NULL;
 +    gmx_enerdata_t   *enerd;
 +    rvec             *f = NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t      upd   = NULL;
 +    t_graph          *graph = NULL;
 +    globsig_t         gs;
 +    gmx_rng_t         mcrng = NULL;
 +    gmx_groups_t     *groups;
 +    gmx_ekindata_t   *ekind, *ekind_save;
 +    gmx_shellfc_t     shellfc;
 +    int               count, nconverged = 0;
 +    real              timestep   = 0;
 +    double            tcount     = 0;
 +    gmx_bool          bConverged = TRUE, bOK, bSumEkinhOld, bExchanged, bNeedRepartition;
 +    gmx_bool          bAppend;
 +    gmx_bool          bResetCountersHalfMaxH = FALSE;
 +    gmx_bool          bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter;
 +    gmx_bool          bUpdateDoLR;
 +    real              dvdl_constr;
 +    int               a0, a1;
 +    rvec             *cbuf = NULL;
 +    matrix            lastbox;
 +    real              veta_save, scalevir, tracevir;
 +    real              vetanew = 0;
 +    int               lamnew  = 0;
 +    /* for FEP */
 +    int               nstfep;
 +    double            cycles;
 +    real              saved_conserved_quantity = 0;
 +    real              last_ekin                = 0;
 +    int               iter_i;
 +    t_extmass         MassQ;
 +    int             **trotter_seq;
 +    char              sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
 +    int               handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t     iterate;
 +    gmx_int64_t       multisim_nsteps = -1;                        /* number of steps to do before the first multisim
 +                                                                      simulation stops. If equal to zero, don't
 +                                                                      communicate any more between multisims. */
 +    /* PME load balancing data for GPU kernels */
 +    pme_load_balancing_t pme_loadbal = NULL;
 +    double               cycles_pmes;
 +    gmx_bool             bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
 +
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters after half of the simulation steps. */
 +            wcycle_set_reset_counters(wcycle, ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway through the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf, top_global->natoms);
 +    }
 +    /* all the iterative cases - only if there are constraints */
 +    bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to
 +                                          false in this step.  The correct value, true or false,
 +                                          is set at each step, as it depends on the frequency of temperature
 +                                          and pressure control.*/
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr, fplog, ir, top_global);
 +
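 +    /* Check and possibly adjust the interval for global communication;
 +       with nstglobalcomm == 1 global statistics are computed every step. */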
 +    nstglobalcomm   = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD)
 +    {
 +        ir->nstxout_compressed = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda,
 +            &(state_global->fep_state), lam0,
 +            nrnb, top_global, &upd,
 +            nfile, fnm, &outf, &mdebin,
 +            force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd, 1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
 +                  enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f, top_global->natoms);
 +    }
 +
 +    /* Kinetic energy data */
 +    snew(ekind, 1);
 +    init_ekindata(fplog, top_global, &(ir->opts), ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save, 1);
 +    init_ekindata(fplog, top_global, &(ir->opts), ekind_save);
 +    /* Copy the cos acceleration to the groups struct */
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global, n_flexible_constraints(constr),
 +                                 (ir->bContinuation ||
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +    }
 +
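 +    /* Estimate how much data this run will write and warn if it is a lot */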
 +    {
 +        double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1);
 +        if ((io > 2000) && MASTER(cr))
 +        {
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state, 1);
 +        dd_init_local_state(cr->dd, state_global, state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout)
 +        {
 +            snew(f_global, state_global->natoms);
 +        }
 +    }
 +    else
 +    {
 +        if (PAR(cr))
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog, top_global, ir, cr);
 +
 +            pd_cg_range(cr, &fr->cg0, &fr->hcg);
 +            pd_at_range(cr, &a0, &a1);
 +        }
 +        else
 +        {
 +            top = gmx_mtop_generate_local_top(top_global, ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        forcerec_set_excl_load(fr, top, cr);
 +
 +        state    = partdec_init_local_state(cr, state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global, ir, 0, NULL, a0, a1-a0, mdatoms);
 +
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite, top, mdatoms, cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 +        {
 +            graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
 +        }
 +
 +        if (shellfc)
 +        {
 +            make_local_shells(cr, mdatoms, shellfc);
 +        }
 +
 +        setup_bonded_threading(fr, &top->idef);
 +
 +        if (ir->pull && PAR(cr))
 +        {
 +            dd_make_local_pull_groups(NULL, ir->pull, mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
 +                            state_global, top_global, ir,
 +                            state, &f, mdatoms, top, fr,
 +                            vsite, shellfc, constr,
 +                            nrnb, wcycle, FALSE);
 +
 +    }
 +
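 +    /* Update the atom masses for the current value of the mass-perturbation lambda */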
 +    update_mdatoms(mdatoms, state->lambda[efptMASS]);
 +
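 +    /* The state can only come from a checkpoint if -cpi was given and the file exists on the master node */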
 +    if (opt2bSet("-cpi", nfile, fnm))
 +    {
 +        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr);
 +    }
 +    else
 +    {
 +        bStateFromCP = FALSE;
 +    }
 +
 +    if (ir->bExpanded)
 +    {
 +        init_expanded_ensemble(bStateFromCP, ir, &mcrng, &state->dfhist);
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (bStateFromCP)
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if (Flags & MD_APPENDFILES)
 +            {
 +                restore_energyhistory_from_state(mdebin, &state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist, mdebin);
 +    }
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG))
 +    {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd, state);
 +    }
 +
 +    if (state->flags & (1<<estMC_RNG))
 +    {
 +        set_mc_state(mcrng, state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr)
 +    {
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr, top, ir, mdatoms, cr);
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog, cr, "nstcalcenergy", ir->nstcalcenergy,
 +                        "repl_ex_nst", &repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir,
 +                                        repl_ex_nst, repl_ex_nex, repl_ex_seed);
 +    }
 +
 +    /* PME tuning is only supported with GPUs or PME nodes and not with rerun or LJ-PME.
 +     * With perturbed charges with soft-core we should not change the cut-off.
 +     */
 +    if ((Flags & MD_TUNEPME) &&
 +        EEL_PME(fr->eeltype) &&
 +        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
 +        !(ir->efep != efepNO && mdatoms->nChargePerturbed > 0 && ir->fepvals->bScCoul) &&
 +        !bRerunMD && !EVDW_PME(fr->vdwtype))
 +    {
 +        pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata);
 +        cycles_pmes = 0;
 +        if (cr->duty & DUTY_PME)
 +        {
 +            /* Start tuning right away, as we can't measure the load */
 +            bPMETuneRunning = TRUE;
 +        }
 +        else
 +        {
 +            /* Separate PME nodes, we can measure the PP/PME load balance */
 +            bPMETuneTry = TRUE;
 +        }
 +    }
 +
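 +    /* For a fresh start (no continuation, no rerun): zero the velocities of frozen
 +       atoms, constrain the initial configuration and construct the virtual sites. */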
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for (i = mdatoms->start; i < mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for (m = 0; m < DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog, constr, ir, mdatoms, state,
 +                               cr, nrnb, fr, top);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(vsite, state->x, ir->delta_t, NULL,
 +                             top->idef.iparams, top->idef.il,
 +                             fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +
 +    /* set the free energy calculation frequency as the greatest common
 +       divisor of nstdhdl, nstexpanded, and repl_ex_nst */
 +    nstfep = ir->fepvals->nstdhdl;
 +    if (ir->bExpanded)
 +    {
 +        nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
 +    }
 +    if (repl_ex_nst > 0)
 +    {
 +        nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep);
 +    }
 +
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0)
 +                  | (bVV ? CGLO_PRESSURE : 0)
 +                  | (bVV ? CGLO_CONSTRAINT : 0)
 +                  | (bRerunMD ? CGLO_RERUNMD : 0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
 +
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                    NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                    constr, NULL, FALSE, state->box,
 +                    top_global, &bSumEkinhOld, cglo_flags);
 +    if (ir->eI == eiVVAK)
 +    {
 +        /* a second call to get the half step temperature initialized as well */
 +        /* we do the same call as above, but turn the pressure off -- internally to
 +           compute_globals, this is recognized as a velocity verlet half-step
 +           kinetic energy calculation.  This minimizes excess variables, but
 +           perhaps loses some logic? */
 +
 +        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                        NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                        constr, NULL, FALSE, state->box,
 +                        top_global, &bSumEkinhOld,
 +                        cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT))
 +    {
 +        for (i = 0; (i < ir->opts.ngtc); i++)
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
 +        }
 +    }
 +    if (ir->eI != eiVV)
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +
 +    /* if using an iterative algorithm, we need a buffer to hold a working copy of the state. */
 +    if (bIterativeCase)
 +    {
 +        bufstate = init_bufstate(state);
 +    }
 +
 +    /* need to make an initialization call to get the Trotter variables set, as well as other constants
 +       for non-Trotter temperature control */
 +    trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
 +
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr, FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr, "starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name), opt2fn("-rerun", nfile, fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr, "Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr, "starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf, "%s", "infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
 +                        gmx_step_str(ir->init_step, sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr, "%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps, sbuf), tbuf);
 +            }
 +        }
 +        fprintf(fplog, "\n");
 +    }
 +
-     print_date_and_time(fplog, cr->nodeid, "Started mdrun", walltime_accounting);
 +    walltime_accounting_start(walltime_accounting);
-     if (fplog)
-     {
-         fprintf(fplog, "\n");
-     }
 +    wallcycle_start(wcycle, ewcRUN);
++    print_start(fplog, cr, walltime_accounting, "mdrun");
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret = fcCheckPointParallel( cr->nodeid,
 +                                      NULL, 0);
 +    if (chkpt_ret == 0)
 +    {
 +        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
 +    }
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv, &status,
 +                                             opt2fn("-rerun", nfile, fnm),
 +                                             &rerun_fr, TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms, top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when we have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box, fr->shift_vec);
 +        }
 +    }
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX    = !bStateFromCP;
 +    bInitStep        = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep        = FALSE;
 +    bSumEkinhOld     = FALSE;
 +    bExchanged       = FALSE;
 +    bNeedRepartition = FALSE;
 +
 +    init_global_signals(&gs, cr, ir, repl_ex_nst);
 +
 +    step     = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh, bGStatEveryStep, step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame))
 +    {
 +
 +        wallcycle_start(wcycle, ewcSTEP);
 +
 +        if (bRerunMD)
 +        {
 +            if (rerun_fr.bStep)
 +            {
 +                step     = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime)
 +            {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        }
 +        else
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t         = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO || ir->bSimTemp)
 +        {
 +            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
 +               requiring different logic. */
 +
 +            set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0);
 +            bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
 +            bDoFEP       = (do_per_step(step, nstfep) && (ir->efep != efepNO));
 +            bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
 +                            && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt));
 +        }
 +
 +        if (bSimAnn)
 +        {
 +            update_annealing_target_temp(&(ir->opts), t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
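 +                /* Copy the rerun frame coordinates, and velocities if present, into the global state */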
 +                for (i = 0; i < state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i], state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for (i = 0; i < state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i], state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for (i = 0; i < state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box, state_global->box);
 +            copy_mat(state_global->box, state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* The following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
 +                    shift_self(graph, state->box, state->x);
 +                }
 +                construct_vsites(vsite, state->x, ir->delta_t, state->v,
 +                                 top->idef.iparams, top->idef.il,
 +                                 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS      = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +
 +            bNS = (bFirstStep || bExchanged || bNeedRepartition || bNStList || bDoFEP ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh, bFirstStep || bExchanged || bNeedRepartition || bDoFEP, step);
 +            }
 +        }
 +
 +        /* check whether we should stop because another simulation has
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&
 +                 (multisim_nsteps != ir->nsteps) )
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr,
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep         = TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0) ||
 +             ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii = bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii == 0))
 +        {
 +            bBornRadii = TRUE;
 +        }
 +
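 +        /* Decide whether this step writes to the log file and prints verbose progress output */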
 +        do_log     = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +            (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog, step, state->box, graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd, state, state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle, ewcDOMDEC);
 +                dd_partition_system(fplog, step, cr,
 +                                    bMasterState, nstglobalcomm,
 +                                    state_global, top_global, ir,
 +                                    state, &f, mdatoms, top, fr,
 +                                    vsite, shellfc, constr,
 +                                    nrnb, wcycle,
 +                                    do_verbose && !bPMETuneRunning);
 +                wallcycle_stop(wcycle, ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log)
 +        {
 +            print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms, state->lambda[efptMASS]);
 +        }
 +
 +        if ((bRerunMD && rerun_fr.bV) || bExchanged)
 +        {
 +
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 +                            constr, NULL, FALSE, state->box,
 +                            top_global, &bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
 +        if (EI_VV(ir->eI) && (!bInitStep))
 +        {
 +            /* for vv, the first half of the integration actually corresponds
 +               to the previous step.  bCalcEner is only required to be evaluated on the 'next' step,
 +               but the virial needs to be calculated on both the current step and the 'next' step. Future
 +               reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */
 +
 +            bCalcEner = do_per_step(step-1, ir->nstcalcenergy);
 +            bCalcVir  = bCalcEner ||
 +                (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
 +        }
 +        else
 +        {
 +            bCalcEner = do_per_step(step, ir->nstcalcenergy);
 +            bCalcVir  = bCalcEner ||
 +                (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
 +        }
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcVir || bCalcEner || bStopCM ||
 +                  do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
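 +        /* Energy file output is written every nstenergy steps and at the last step */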
 +        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcVir  = TRUE;
 +            bCalcEner = TRUE;
 +            bGStat    = TRUE;
 +        }
 +
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +                      );
 +
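 +        /* Assemble the force-calculation flags for this step; virial, energy and
 +           dH/dlambda contributions are only requested when they are needed. */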
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
 +                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
 +                       (bDoFEP ? GMX_FORCE_DHDL : 0)
 +                       );
 +
 +        if (fr->bTwinRange)
 +        {
 +            if (do_per_step(step, ir->nstcalclr))
 +            {
 +                force_flags |= GMX_FORCE_DO_LR;
 +            }
 +        }
 +
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count = relax_shell_flexcon(fplog, cr, bVerbose, step,
 +                                        ir, bNS, force_flags,
 +                                        top,
 +                                        constr, enerd, fcd,
 +                                        state, f, force_vir, mdatoms,
 +                                        nrnb, wcycle, graph, groups,
 +                                        shellfc, fr, bBornRadii, t, mu_tot,
 +                                        &bConverged, vsite,
 +                                        mdoutf_get_fp_field(outf));
 +            tcount += count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallelized as well, and does communication too.
 +             * Check comments in sim_util.c
 +             */
 +            do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups,
 +                     state->box, state->x, &state->hist,
 +                     f, force_vir, mdatoms, enerd, fcd,
 +                     state->lambda, graph,
 +                     fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI == eiVV && bInitStep)
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */
 +            }
 +            else
 +            {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
 +            }
 +
 +            /* If we are using twin-range interactions where the long-range component
 +             * is only evaluated every nstcalclr>1 steps, we should do a special update
 +             * step to combine the long-range forces on these steps.
 +             * For nstcalclr=1 this is not done, since the forces would have been added
 +             * directly to the short-range forces already.
 +             */
 +            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC,
 +                          f, bUpdateDoLR, fr->f_twin, fcd,
 +                          ekind, M, upd, bInitStep, etrtVELOCITY1,
 +                          cr, nrnb, constr, &top->idef);
 +
 +            if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep)
 +            {
 +                gmx_iterate_init(&iterate, TRUE);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +
 +            /* save the state */
 +            if (iterate.bIterationActive)
 +            {
 +                copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
 +            }
 +
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || iterate.bIterationActive)
 +            {
 +                if (iterate.bIterationActive)
 +                {
 +                    copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
 +                    if (bFirstIterate && bTrotter)
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +
 +                        veta_save = state->veta;
 +                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0);
 +                        vetanew     = state->veta;
 +                        state->veta = veta_save;
 +                    }
 +                }
 +
 +                bOK = TRUE;
 +                if (!bRerunMD || rerun_fr.bV || bForceUpdate)     /* Why is rerun_fr.bV here?  Unclear. */
 +                {
 +                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, shake_vir,
 +                                       cr, nrnb, wcycle, upd, constr,
 +                                       TRUE, bCalcVir, vetanew);
 +
 +                    if (!bOK)
 +                    {
 +                        gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constraints");
 +                    }
 +
 +                }
 +                else if (graph)
 +                {
 +                    /* Need to unshift here if a do_force has been
 +                       called in the previous step */
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
 +                if (bCalcEner && ir->eI == eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
 +                {
 +                    bSumEkinhOld = TRUE;
 +                }
 +                /* for vv, the first half of the integration actually corresponds to the previous step.
 +                   So we need information from the last step in the first half of the integration */
 +                if (bGStat || do_per_step(step-1, nstglobalcomm))
 +                {
 +                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                    constr, NULL, FALSE, state->box,
 +                                    top_global, &bSumEkinhOld,
 +                                    cglo_flags
 +                                    | CGLO_ENERGY
 +                                    | (bTemp ? CGLO_TEMPERATURE : 0)
 +                                    | (bPres ? CGLO_PRESSURE : 0)
 +                                    | (bPres ? CGLO_CONSTRAINT : 0)
 +                                    | ((iterate.bIterationActive) ? CGLO_ITERATE : 0)
 +                                    | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                    | CGLO_SCALEEKIN
 +                                    );
 +                    /* explanation of above:
 +                       a) We compute Ekin at the full time step
 +                       if 1) we are using the AveVel Ekin, and it's not the
 +                       initial step, or 2) if we are using AveEkin, but need the full
 +                       time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                       b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
 +                       EkinAveVel because it's needed for the pressure */
 +                }
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep)
 +                {
 +                    if (bTrotter)
 +                    {
 +                        m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */
 +                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
 +                    }
 +                    else
 +                    {
 +                        if (bExchanged)
 +                        {
 +
 +                            /* We need the kinetic energy at minus the half step for determining
 +                             * the full step kinetic energy and possibly for T-coupling.*/
 +                            /* This may not be quite working correctly yet . . . . */
 +                            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 +                                            constr, NULL, FALSE, state->box,
 +                                            top_global, &bSumEkinhOld,
 +                                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +                        }
 +                    }
 +                }
 +
 +                if (iterate.bIterationActive &&
 +                    done_iterating(cr, fplog, step, &iterate, bFirstIterate,
 +                                   state->veta, &vetanew))
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep)
 +            {
 +                copy_mat(shake_vir, state->svir_prev);
 +                copy_mat(force_vir, state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI == eiVV)
 +                {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI == eiVV)
 +            {
 +                copy_rvecn(cbuf, state->v, 0, state->natoms);
 +            }
 +        }
 +
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV)
 +        {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ);
 +            if (ir->eI == eiVV)
 +            {
 +                last_ekin = enerd->term[F_EKIN];
 +            }
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
 +            if (!bRerunMD)
 +            {
 +                sum_dhdl(enerd, state->lambda, ir->fepvals);
 +            }
 +        }
 +
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        if (bDoExpanded)
 +        {
 +            /* perform extended ensemble sampling in lambda - we don't
 +               actually move to the new state before outputting
 +               statistics, but if performing simulated tempering, we
 +               do update the velocities and the tau_t. */
 +
 +            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, mcrng, state->v, mdatoms);
 +            /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
 +            copy_df_history(&state_global->dfhist, &state->dfhist);
 +        }
 +
 +        /* Now we have the energies and forces corresponding to the
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         */
 +        do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
 +                                 ir, state, state_global, top_global, fr, upd,
 +                                 outf, mdebin, ekind, f, f_global,
 +                                 wcycle, mcrng, &nchkpt,
 +                                 bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT),
 +                                 bSumEkinhOld);
 +
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV)
 +        {
 +            copy_mat(state->svir_prev, shake_vir);
 +            copy_mat(state->fvir_prev, force_vir);
 +        }
 +
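 +        /* The elapsed wall time is used below for the -maxh stop check, the counter
 +           reset at half -maxh and the checkpoint interval. */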
 +        elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting);
 +
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this is just to make gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
 +            {
 +                gs.sig[eglsSTOPCOND] = 1;
 +            }
 +            if (gmx_get_stop_condition() == gmx_stop_cond_next)
 +            {
 +                gs.sig[eglsSTOPCOND] = -1;
 +            }
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition = (int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
 +        }
 +
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            elapsed_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that also the bonded interaction count check is not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs,
 +                                                     nlh.scale_tot, state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 ||
 +                            elapsed_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
 +
 +        /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */
 +        if (EI_VV(ir->eI))
 +        {
 +            if (!bInitStep)
 +            {
 +                update_tcouple(step, ir, state, ekind, upd, &MassQ, mdatoms);
 +            }
 +            if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
 +            {
 +                gmx_bool bIfRandomize;
 +                bIfRandomize = update_randomize_velocities(ir, step, mdatoms, state, upd, &top->idef, constr);
 +                /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
 +                if (constr && bIfRandomize)
 +                {
 +                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, tmp_vir,
 +                                       cr, nrnb, wcycle, upd, constr,
 +                                       TRUE, bCalcVir, vetanew);
 +                }
 +            }
 +        }
 +
 +        if (bIterativeCase && do_per_step(step, ir->nstpcouple))
 +        {
 +            gmx_iterate_init(&iterate, TRUE);
 +            /* for iterations, we save these vectors, as we will be redoing the calculations */
 +            copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
 +        }
 +
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || iterate.bIterationActive)
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */
 +            if (iterate.bIterationActive)
 +            {
 +                copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box, lastbox);
 +
 +            bOK         = TRUE;
 +            dvdl_constr = 0;
 +
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle, ewcUPDATE);
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter)
 +                {
 +                    if (iterate.bIterationActive)
 +                    {
 +                        if (bFirstIterate)
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir, scalevir, shake_vir);
 +                        m_add(force_vir, shake_vir, total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
 +                    /* We can only do Berendsen coupling after we have summed
 +                     * the kinetic energy or virial. Since this happens
 +                     * in global_stat after update, we should only do it at
 +                     * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                     */
 +                }
 +                else
 +                {
 +                    update_tcouple(step, ir, state, ekind, upd, &MassQ, mdatoms);
 +                    update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                    /* velocity half-step update */
 +                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                                  bUpdateDoLR, fr->f_twin, fcd,
 +                                  ekind, M, upd, FALSE, etrtVELOCITY2,
 +                                  cr, nrnb, constr, &top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +
 +                if (ir->eI == eiVVAK)
 +                {
 +                    copy_rvecn(state->x, cbuf, 0, state->natoms);
 +                }
 +                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                              bUpdateDoLR, fr->f_twin, fcd,
 +                              ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +                wallcycle_stop(wcycle, ewcUPDATE);
 +
 +                update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state,
 +                                   fr->bMolPBC, graph, f,
 +                                   &top->idef, shake_vir,
 +                                   cr, nrnb, wcycle, upd, constr,
 +                                   FALSE, bCalcVir, state->veta);
 +
 +                if (ir->eI == eiVVAK)
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                    constr, NULL, FALSE, lastbox,
 +                                    top_global, &bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE
 +                                    );
 +                    wallcycle_start(wcycle, ewcUPDATE);
 +                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf, state->x, 0, state->natoms);
 +
 +                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 +
 +                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 +                                  bUpdateDoLR, fr->f_twin, fcd,
 +                                  ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +                    wallcycle_stop(wcycle, ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure.
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
 +                                       state, fr->bMolPBC, graph, f,
 +                                       &top->idef, tmp_vir,
 +                                       cr, nrnb, wcycle, upd, NULL,
 +                                       FALSE, bCalcVir,
 +                                       state->veta);
 +                }
 +                if (!bOK)
 +                {
 +                    gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constraints");
 +                }
 +
 +                if (fr->bSepDVDL && fplog && do_log)
 +                {
 +                    gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr);
 +                }
 +                if (bVV)
 +                {
 +                    /* this factor of 2 correction is necessary
 +                       because half of the constraint force is removed
 +                       in the vv step, so we have to double it.  See
 +                       the Redmine issue #1255.  It is not yet clear
 +                       if the factor of 2 is exact, or just a very
 +                       good approximation, and this will be
 +                       investigated.  The next step is to see if this
 +                       can be done adding a dhdl contribution from the
 +                       rattle step, but this is somewhat more
 +                       complicated with the current code. Will be
 +                       investigated, hopefully for 4.6.3. However,
 +                       this current solution is much better than
 +                       having it completely wrong.
 +                     */
 +                    enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
 +                }
 +                else
 +                {
 +                    enerd->term[F_DVDL_CONSTR] += dvdl_constr;
 +                }
 +            }
 +            else if (graph)
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph, state->box, state->x);
 +            }
 +
 +            if (vsite != NULL)
 +            {
 +                wallcycle_start(wcycle, ewcVSITECONSTR);
 +                if (graph != NULL)
 +                {
 +                    shift_self(graph, state->box, state->x);
 +                }
 +                construct_vsites(vsite, state->x, ir->delta_t, state->v,
 +                                 top->idef.iparams, top->idef.il,
 +                                 fr->ePBC, fr->bMolPBC, graph, cr, state->box);
 +
 +                if (graph != NULL)
 +                {
 +                    unshift_self(graph, state->box, state->x);
 +                }
 +                wallcycle_stop(wcycle, ewcVSITECONSTR);
 +            }
 +
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints  ############ */
 +            /* With Leap-Frog we can skip compute_globals at
 +             * non-communication steps, but we need to calculate
 +             * the kinetic energy one step before communication.
 +             */
 +            if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
 +            {
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    gs.sig[eglsNABNSB] = nlh.nabnsb;
 +                }
 +                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                constr,
 +                                bFirstIterate ? &gs : NULL,
 +                                (step_rel % gs.nstms == 0) &&
 +                                (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
 +                                lastbox,
 +                                top_global, &bSumEkinhOld,
 +                                cglo_flags
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
 +                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
 +                                | (iterate.bIterationActive ? CGLO_ITERATE : 0)
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_CONSTRAINT
 +                                );
 +                if (ir->nstlist == -1 && bFirstIterate)
 +                {
 +                    nlh.nabnsb         = gs.set[eglsNABNSB];
 +                    gs.set[eglsNABNSB] = 0;
 +                }
 +            }
 +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properties,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (iterate.bIterationActive &&
 +                done_iterating(cr, fplog, step, &iterate, bFirstIterate,
 +                               trace(shake_vir), &tracevir))
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
 +        if (!bVV || bRerunMD)
 +        {
 +            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd, state->lambda, ir->fepvals);
 +        }
 +        update_box(fplog, step, ir, mdatoms, state, f,
 +                   ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, upd);
 +
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +
 +        /* The coordinates (x) were unshifted in update */
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,
 +             * so signal that we still have to do it.
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI == eiVV)
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ);
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr, do_or;
 +
 +            if (fplog && do_log && bDoExpanded)
 +            {
 +                /* only needed if doing expanded ensemble */
 +                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL,
 +                                          &state_global->dfhist, state->fep_state, ir->nstlog, step);
 +            }
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI))))
 +            {
 +                if (bCalcEner)
 +                {
 +                    upd_mdebin(mdebin, bDoDHDL, TRUE,
 +                               t, mdatoms->tmass, enerd, state,
 +                               ir->fepvals, ir->expandedvals, lastbox,
 +                               shake_vir, force_vir, total_vir, pres,
 +                               ekind, mu_tot, constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +
 +                do_dr  = do_per_step(step, ir->nstdisreout);
 +                do_or  = do_per_step(step, ir->nstorireout);
 +
 +                print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL,
 +                           step, t,
 +                           eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull, step, t);
 +            }
 +
 +            if (do_per_step(step, ir->nstlog))
 +            {
 +                if (fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +        if (bDoExpanded)
 +        {
 +            /* Have to do this part _after_ outputting the logfile and the edr file */
 +            /* Gets written into the state at the beginning of the next loop */
 +            state->fep_state = lamnew;
 +        }
 +        /* Print the remaining wall clock time for the run */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
 +        {
 +            if (shellfc)
 +            {
 +                fprintf(stderr, "\n");
 +            }
 +            print_time(stderr, walltime_accounting, step, ir, cr);
 +        }
 +
 +        /* Ion/water position swapping.
 +         * Not done in last step since trajectory writing happens before this call
 +         * in the MD loop and exchanges would be lost anyway. */
 +        bNeedRepartition = FALSE;
 +        if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
 +            do_per_step(step, ir->swap->nstswap))
 +        {
 +            bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle,
 +                                             bRerunMD ? rerun_fr.x   : state->x,
 +                                             bRerunMD ? rerun_fr.box : state->box,
 +                                             top_global, MASTER(cr) && bVerbose, bRerunMD);
 +
 +            if (bNeedRepartition && DOMAINDECOMP(cr))
 +            {
 +                dd_collect_state(cr->dd, state, state_global);
 +            }
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step, repl_ex_nst))
 +        {
 +            bExchanged = replica_exchange(fplog, cr, repl_ex,
 +                                          state_global, enerd,
 +                                          state, step, t);
 +        }
 +
 +        if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
 +        {
 +            dd_partition_system(fplog, step, cr, TRUE, 1,
 +                                state_global, top_global, ir,
 +                                state, &f, mdatoms, top, fr,
 +                                vsite, shellfc, constr,
 +                                nrnb, wcycle, FALSE);
 +        }
 +
 +        bFirstStep       = FALSE;
 +        bInitStep        = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres, state->pres_prev);
 +        }
 +
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed != NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel, membed, state_global->x);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +            }
 +        }
 +
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +
 +        cycles = wallcycle_stop(wcycle, ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd, cycles, ddCyclStep);
 +        }
 +
 +        if (bPMETuneRunning || bPMETuneTry)
 +        {
 +            /* PME grid + cut-off optimization with GPUs or PME nodes */
 +
 +            /* Count the total cycles over the last steps */
 +            cycles_pmes += cycles;
 +
 +            /* We can only switch cut-off at NS steps */
 +            if (step % ir->nstlist == 0)
 +            {
 +                /* PME grid + cut-off optimization with GPUs or PME nodes */
 +                if (bPMETuneTry)
 +                {
 +                    if (DDMASTER(cr->dd))
 +                    {
 +                        /* PME node load is too high, start tuning */
 +                        bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
 +                    }
 +                    dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
 +
 +                    if (bPMETuneRunning || step_rel > ir->nstlist*50)
 +                    {
 +                        bPMETuneTry     = FALSE;
 +                    }
 +                }
 +                if (bPMETuneRunning)
 +                {
 +                    /* init_step might not be a multiple of nstlist,
 +                     * but the first cycle is always skipped anyhow.
 +                     */
 +                    bPMETuneRunning =
 +                        pme_load_balance(pme_loadbal, cr,
 +                                         (bVerbose && MASTER(cr)) ? stderr : NULL,
 +                                         fplog,
 +                                         ir, state, cycles_pmes,
 +                                         fr->ic, fr->nbv, &fr->pmedata,
 +                                         step);
 +
 +                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
 +                    fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q;
 +                    fr->rlist        = fr->ic->rlist;
 +                    fr->rlistlong    = fr->ic->rlistlong;
 +                    fr->rcoulomb     = fr->ic->rcoulomb;
 +                    fr->rvdw         = fr->ic->rvdw;
 +                }
 +                cycles_pmes = 0;
 +            }
 +        }
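As an aside (illustrative sketch, not part of this patch): the PME tuning block above accumulates cycle counts every step but only acts at neighbour-search steps; tuning starts once the PME/PP force-cycle ratio exceeds about 1.05, and the attempt is abandoned after roughly 50 list updates. A compressed restatement of that control flow with plain C types and hypothetical names (the real code additionally makes the start decision on the DD master and broadcasts it):

#include <stdbool.h>

typedef struct {
    bool   trying;   /* still deciding whether tuning is worthwhile */
    bool   running;  /* tuning is active                            */
    double cycles;   /* cycles accumulated since the last NS step   */
} tune_state_t;

/* Called once per MD step; the cut-off and grid can only change at
 * neighbour-search steps, i.e. when step is a multiple of nstlist. */
static void pme_tune_step(tune_state_t *s, double step_cycles,
                          long long step, long long step_rel,
                          int nstlist, double pme_pp_ratio)
{
    s->cycles += step_cycles;
    if (step % nstlist != 0)
    {
        return;
    }
    if (s->trying)
    {
        s->running = (pme_pp_ratio >= 1.05);    /* PME load too high */
        if (s->running || step_rel > nstlist*50)
        {
            s->trying = false;                  /* decision is final */
        }
    }
    if (s->running)
    {
        /* ... try the next grid/cut-off setting here, using s->cycles
         * as the cost of the current setting (cf. pme_load_balance) ... */
    }
    s->cycles = 0;
}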
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting,
 +                               fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
 +            wcycle_set_reset_counters(wcycle, -1);
 +            if (!(cr->duty & DUTY_PME))
 +            {
 +                /* Tell our PME node to reset its counters */
 +                gmx_pme_send_resetcounters(cr, step);
 +            }
 +            /* Correct max_hours for the elapsed time */
 +            max_hours                -= elapsed_time/(60.0*60.0);
 +            bResetCountersHalfMaxH    = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +
 +    /* Stop measuring walltime */
 +    walltime_accounting_end(walltime_accounting);
 +
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_send_finish(cr);
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD)
 +        {
 +            print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t,
 +                       eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns);
 +    }
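As an aside (illustrative sketch, not part of this patch): the statistics printed above are plain running sums, with s1 = sum(x) and s2 = sum(x*x) over nns samples, giving mean s1/nns and standard deviation sqrt(s2/nns - (s1/nns)^2). A minimal accumulator with hypothetical names:

#include <math.h>

typedef struct {
    double s1;   /* sum of samples         */
    double s2;   /* sum of squared samples */
    int    n;    /* number of samples      */
} running_stats_t;

static void stats_add(running_stats_t *st, double x)
{
    st->s1 += x;
    st->s2 += x*x;
    st->n  += 1;
}

static double stats_mean(const running_stats_t *st)
{
    return st->s1/st->n;
}

static double stats_stddev(const running_stats_t *st)
{
    double m = st->s1/st->n;

    return sqrt(st->s2/st->n - m*m);   /* population std. dev., as printed above */
}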
 +
 +    if (pme_loadbal != NULL)
 +    {
 +        pme_loadbal_done(pme_loadbal, cr, fplog,
 +                         fr->nbv != NULL && fr->nbv->bUseGPU);
 +    }
 +
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog, "Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog, repl_ex);
 +    }
 +
 +    walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
 +
 +    return 0;
 +}
index bc2ec331c535b27b4a987ee9c0c64b836224bf80,0000000000000000000000000000000000000000..61cc64db84ce9149a911a6960abffda9f8c4743b
mode 100644,000000..100644
--- /dev/null
@@@ -1,763 -1,0 +1,769 @@@
-         "Note that using combined MPI+OpenMP parallelization is almost always",
-         "slower than single parallelization, except at the scaling limit, where",
-         "especially OpenMP parallelization of PME reduces the communication cost.",
-         "OpenMP-only parallelization is much faster than MPI-only parallelization",
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#include "mdrun_main.h"
 +
 +#ifdef HAVE_CONFIG_H
 +#include "config.h"
 +#endif
 +
 +#include <stdio.h>
 +
 +#include "gromacs/legacyheaders/checkpoint.h"
 +#include "gromacs/legacyheaders/copyrite.h"
 +#include "gromacs/legacyheaders/gmx_fatal.h"
 +#include "gromacs/legacyheaders/macros.h"
 +#include "gromacs/legacyheaders/main.h"
 +#include "gromacs/legacyheaders/mdrun.h"
 +#include "gromacs/legacyheaders/network.h"
 +#include "gromacs/legacyheaders/readinp.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +
 +#include "gromacs/commandline/pargs.h"
 +#include "gromacs/fileio/filenm.h"
 +
 +int gmx_mdrun(int argc, char *argv[])
 +{
 +    const char   *desc[] = {
 +        "[THISMODULE] is the main computational chemistry engine",
 +        "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
 +        "but it can also perform Stochastic Dynamics, Energy Minimization,",
 +        "test particle insertion or (re)calculation of energies.",
 +        "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
 +        "builds a Hessian matrix from a single conformation.",
 +        "For usual Normal Modes-like calculations, make sure that",
 +        "the structure provided is properly energy-minimized.",
 +        "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]",
 +        "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
 +        "and distributes the topology over nodes if needed.",
 +        "[TT]mdrun[tt] produces at least four output files.",
 +        "A single log file ([TT]-g[tt]) is written, unless the option",
 +        "[TT]-seppot[tt] is used, in which case each node writes a log file.",
 +        "The trajectory file ([TT]-o[tt]) contains coordinates, velocities and",
 +        "optionally forces.",
 +        "The structure file ([TT]-c[tt]) contains the coordinates and",
 +        "velocities of the last step.",
 +        "The energy file ([TT]-e[tt]) contains energies, the temperature,",
 +        "pressure, etc.; many of these quantities are also printed in the log file.",
 +        "Optionally coordinates can be written to a compressed trajectory file",
 +        "([TT]-x[tt]).[PAR]",
 +        "The option [TT]-dhdl[tt] is only used when free energy calculation is",
 +        "turned on.[PAR]",
 +        "A simulation can be run in parallel using two different parallelization",
 +        "schemes: MPI parallelization and/or OpenMP thread parallelization.",
 +        "The MPI parallelization uses multiple processes when [TT]mdrun[tt] is",
 +        "compiled with a normal MPI library or threads when [TT]mdrun[tt] is",
 +        "compiled with the GROMACS built-in thread-MPI library. OpenMP threads",
 +        "are supported when [TT]mdrun[tt] is compiled with OpenMP. Full OpenMP support",
 +        "is only available with the Verlet cut-off scheme; with the (older)",
 +        "group scheme, only PME-only processes can use OpenMP parallelization.",
 +        "In all cases [TT]mdrun[tt] will by default try to use all the available",
 +        "hardware resources. With a normal MPI library only the options",
 +        "[TT]-ntomp[tt] (with the Verlet cut-off scheme) and [TT]-ntomp_pme[tt],",
 +        "for PME-only processes, can be used to control the number of threads.",
 +        "With thread-MPI there are additional options [TT]-nt[tt], which sets",
 +        "the total number of threads, and [TT]-ntmpi[tt], which sets the number",
 +        "of thread-MPI threads.",
++        "The number of OpenMP threads used by [TT]mdrun[tt] can also be set with",
++        "the standard environment variable, [TT]OMP_NUM_THREADS[tt].",
++        "The [TT]GMX_PME_NUM_THREADS[tt] environment variable can be used to specify",
++        "the number of threads used by the PME-only processes.[PAR]",
++        "Note that combined MPI+OpenMP parallelization is in many cases",
++        "slower than either on its own. However, at high parallelization, using the",
++        "combination is often beneficial as it reduces the number of domains and/or",
++        "the number of MPI ranks. (Less and larger domains can improve scaling,",
++        "the number of MPI ranks. (Fewer and larger domains can improve scaling;",
++        "with separate PME processes, fewer MPI ranks reduce communication cost.)",
 +        "on a single CPU(-die). Since we currently don't have proper hardware",
 +        "topology detection, [TT]mdrun[tt] compiled with thread-MPI will only",
 +        "automatically use OpenMP-only parallelization when you use up to 4",
 +        "threads, up to 12 threads with Intel Nehalem/Westmere, or up to 16",
 +        "threads with Intel Sandy Bridge or newer CPUs. Otherwise MPI-only",
 +        "parallelization is used (except with GPUs, see below).",
 +        "[PAR]",
 +        "To quickly test the performance of the new Verlet cut-off scheme",
 +        "with old [TT].tpr[tt] files, either on CPUs or CPUs+GPUs, you can use",
 +        "the [TT]-testverlet[tt] option. This should not be used for production,",
 +        "since it can slightly modify potentials and it will remove charge groups",
 +        "making analysis difficult, as the [TT].tpr[tt] file will still contain",
 +        "charge groups. For production simulations it is highly recommended",
 +        "to specify [TT]cutoff-scheme = Verlet[tt] in the [TT].mdp[tt] file.",
 +        "[PAR]",
 +        "With GPUs (only supported with the Verlet cut-off scheme), the number",
 +        "of GPUs should match the number of MPI processes or MPI threads,",
 +        "excluding PME-only processes/threads. With thread-MPI, unless set on the command line, the number",
 +        "of MPI threads will automatically be set to the number of GPUs detected.",
 +        "To use a subset of the available GPUs, or to manually provide a mapping of",
 +        "GPUs to PP ranks, you can use the [TT]-gpu_id[tt] option. The argument of [TT]-gpu_id[tt] is",
 +        "a string of digits (without delimiter) representing device id-s of the GPUs to be used.",
 +        "For example, \"[TT]02[tt]\" specifies using GPUs 0 and 2 in the first and second PP ranks per compute node",
 +        "respectively. To select different sets of GPUs",
 +        "on different nodes of a compute cluster, use the [TT]GMX_GPU_ID[tt] environment",
 +        "variable instead. The format for [TT]GMX_GPU_ID[tt] is identical to ",
 +        "[TT]-gpu_id[tt], with the difference that an environment variable can have",
 +        "different values on different compute nodes. Multiple MPI ranks on each node",
 +        "can share GPUs. This is accomplished by specifying the id(s) of the GPU(s)",
 +        "multiple times, e.g. \"[TT]0011[tt]\" for four ranks sharing two GPUs in this node.",
 +        "This works within a single simulation, or a multi-simulation, with any form of MPI.",
 +        "[PAR]",
 +        "With the Verlet cut-off scheme and verlet-buffer-tolerance set,",
 +        "the pair-list update interval nstlist can be chosen freely with",
 +        "the option [TT]-nstlist[tt]. [TT]mdrun[tt] will then adjust",
 +        "the pair-list cut-off to maintain accuracy.",
 +        "By default [TT]mdrun[tt] will try to increase nstlist to improve",
 +        "the performance. For CPU runs nstlist might increase to 20, for GPU",
 +        "runs up to 40. But for medium to high parallelization or with",
 +        "fast GPUs, a (user supplied) larger nstlist value can give much",
 +        "better performance.",
 +        "[PAR]",
 +        "When using PME with separate PME nodes or with a GPU, the two major",
 +        "compute tasks, the non-bonded force calculation and the PME calculation",
 +        "run on different compute resources. If this load is not balanced,",
 +        "some of the resources will be idle part of the time. With the Verlet",
 +        "cut-off scheme this load is automatically balanced when the PME load",
 +        "is too high (but not when it is too low). This is done by scaling",
 +        "the Coulomb cut-off and PME grid spacing by the same amount. In the first",
 +        "few hundred steps different settings are tried and the fastest is chosen",
 +        "for the rest of the simulation. This does not affect the accuracy of",
 +        "the results, but it does affect the decomposition of the Coulomb energy",
 +        "into particle and mesh contributions. The auto-tuning can be turned off",
 +        "with the option [TT]-notunepme[tt].",
 +        "[PAR]",
 +        "[TT]mdrun[tt] pins (sets affinity of) threads to specific cores,",
 +        "when all (logical) cores on a compute node are used by [TT]mdrun[tt],",
 +        "even when no multi-threading is used,",
 +        "as this usually results in significantly better performance.",
 +        "If the queuing system or the OpenMP library has pinned threads, we honor",
 +        "this and don't pin again, even though the layout may be sub-optimal.",
 +        "If you want to have [TT]mdrun[tt] override an already set thread affinity",
 +        "or pin threads when using fewer cores, use [TT]-pin on[tt].",
 +        "With SMT (simultaneous multithreading), e.g. Intel Hyper-Threading,",
 +        "there are multiple logical cores per physical core.",
 +        "The option [TT]-pinstride[tt] sets the stride in logical cores for",
 +        "pinning consecutive threads. Without SMT, 1 is usually the best choice.",
 +        "With Intel Hyper-Threading 2 is best when using half or less of the",
 +        "logical cores, 1 otherwise. The default value of 0 does exactly that:",
 +        "it minimizes the number of threads per logical core, to optimize performance.",
 +        "If you want to run multiple [TT]mdrun[tt] jobs on the same physical node,",
 +        "you should set [TT]-pinstride[tt] to 1 when using all logical cores.",
 +        "When running multiple [TT]mdrun[tt] (or other) simulations on the same physical",
 +        "node, some simulations need to start pinning from a non-zero core",
 +        "to avoid overloading cores; with [TT]-pinoffset[tt] you can specify",
 +        "the offset in logical cores for pinning.",
 +        "[PAR]",
 +        "When [TT]mdrun[tt] is started using MPI with more than 1 process",
 +        "or with thread-MPI with more than 1 thread, MPI parallelization is used.",
 +        "By default domain decomposition is used, unless the [TT]-pd[tt]",
 +        "option is set, which selects particle decomposition.",
 +        "[PAR]",
 +        "With domain decomposition, the spatial decomposition can be set",
 +        "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.",
 +        "The user only needs to change this when the system is very inhomogeneous.",
 +        "Dynamic load balancing is set with the option [TT]-dlb[tt],",
 +        "which can give a significant performance improvement,",
 +        "especially for inhomogeneous systems. The only disadvantage of",
 +        "dynamic load balancing is that runs are no longer binary reproducible,",
 +        "but in most cases this is not important.",
 +        "By default the dynamic load balancing is automatically turned on",
 +        "when the measured performance loss due to load imbalance is 5% or more.",
 +        "At low parallelization these are the only important options",
 +        "for domain decomposition.",
 +        "At high parallelization the options in the next two sections",
 +        "could be important for increasing the performance.",
 +        "[PAR]",
 +        "When PME is used with domain decomposition, separate nodes can",
 +        "be assigned to do only the PME mesh calculation;",
 +        "this is computationally more efficient starting at about 12 nodes.",
 +        "The number of PME nodes is set with option [TT]-npme[tt];",
 +        "this cannot be more than half of the nodes.",
 +        "By default [TT]mdrun[tt] makes a guess for the number of PME",
 +        "nodes when the number of nodes is larger than 11 or performance-wise",
 +        "not compatible with the PME grid x dimension.",
 +        "But the user should optimize npme. Performance statistics on this issue",
 +        "are written at the end of the log file.",
 +        "For good load balancing at high parallelization, the PME grid x and y",
 +        "dimensions should be divisible by the number of PME nodes",
 +        "(the simulation will run correctly also when this is not the case).",
 +        "[PAR]",
 +        "This section lists all options that affect the domain decomposition.",
 +        "[PAR]",
 +        "Option [TT]-rdd[tt] can be used to set the required maximum distance",
 +        "for inter charge-group bonded interactions.",
 +        "Communication for two-body bonded interactions below the non-bonded",
 +        "cut-off distance always comes for free with the non-bonded communication.",
 +        "Atoms beyond the non-bonded cut-off are only communicated when they have",
 +        "missing bonded interactions; this means that the extra cost is minor",
 +        "and nearly independent of the value of [TT]-rdd[tt].",
 +        "With dynamic load balancing option [TT]-rdd[tt] also sets",
 +        "the lower limit for the domain decomposition cell sizes.",
 +        "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on",
 +        "the initial coordinates. The chosen value will be a balance",
 +        "between interaction range and communication cost.",
 +        "[PAR]",
 +        "When inter charge-group bonded interactions are beyond",
 +        "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.",
 +        "For pair interactions and tabulated bonds",
 +        "that do not generate exclusions, this check can be turned off",
 +        "with the option [TT]-noddcheck[tt].",
 +        "[PAR]",
 +        "When constraints are present, option [TT]-rcon[tt] influences",
 +        "the cell size limit as well.",
 +        "Atoms connected by NC constraints, where NC is the LINCS order plus 1,",
 +        "should not be beyond the smallest cell size. An error message is",
 +        "generated when this happens and the user should change the decomposition",
 +        "or decrease the LINCS order and increase the number of LINCS iterations.",
 +        "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS",
 +        "in a conservative fashion. For high parallelization it can be useful",
 +        "to set the distance required for P-LINCS with the option [TT]-rcon[tt].",
 +        "[PAR]",
 +        "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling",
 +        "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that",
 +        "the cells can scale down by at least this factor. This option is used",
 +        "for the automated spatial decomposition (when not using [TT]-dd[tt])",
 +        "as well as for determining the number of grid pulses, which in turn",
 +        "sets the minimum allowed cell size. Under certain circumstances",
 +        "the value of [TT]-dds[tt] might need to be adjusted to account for",
 +        "high or low spatial inhomogeneity of the system.",
 +        "[PAR]",
 +        "The option [TT]-gcom[tt] can be used to only do global communication",
 +        "every n steps.",
 +        "This can improve performance for highly parallel simulations",
 +        "where this global communication step becomes the bottleneck.",
 +        "For a global thermostat and/or barostat the temperature",
 +        "and/or pressure will also only be updated every [TT]-gcom[tt] steps.",
 +        "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]",
 +        "With [TT]-rerun[tt] an input trajectory can be given for which ",
 +        "forces and energies will be (re)calculated. Neighbor searching will be",
 +        "performed for every frame, unless [TT]nstlist[tt] is zero",
 +        "(see the [TT].mdp[tt] file).[PAR]",
 +        "ED (essential dynamics) sampling and/or additional flooding potentials",
 +        "are switched on by using the [TT]-ei[tt] flag followed by an [TT].edi[tt]",
 +        "file. The [TT].edi[tt] file can be produced with the [TT]make_edi[tt] tool",
 +        "or by using options in the essdyn menu of the WHAT IF program.",
 +        "[TT]mdrun[tt] produces a [TT].xvg[tt] output file that",
 +        "contains projections of positions, velocities and forces onto selected",
 +        "eigenvectors.[PAR]",
 +        "When user-defined potential functions have been selected in the",
 +        "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
 +        "a formatted table with potential functions. The file is read from",
 +        "either the current directory or the [TT]GMXLIB[tt] directory.",
 +        "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
 +        "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
 +        "normal Coulomb.",
 +        "When pair interactions are present, a separate table for pair interaction",
 +        "functions is read using the [TT]-tablep[tt] option.[PAR]",
 +        "When tabulated bonded functions are present in the topology,",
 +        "interaction functions are read using the [TT]-tableb[tt] option.",
 +        "For each different tabulated interaction type the table file name is",
 +        "modified in a different way: before the file extension an underscore is",
 +        "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
 +        "and finally the table number of the interaction type.[PAR]",
 +        "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
 +        "coordinates and forces when pulling is selected",
 +        "in the [TT].mdp[tt] file.[PAR]",
 +        "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ",
 +        "simulated in parallel.",
 +        "As many input files/directories are required as the number of systems. ",
 +        "The [TT]-multidir[tt] option takes a list of directories (one for each ",
 +        "system) and runs in each of them, using the input/output file names, ",
 +        "such as specified by e.g. the [TT]-s[tt] option, relative to these ",
 +        "directories.",
 +        "With [TT]-multi[tt], the system number is appended to the run input ",
 +        "and each output filename, for instance [TT]topol.tpr[tt] becomes",
 +        "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.",
 +        "The number of nodes per system is the total number of nodes",
 +        "divided by the number of systems.",
 +        "One use of this option is for NMR refinement: when distance",
 +        "or orientation restraints are present these can be ensemble averaged",
 +        "over all the systems.[PAR]",
 +        "With [TT]-replex[tt] replica exchange is attempted every given number",
 +        "of steps. The number of replicas is set with the [TT]-multi[tt] or ",
 +        "[TT]-multidir[tt] option, described above.",
 +        "All run input files should use a different coupling temperature,",
 +        "the order of the files is not important. The random seed is set with",
 +        "[TT]-reseed[tt]. The velocities are scaled and neighbor searching",
 +        "is performed after every exchange.[PAR]",
 +        "Finally some experimental algorithms can be tested when the",
 +        "appropriate options have been given. Currently under",
 +        "investigation: polarizability.",
 +        "[PAR]",
 +        "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
 +        "a protein into a membrane. The data file should contain the options",
 +        "that where passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]",
 +        "that were passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]",
 +        "[PAR]",
 +        "The option [TT]-pforce[tt] is useful when you suspect a simulation",
 +        "crashes due to too large forces. With this option coordinates and",
 +        "forces of atoms with a force larger than a certain value will",
 +        "be printed to stderr.",
 +        "[PAR]",
 +        "Checkpoints containing the complete state of the system are written",
 +        "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
 +        "unless option [TT]-cpt[tt] is set to -1.",
 +        "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
 +        "make sure that a recent state of the system is always available,",
 +        "even when the simulation is terminated while writing a checkpoint.",
 +        "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
 +        "with the step number.",
 +        "A simulation can be continued by reading the full state from file",
 +        "with option [TT]-cpi[tt]. This option is intelligent in the way that",
 +        "if no checkpoint file is found, Gromacs just assumes a normal run and",
 +        "starts from the first step of the [TT].tpr[tt] file. By default the output",
 +        "will be appended to the existing output files. The checkpoint file",
 +        "contains checksums of all output files, such that you will never",
 +        "lose data when some output files are modified, corrupt or removed.",
 +        "There are three scenarios with [TT]-cpi[tt]:[PAR]",
 +        "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
 +        "[TT]*[tt] all files are present with names and checksums matching those stored",
 +        "in the checkpoint file: files are appended[PAR]",
 +        "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
 +        "With [TT]-noappend[tt] new output files are opened and the simulation",
 +        "part number is added to all output file names.",
 +        "Note that in all cases the checkpoint file itself is not renamed",
 +        "and will be overwritten, unless its name does not match",
 +        "the [TT]-cpo[tt] option.",
 +        "[PAR]",
 +        "With checkpointing the output is appended to previously written",
 +        "output files, unless [TT]-noappend[tt] is used or none of the previous",
 +        "output files are present (except for the checkpoint file).",
 +        "The integrity of the files to be appended is verified using checksums",
 +        "which are stored in the checkpoint file. This ensures that output can",
 +        "not be mixed up or corrupted due to file appending. When only some",
 +        "of the previous output files are present, a fatal error is generated;",
 +        "no old output files are modified and no new output files are opened.",
 +        "The result with appending will be the same as from a single run.",
 +        "The contents will be binary identical, unless you use a different number",
 +        "of nodes or dynamic load balancing or the FFT library uses optimizations",
 +        "through timing.",
 +        "[PAR]",
 +        "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
 +        "file is written at the first neighbor search step where the run time",
 +        "exceeds [TT]-maxh[tt]*0.99 hours.",
 +        "[PAR]",
 +        "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
 +        "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
 +        "pressed), it will stop after the next neighbor search step ",
 +        "(with nstlist=0 at the next step).",
 +        "In both cases all the usual output will be written to file.",
 +        "When running with MPI, a signal to one of the [TT]mdrun[tt] processes",
 +        "is sufficient. This signal should not be sent to mpirun or",
 +        "the [TT]mdrun[tt] process that is the parent of the others.",
 +        "[PAR]",
 +        "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
 +    };
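As an aside (illustrative sketch, not part of this patch): the [TT]-gpu_id[tt]/[TT]GMX_GPU_ID[tt] digit-string convention described in the help text above maps the i-th digit to the GPU used by the i-th PP rank on a node, so "0011" gives ranks 0 and 1 GPU 0, and ranks 2 and 3 GPU 1. A hypothetical parser showing only that mapping (the real option handling lives in the GROMACS hardware-assignment code):

#include <string.h>

/* Returns the number of PP ranks mapped, or -1 on malformed input. */
static int parse_gpu_id_string(const char *gpu_id, int *device_of_rank, int max_ranks)
{
    int nranks = (int) strlen(gpu_id);
    int i;

    if (nranks > max_ranks)
    {
        return -1;
    }
    for (i = 0; i < nranks; i++)
    {
        if (gpu_id[i] < '0' || gpu_id[i] > '9')
        {
            return -1;   /* the convention uses single decimal digits */
        }
        device_of_rank[i] = gpu_id[i] - '0';
    }
    return nranks;
}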
 +    t_commrec    *cr;
 +    t_filenm      fnm[] = {
 +        { efTPX, NULL,      NULL,       ffREAD },
 +        { efTRN, "-o",      NULL,       ffWRITE },
 +        { efCOMPRESSED, "-x", NULL,     ffOPTWR },
 +        { efCPT, "-cpi",    NULL,       ffOPTRD },
 +        { efCPT, "-cpo",    NULL,       ffOPTWR },
 +        { efSTO, "-c",      "confout",  ffWRITE },
 +        { efEDR, "-e",      "ener",     ffWRITE },
 +        { efLOG, "-g",      "md",       ffWRITE },
 +        { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
 +        { efXVG, "-field",  "field",    ffOPTWR },
 +        { efXVG, "-table",  "table",    ffOPTRD },
 +        { efXVG, "-tabletf", "tabletf",    ffOPTRD },
 +        { efXVG, "-tablep", "tablep",   ffOPTRD },
 +        { efXVG, "-tableb", "table",    ffOPTRD },
 +        { efTRX, "-rerun",  "rerun",    ffOPTRD },
 +        { efXVG, "-tpi",    "tpi",      ffOPTWR },
 +        { efXVG, "-tpid",   "tpidist",  ffOPTWR },
 +        { efEDI, "-ei",     "sam",      ffOPTRD },
 +        { efXVG, "-eo",     "edsam",    ffOPTWR },
 +        { efXVG, "-devout", "deviatie", ffOPTWR },
 +        { efXVG, "-runav",  "runaver",  ffOPTWR },
 +        { efXVG, "-px",     "pullx",    ffOPTWR },
 +        { efXVG, "-pf",     "pullf",    ffOPTWR },
 +        { efXVG, "-ro",     "rotation", ffOPTWR },
 +        { efLOG, "-ra",     "rotangles", ffOPTWR },
 +        { efLOG, "-rs",     "rotslabs", ffOPTWR },
 +        { efLOG, "-rt",     "rottorque", ffOPTWR },
 +        { efMTX, "-mtx",    "nm",       ffOPTWR },
 +        { efNDX, "-dn",     "dipole",   ffOPTWR },
 +        { efRND, "-multidir", NULL,      ffOPTRDMULT},
 +        { efDAT, "-membed", "membed",   ffOPTRD },
 +        { efTOP, "-mp",     "membed",   ffOPTRD },
 +        { efNDX, "-mn",     "membed",   ffOPTRD },
 +        { efXVG, "-swap",   "swapions", ffOPTWR }
 +    };
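As an aside (illustrative sketch, not part of this patch): the [TT]-multi[tt] naming rule described in the help text above inserts the per-simulation index before the file extension, so topol.tpr becomes topol0.tpr, topol1.tpr, and so on. A hypothetical helper showing just that transformation:

#include <stdio.h>
#include <string.h>

/* E.g. multi_sim_filename("topol.tpr", 3, buf, sizeof(buf)) writes "topol3.tpr". */
static void multi_sim_filename(const char *name, int sim, char *out, size_t outlen)
{
    const char *dot = strrchr(name, '.');

    if (dot == NULL)
    {
        snprintf(out, outlen, "%s%d", name, sim);
    }
    else
    {
        snprintf(out, outlen, "%.*s%d%s", (int)(dot - name), name, sim, dot);
    }
}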
 +#define NFILE asize(fnm)
 +
 +    /* Command line options ! */
 +    gmx_bool        bPartDec      = FALSE;
 +    gmx_bool        bDDBondCheck  = TRUE;
 +    gmx_bool        bDDBondComm   = TRUE;
 +    gmx_bool        bTunePME      = TRUE;
 +    gmx_bool        bTestVerlet   = FALSE;
 +    gmx_bool        bVerbose      = FALSE;
 +    gmx_bool        bCompact      = TRUE;
 +    gmx_bool        bSepPot       = FALSE;
 +    gmx_bool        bRerunVSite   = FALSE;
 +    gmx_bool        bConfout      = TRUE;
 +    gmx_bool        bReproducible = FALSE;
 +
 +    int             npme          = -1;
 +    int             nstlist       = 0;
 +    int             nmultisim     = 0;
 +    int             nstglobalcomm = -1;
 +    int             repl_ex_nst   = 0;
 +    int             repl_ex_seed  = -1;
 +    int             repl_ex_nex   = 0;
 +    int             nstepout      = 100;
 +    int             resetstep     = -1;
 +    gmx_int64_t     nsteps        = -2; /* the value -2 means that the mdp option will be used */
 +
 +    rvec            realddxyz          = {0, 0, 0};
 +    const char     *ddno_opt[ddnoNR+1] =
 +    { NULL, "interleave", "pp_pme", "cartesian", NULL };
 +    const char     *dddlb_opt[] =
 +    { NULL, "auto", "no", "yes", NULL };
 +    const char     *thread_aff_opt[threadaffNR+1] =
 +    { NULL, "auto", "on", "off", NULL };
 +    const char     *nbpu_opt[] =
 +    { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL };
 +    real            rdd                   = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1;
 +    char           *ddcsx                 = NULL, *ddcsy = NULL, *ddcsz = NULL;
 +    real            cpt_period            = 15.0, max_hours = -1;
 +    gmx_bool        bAppendFiles          = TRUE;
 +    gmx_bool        bKeepAndNumCPT        = FALSE;
 +    gmx_bool        bResetCountersHalfWay = FALSE;
 +    output_env_t    oenv                  = NULL;
 +    const char     *deviceOptions         = "";
 +
 +    /* Non-transparent initialization of a complex gmx_hw_opt_t struct.
 +     * But unfortunately we are not allowed to call a function here,
 +     * since declarations follow below.
 +     */
 +    gmx_hw_opt_t    hw_opt = {
 +        0, 0, 0, 0, threadaffSEL, 0, 0,
 +        { NULL, FALSE, 0, NULL }
 +    };
 +
 +    t_pargs         pa[] = {
 +
 +        { "-pd",      FALSE, etBOOL, {&bPartDec},
 +          "Use particle decomposition" },
 +        { "-dd",      FALSE, etRVEC, {&realddxyz},
 +          "Domain decomposition grid, 0 is optimize" },
 +        { "-ddorder", FALSE, etENUM, {ddno_opt},
 +          "DD node order" },
 +        { "-npme",    FALSE, etINT, {&npme},
 +          "Number of separate nodes to be used for PME, -1 is guess" },
 +        { "-nt",      FALSE, etINT, {&hw_opt.nthreads_tot},
 +          "Total number of threads to start (0 is guess)" },
 +        { "-ntmpi",   FALSE, etINT, {&hw_opt.nthreads_tmpi},
 +          "Number of thread-MPI threads to start (0 is guess)" },
 +        { "-ntomp",   FALSE, etINT, {&hw_opt.nthreads_omp},
 +          "Number of OpenMP threads per MPI process/thread to start (0 is guess)" },
 +        { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme},
 +          "Number of OpenMP threads per MPI process/thread to start (0 is -ntomp)" },
 +        { "-pin",     FALSE, etENUM, {thread_aff_opt},
 +          "Fix threads (or processes) to specific cores" },
 +        { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset},
 +          "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" },
 +        { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
 +          "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
 +        { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
 +          "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" },
 +        { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
 +          "Check for all bonded interactions with DD" },
 +        { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
 +          "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
 +        { "-rdd",     FALSE, etREAL, {&rdd},
 +          "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
 +        { "-rcon",    FALSE, etREAL, {&rconstr},
 +          "Maximum distance for P-LINCS (nm), 0 is estimate" },
 +        { "-dlb",     FALSE, etENUM, {dddlb_opt},
 +          "Dynamic load balancing (with DD)" },
 +        { "-dds",     FALSE, etREAL, {&dlb_scale},
 +          "Minimum allowed dlb scaling of the DD cell size" },
 +        { "-ddcsx",   FALSE, etSTR, {&ddcsx},
 +          "HIDDENThe DD cell sizes in x" },
 +        { "-ddcsy",   FALSE, etSTR, {&ddcsy},
 +          "HIDDENThe DD cell sizes in y" },
 +        { "-ddcsz",   FALSE, etSTR, {&ddcsz},
 +          "HIDDENThe DD cell sizes in z" },
 +        { "-gcom",    FALSE, etINT, {&nstglobalcomm},
 +          "Global communication frequency" },
 +        { "-nb",      FALSE, etENUM, {&nbpu_opt},
 +          "Calculate non-bonded interactions on" },
 +        { "-nstlist", FALSE, etINT, {&nstlist},
 +          "Set nstlist when using a Verlet buffer tolerance (0 is guess)" },
 +        { "-tunepme", FALSE, etBOOL, {&bTunePME},
 +          "Optimize PME load between PP/PME nodes or GPU/CPU" },
 +        { "-testverlet", FALSE, etBOOL, {&bTestVerlet},
 +          "Test the Verlet non-bonded scheme" },
 +        { "-v",       FALSE, etBOOL, {&bVerbose},
 +          "Be loud and noisy" },
 +        { "-compact", FALSE, etBOOL, {&bCompact},
 +          "Write a compact log file" },
 +        { "-seppot",  FALSE, etBOOL, {&bSepPot},
 +          "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
 +        { "-pforce",  FALSE, etREAL, {&pforce},
 +          "Print all forces larger than this (kJ/mol nm)" },
 +        { "-reprod",  FALSE, etBOOL, {&bReproducible},
 +          "Try to avoid optimizations that affect binary reproducibility" },
 +        { "-cpt",     FALSE, etREAL, {&cpt_period},
 +          "Checkpoint interval (minutes)" },
 +        { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
 +          "Keep and number checkpoint files" },
 +        { "-append",  FALSE, etBOOL, {&bAppendFiles},
 +          "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
 +        { "-nsteps",  FALSE, etINT64, {&nsteps},
 +          "Run this number of steps, overrides .mdp file option" },
 +        { "-maxh",   FALSE, etREAL, {&max_hours},
 +          "Terminate after 0.99 times this time (hours)" },
 +        { "-multi",   FALSE, etINT, {&nmultisim},
 +          "Do multiple simulations in parallel" },
 +        { "-replex",  FALSE, etINT, {&repl_ex_nst},
 +          "Attempt replica exchange periodically with this period (steps)" },
 +        { "-nex",  FALSE, etINT, {&repl_ex_nex},
 +          "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion).  -nex zero or not specified gives neighbor replica exchange." },
 +        { "-reseed",  FALSE, etINT, {&repl_ex_seed},
 +          "Seed for replica exchange, -1 is generate a seed" },
 +        { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
 +          "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
 +        { "-confout", FALSE, etBOOL, {&bConfout},
 +          "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
 +        { "-stepout", FALSE, etINT, {&nstepout},
 +          "HIDDENFrequency of writing the remaining wall clock time for the run" },
 +        { "-resetstep", FALSE, etINT, {&resetstep},
 +          "HIDDENReset cycle counters after this many time steps" },
 +        { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
 +          "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
 +    };
 +    unsigned long   Flags, PCA_Flags;
 +    ivec            ddxyz;
 +    int             dd_node_order;
 +    gmx_bool        bAddPart;
 +    FILE           *fplog, *fpmulti;
 +    int             sim_part, sim_part_fn;
 +    const char     *part_suffix = ".part";
 +    char            suffix[STRLEN];
 +    int             rc;
 +    char          **multidir = NULL;
 +
 +
 +    cr = init_commrec();
 +
 +    PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET));
 +
 +    /* Comment this in to do fexist calls only on the master rank.
 +     * It does not work with rerun or tables at the moment;
 +     * also comment out the version of init_forcerec in md.c
 +     * that uses NULL instead of opt2fn.
 +     */
 +    /*
 +       if (!MASTER(cr))
 +       {
 +       PCA_Flags |= PCA_NOT_READ_NODE;
 +       }
 +     */
 +
 +    if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa,
 +                           asize(desc), desc, 0, NULL, &oenv))
 +    {
 +        return 0;
 +    }
 +
 +
 +    /* We set these early because they might be used in init_multisystem().
 +       Note that npme > nnodes is possible until the number of threads is
 +       set later on, if there is thread parallelization. That should not
 +       lead to problems. */
 +    dd_node_order = nenum(ddno_opt);
 +    cr->npmenodes = npme;
 +
 +    hw_opt.thread_affinity = nenum(thread_aff_opt);
 +
 +    /* now check the -multi and -multidir option */
 +    if (opt2bSet("-multidir", NFILE, fnm))
 +    {
 +        if (nmultisim > 0)
 +        {
 +            gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
 +        }
 +        nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
 +    }
 +
 +
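 +    /* Replica exchange requires at least two simulations, set up with
 +     * -multi or -multidir. */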
 +    if (repl_ex_nst != 0 && nmultisim < 2)
 +    {
 +        gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)");
 +    }
 +
 +    if (repl_ex_nex < 0)
 +    {
 +        gmx_fatal(FARGS, "The number of replica exchanges (-nex) cannot be negative");
 +    }
 +
 +    if (nmultisim > 1)
 +    {
 +#ifndef GMX_THREAD_MPI
 +        gmx_bool bParFn = (multidir == NULL);
 +        init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
 +#else
 +        gmx_fatal(FARGS, "mdrun -multi is not supported with the thread-MPI library. "
 +                  "Please compile GROMACS with MPI support");
 +#endif
 +    }
 +
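 +    /* Unless we are appending, new output files get a part-number suffix;
 +     * this can still be adjusted below after reading the checkpoint. */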
 +    bAddPart = !bAppendFiles;
 +
 +    /* Check if there is ANY checkpoint file available */
 +    sim_part    = 1;
 +    sim_part_fn = sim_part;
 +    if (opt2bSet("-cpi", NFILE, fnm))
 +    {
 +        if (bSepPot && bAppendFiles)
 +        {
 +            gmx_fatal(FARGS, "Output file appending is not supported with -seppot");
 +        }
 +
 +        bAppendFiles =
 +            read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
 +                                                          fnm, cr),
 +                                            &sim_part_fn, NULL, cr,
 +                                            bAppendFiles, NFILE, fnm,
 +                                            part_suffix, &bAddPart);
 +        if (sim_part_fn == 0 && MULTIMASTER(cr))
 +        {
 +            fprintf(stdout, "No previous checkpoint file present, assuming this is a new run.\n");
 +        }
 +        else
 +        {
 +            sim_part = sim_part_fn + 1;
 +        }
 +
 +        if (MULTISIM(cr) && MASTER(cr))
 +        {
 +            if (MULTIMASTER(cr))
 +            {
 +                /* Log file is not yet available, so if there's a
 +                 * problem we can only write to stderr. */
 +                fpmulti = stderr;
 +            }
 +            else
 +            {
 +                fpmulti = NULL;
 +            }
 +            check_multi_int(fpmulti, cr->ms, sim_part, "simulation part", TRUE);
 +        }
 +    }
 +    else
 +    {
 +        bAppendFiles = FALSE;
 +    }
 +
 +    if (!bAppendFiles)
 +    {
 +        sim_part_fn = sim_part;
 +    }
 +
 +    if (bAddPart)
 +    {
 +        /* Rename all output files (except checkpoint files) */
 +        /* create new part name first (zero-filled) */
 +        sprintf(suffix, "%s%04d", part_suffix, sim_part_fn);
 +
 +        add_suffix_to_output_names(fnm, NFILE, suffix);
 +        if (MULTIMASTER(cr))
 +        {
 +            fprintf(stdout, "Checkpoint file is from part %d, new output files will be suffixed '%s'.\n", sim_part-1, suffix);
 +        }
 +    }
 +
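 +    /* Collect the run options chosen above into a single bitmask that is
 +     * passed on to mdrunner(). */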
 +    Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0;
 +    Flags = Flags | (bSepPot       ? MD_SEPPOT       : 0);
 +    Flags = Flags | (bPartDec      ? MD_PARTDEC      : 0);
 +    Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
 +    Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
 +    Flags = Flags | (bTunePME      ? MD_TUNEPME      : 0);
 +    Flags = Flags | (bTestVerlet   ? MD_TESTVERLET   : 0);
 +    Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
 +    Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
 +    Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
 +    Flags = Flags | (bAppendFiles  ? MD_APPENDFILES  : 0);
 +    Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0);
 +    Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
 +    Flags = Flags | (sim_part > 1    ? MD_STARTFROMCPT : 0);
 +    Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
 +
 +
 +    /* We postpone opening the log file if we are appending, so we can
 +       first truncate the old log file and append to the correct position
 +       there instead.  */
 +    if ((MASTER(cr) || bSepPot) && !bAppendFiles)
 +    {
 +        gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr,
 +                     !bSepPot, Flags & MD_APPENDFILES, &fplog);
 +        please_cite(fplog, "Hess2008b");
 +        please_cite(fplog, "Spoel2005a");
 +        please_cite(fplog, "Lindahl2001a");
 +        please_cite(fplog, "Berendsen95a");
 +    }
 +    else if (!MASTER(cr) && bSepPot)
 +    {
 +        gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, !bSepPot, Flags, &fplog);
 +    }
 +    else
 +    {
 +        fplog = NULL;
 +    }
 +
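 +    /* Round the real-valued -dd grid dimensions to the nearest integers */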
 +    ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
 +    ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
 +    ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
 +
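 +    /* mdrunner() sets up and runs the simulation; its return value
 +     * becomes the exit code of this program. */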
 +    rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact,
 +                  nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr,
 +                  dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz,
 +                  nbpu_opt[0], nstlist,
 +                  nsteps, nstepout, resetstep,
 +                  nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed,
 +                  pforce, cpt_period, max_hours, deviceOptions, Flags);
 +
 +    /* Log file has to be closed in mdrunner if we are appending to it
 +       (fplog not set here) */
 +    if (MASTER(cr) && !bAppendFiles)
 +    {
 +        gmx_log_close(fplog);
 +    }
 +
 +    return rc;
 +}