Merge branch 'release-4-6'
author Roland Schulz <roland@utk.edu>
Wed, 6 Jun 2012 12:13:00 +0000 (08:13 -0400)
committer Roland Schulz <roland@utk.edu>
Wed, 6 Jun 2012 12:14:15 +0000 (08:14 -0400)
Conflicts:
src/gromacs/gmxlib/dihres.c
src/gromacs/legacyheaders/dihre.h
src/gromacs/mdlib/force.c
src/gromacs/mdlib/mdebin.c
src/gromacs/mdlib/sim_util.c
src/programs/mdrun/md.c

Change-Id: I0c5e6d8e57f9f19da73a81737c3105dcd55488ec
Moved:
     src/mdlib/expanded.c ->
       src/gromacs/mdlib/expanded.c

77 files changed:
1  2 
src/gromacs/gmxlib/bondfree.c
src/gromacs/gmxlib/checkpoint.c
src/gromacs/gmxlib/enxio.c
src/gromacs/gmxlib/ifunc.c
src/gromacs/gmxlib/inputrec.c
src/gromacs/gmxlib/mvdata.c
src/gromacs/gmxlib/names.c
src/gromacs/gmxlib/nonbonded/nb_free_energy.c
src/gromacs/gmxlib/nonbonded/nb_free_energy.h
src/gromacs/gmxlib/nonbonded/nonbonded.c
src/gromacs/gmxlib/nrnb.c
src/gromacs/gmxlib/topsort.c
src/gromacs/gmxlib/tpxio.c
src/gromacs/gmxlib/trxio.c
src/gromacs/gmxlib/txtdump.c
src/gromacs/gmxlib/typedefs.c
src/gromacs/gmxpreprocess/compute_io.c
src/gromacs/gmxpreprocess/readir.c
src/gromacs/gmxpreprocess/readir.h
src/gromacs/gmxpreprocess/readpull.c
src/gromacs/legacyheaders/bondf.h
src/gromacs/legacyheaders/checkpoint.h
src/gromacs/legacyheaders/constr.h
src/gromacs/legacyheaders/force.h
src/gromacs/legacyheaders/mdebin.h
src/gromacs/legacyheaders/mdrun.h
src/gromacs/legacyheaders/names.h
src/gromacs/legacyheaders/nonbonded.h
src/gromacs/legacyheaders/ns.h
src/gromacs/legacyheaders/pull.h
src/gromacs/legacyheaders/tpxio.h
src/gromacs/legacyheaders/trnio.h
src/gromacs/legacyheaders/typedefs.h
src/gromacs/legacyheaders/types/enums.h
src/gromacs/legacyheaders/types/fcdata.h
src/gromacs/legacyheaders/types/forcerec.h
src/gromacs/legacyheaders/types/idef.h
src/gromacs/legacyheaders/types/inputrec.h
src/gromacs/legacyheaders/types/state.h
src/gromacs/legacyheaders/types/trx.h
src/gromacs/legacyheaders/update.h
src/gromacs/mdlib/constr.c
src/gromacs/mdlib/coupling.c
src/gromacs/mdlib/domdec.c
src/gromacs/mdlib/domdec_top.c
src/gromacs/mdlib/expanded.c
src/gromacs/mdlib/force.c
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/init.c
src/gromacs/mdlib/md_support.c
src/gromacs/mdlib/mdebin.c
src/gromacs/mdlib/mdebin_bar.c
src/gromacs/mdlib/mdebin_bar.h
src/gromacs/mdlib/minimize.c
src/gromacs/mdlib/ns.c
src/gromacs/mdlib/partdec.c
src/gromacs/mdlib/pull.c
src/gromacs/mdlib/shellfc.c
src/gromacs/mdlib/sim_util.c
src/gromacs/mdlib/stat.c
src/gromacs/mdlib/tgroup.c
src/gromacs/mdlib/tpi.c
src/gromacs/mdlib/update.c
src/gromacs/mdlib/wall.c
src/programs/gmxcheck/gmxcheck.c
src/programs/gmxcheck/tpbcmp.c
src/programs/grompp/convparm.c
src/programs/grompp/grompp.c
src/programs/mdrun/md.c
src/programs/mdrun/md_openmm.c
src/programs/mdrun/md_openmm.h
src/programs/mdrun/mdrun.c
src/programs/mdrun/repl_ex.c
src/programs/mdrun/repl_ex.h
src/programs/mdrun/runner.c
src/programs/tpbconv/tpbconv.c
src/tools/gmx_bar.c

Simple merge
index ae1c1e4a6e8366d5b86dc4db9651ce52497c19b6,0000000000000000000000000000000000000000..5d9beaa2ef2462140a08e54e752db3ff3e406823
mode 100644,000000..100644
--- /dev/null
@@@ -1,2244 -1,0 +1,2398 @@@
-     "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev",
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +/* The source code in this file should be thread-safe. 
 + Please keep it that way. */
 +
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <string.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +/* _chsize_s */
 +#include <io.h>
 +#include <sys/locking.h>
 +#endif
 +
 +
 +#include "filenm.h"
 +#include "names.h"
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmxfio.h"
 +#include "xdrf.h"
 +#include "statutil.h"
 +#include "txtdump.h"
 +#include "vec.h"
++#include "mdrun.h"
 +#include "network.h"
 +#include "gmx_random.h"
 +#include "checkpoint.h"
 +#include "futil.h"
 +#include "string2.h"
 +#include <fcntl.h>
 +
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +#define CPT_MAGIC1 171817
 +#define CPT_MAGIC2 171819
++#define CPTSTRLEN 1024
 +
 +#ifdef GMX_DOUBLE
 +#define GMX_CPT_BUILD_DP 1
 +#else
 +#define GMX_CPT_BUILD_DP 0
 +#endif
 +
 +/* cpt_version should normally only be changed
 + * when the header or footer format changes.
 + * The state data format itself is backward and forward compatible.
 + * But old code can not read a new entry that is present in the file
 + * (but can read a new format when new entries are not present).
 + */
 +static const int cpt_version = 13;
 +
 +
 +const char *est_names[estNR]=
 +{
 +    "FE-lambda",
 +    "box", "box-rel", "box-v", "pres_prev",
 +    "nosehoover-xi", "thermostat-integral",
 +    "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
 +    "disre_initf", "disre_rm3tav",
 +    "orire_initf", "orire_Dtav",
++    "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
 +};
 +
 +enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
 +
 +const char *eeks_names[eeksNR]=
 +{
 +    "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
 +    "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
 +};
 +
 +enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
 +       eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
 +       eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM, 
 +       eenhENERGY_DELTA_H_NN,
 +       eenhENERGY_DELTA_H_LIST, 
 +       eenhENERGY_DELTA_H_STARTTIME, 
 +       eenhENERGY_DELTA_H_STARTLAMBDA, 
 +       eenhNR };
 +
 +const char *eenh_names[eenhNR]=
 +{
 +    "energy_n", "energy_aver", "energy_sum", "energy_nsum",
 +    "energy_sum_sim", "energy_nsum_sim",
 +    "energy_nsteps", "energy_nsteps_sim", 
 +    "energy_delta_h_nn",
 +    "energy_delta_h_list", 
 +    "energy_delta_h_start_time", 
 +    "energy_delta_h_start_lambda"
 +};
 +
-     case 0: return est_names [ecpt]; break;
-     case 1: return eeks_names[ecpt]; break;
-     case 2: return eenh_names[ecpt]; break;
++/* free energy history variables -- need to be preserved over checkpoint */
++enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
++       edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
++/* free energy history variable names  */
++const char *edfh_names[edfhNR]=
++{
++    "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
++    "accumulated_plus", "accumulated_minus", "accumulated_plus_2",  "accumulated_minus_2", "Tij", "Tij_empirical"
++};
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +static int
 +gmx_wintruncate(const char *filename, __int64 size)
 +{
 +#ifdef GMX_FAHCORE
 +    /*we do this elsewhere*/
 +    return 0;
 +#else
 +    FILE *fp;
 +    int   rc;
 +    
 +    fp=fopen(filename,"rb+");
 +    
 +    if(fp==NULL)
 +    {
 +        return -1;
 +    }
 +    
 +    return _chsize_s( fileno(fp), size);
 +#endif
 +}
 +#endif
 +
 +
 +enum { ecprREAL, ecprRVEC, ecprMATRIX };
 +
++enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
++/* enums for the different components of checkpoint variables, replacing the hard coded ones.
++   cptpEST - state variables.
++   cptpEEKS - Kinetic energy state variables.
++   cptpEENH - Energy history state variables.
++   cptpEDFH - free energy history variables.
++*/
++
++
 +/* Return the name of entry `ecpt` from the name table selected by `cptp`
 + * (est_names, eeks_names, eenh_names or edfh_names), or NULL when `cptp`
 + * matches none of the cases.  `ecpt` is used as an array index without a
 + * range check.
 + */
 +static const char *st_names(int cptp,int ecpt)
 +{
 +    switch (cptp)
 +    {
- #define CPTSTRLEN 1024
++    case cptpEST: return est_names [ecpt]; break;
++    case cptpEEKS: return eeks_names[ecpt]; break;
++    case cptpEENH: return eenh_names[ecpt]; break;
++    case cptpEDFH: return edfh_names[ecpt]; break;
 +    }
 +
 +    return NULL;
 +}
 +
 +/* Print the "corrupted or truncated" checkpoint warning to `fp`. */
 +static void cp_warning(FILE *fp)
 +{
 +    fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
 +}
 +
 +/* Report a corrupted/truncated checkpoint file (or a full disk) through
 + * gmx_fatal().
 + */
 +static void cp_error()
 +{
 +    gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
 +}
 +
 +/* Pass one string of at most CPTSTRLEN bytes through xdr_string().
 + *
 + * When bRead is set, a CPTSTRLEN-byte buffer is first allocated into *s.
 + * A failing xdr_string() call is reported through cp_error().
 + * When `list` is non-NULL the string is printed as "desc = value" and
 + * *s is then freed, so in that case *s must not be used by the caller
 + * afterwards.
 + */
 +static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
 +{
-                           int *flags_state,int *flags_eks,int *flags_enh,
 +    bool_t res=0;
 +    
 +    if (bRead)
 +    {
 +        snew(*s,CPTSTRLEN);
 +    }
 +    res = xdr_string(xd,s,CPTSTRLEN);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %s\n",desc,*s);
 +        sfree(*s);
 +    }
 +}
 +
 +/* Pass the int *i through xdr_int(); when `list` is non-NULL also print
 + * it as "desc = value".
 + *
 + * Returns 0 on success and -1 when xdr_int() fails.
 + */
 +static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    res = xdr_int(xd,i);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %d\n",desc,*i);
 +    }
 +    return 0;
 +}
 +
 +/* Pass the `n` unsigned chars in `i` through xdr_u_char(), one at a time,
 + * stopping after the first element that fails.  When `list` is non-NULL
 + * the bytes are printed in two-digit hex after "desc = ".
 + *
 + * Returns 0 when every element succeeded, -1 otherwise.
 + */
 +static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
 +{
 +    bool_t res=1;
 +    int j;
 +    if (list)
 +    {
 +        fprintf(list,"%s = ",desc);
 +    }
 +    for (j=0; j<n && res; j++)
 +    {
 +        res &= xdr_u_char(xd,&i[j]);
 +        /* Note: the byte is printed even when the call above failed. */
 +        if (list)
 +        {
 +            fprintf(list,"%02x",i[j]);
 +        }
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"\n");
 +    }
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +
 +    return 0;
 +}
 +
 +/* Same as do_cpt_int(), but a failure is reported through cp_error()
 + * instead of being returned.
 + */
 +static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
 +{
 +    if (do_cpt_int(xd,desc,i,list) < 0)
 +    {
 +        cp_error();
 +    }
 +}
 +
 +/* Pass the step counter *i through xdr_gmx_large_int(); a failure is
 + * reported through cp_error().  When `list` is non-NULL the value is
 + * printed as "desc = value", formatted with gmx_step_str().
 + */
 +static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
 +{
 +    bool_t res=0;
 +    char   buf[STEPSTRSIZE];
 +
 +    res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
 +    }
 +}
 +
 +/* Pass the double *f through xdr_double(); a failure is reported through
 + * cp_error().  When `list` is non-NULL the value is printed as
 + * "desc = value".
 + */
 +static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    res = xdr_double(xd,f);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %f\n",desc,*f);
 +    }
 +}
 +
 +/* Transfer an array of reals for state entry (cptp,ecpt) through XDR.
 + * The stream layout is: element count, precision tag, then the data.
 + *
 + * If nval >= 0, nval is used; on read this should match the passed value.
 + * If nval < 0, *nptr is used; on read the value is stored in *nptr.
 + * Both rules only apply when list is NULL.
 + *
 + * The data end up in *v (allocated here when *v is NULL) only when list is
 + * NULL and bit `ecpt` is set in sflags; otherwise they go to a temporary
 + * buffer that is freed before returning.  When the precision tag in the
 + * stream differs from the build precision, a message is printed to stderr
 + * and the values are converted element by element.  When list is non-NULL
 + * the values are printed according to erealtype (ecprREAL or ecprRVEC).
 + *
 + * Returns 0 on success and -1 when an XDR call fails.
 + *
 + * NOTE(review): with list != NULL, nf is not assigned before the first
 + * xdr_int() call, so this presumably relies on the stream being in decode
 + * mode whenever list is used -- confirm against the callers.
 + * NOTE(review): the "return -1" paths after the buffers have been
 + * allocated do not free va, vf or vd.
 + */
 +static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
 +                             int nval,int *nptr,real **v,
 +                             FILE *list,int erealtype)
 +{
 +    bool_t res=0;
 +#ifndef GMX_DOUBLE
 +    int  dtc=xdr_datatype_float; 
 +#else
 +    int  dtc=xdr_datatype_double;
 +#endif
 +    real *vp,*va=NULL;
 +    float  *vf;
 +    double *vd;
 +    int  nf,dt,i;
 +    
 +    /* Pick the count that goes to (or is checked against) the stream */
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            nf = nval;
 +        }
 +        else
 +        {
 +        if (nptr == NULL)
 +        {
 +            gmx_incons("*ntpr=NULL in do_cpte_reals_low");
 +        }
 +        nf = *nptr;
 +        }
 +    }
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            if (nf != nval)
 +            {
 +                gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
 +            }
 +        }
 +        else
 +        {
 +            *nptr = nf;
 +        }
 +    }
 +    /* dtc is the precision of this build, dt the precision in the stream */
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                xdr_datatype_names[dt]);
 +    }
 +    /* Listing, or an entry the caller's state does not use: read into a
 +     * scratch buffer so the data are consumed but not kept.
 +     */
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    if (dt == xdr_datatype_float)
 +    {
 +        if (dtc == xdr_datatype_float)
 +        {
 +            vf = (float *)vp;
 +        }
 +        else
 +        {
 +            snew(vf,nf);
 +        }
 +        res = xdr_vector(xd,(char *)vf,nf,
 +                         (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_float)
 +        {
 +            for(i=0; i<nf; i++)
 +            {
 +                vp[i] = vf[i];
 +            }
 +            sfree(vf);
 +        }
 +    }
 +    else
 +    {
 +        if (dtc == xdr_datatype_double)
 +        {
 +            vd = (double *)vp;
 +        }
 +        else
 +        {
 +            snew(vd,nf);
 +        }
 +        res = xdr_vector(xd,(char *)vd,nf,
 +                         (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_double)
 +        {
 +            for(i=0; i<nf; i++)
 +            {
 +                vp[i] = vd[i];
 +            }
 +            sfree(vd);
 +        }
 +    }
 +    
 +    if (list)
 +    {
 +        switch (erealtype)
 +        {
 +        case ecprREAL:
 +            pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
 +            break;
 +        case ecprRVEC:
 +            pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
 +            break;
 +        default:
 +            gmx_incons("Unknown checkpoint real type");
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +
 +/* Transfer n reals for state entry (cptp,ecpt).
 + * The count n is put in the stream together with the reals; on reading
 + * (with list == NULL) n must match the count found in the checkpoint file,
 + * a fatal error is generated when this is not the case.
 + * Returns the status of do_cpte_reals_low().
 + */
 +static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
 +                         int n,real **v,FILE *list)
 +{
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
 +}
 +
 +/* This function does the same as do_cpte_reals,
 + * except that no count check is done: with list == NULL the count found
 + * in the stream is stored in *n after the transfer (do_cpte_reals_low is
 + * called with nval = -1 and nptr = n).
 + */
 +static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int *n,real **v,FILE *list)
 +{
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
 +}
 +
 +static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
 +                        real *r,FILE *list)
 +{
 +    int n;
 +
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
 +}
 +
 +/* Transfer n ints for state entry (cptp,ecpt) through XDR.
 + * The stream layout is: element count, data type tag, then the data.
 + *
 + * A count that differs from n is a fatal error, but only when list is
 + * NULL and v is non-NULL.  A type tag other than xdr_datatype_int is
 + * always a fatal error.
 + * The data end up in *v (allocated here when *v is NULL) only when list
 + * is NULL, v is non-NULL and bit `ecpt` is set in sflags; otherwise they
 + * go to a temporary buffer that is freed before returning.
 + * When list is non-NULL the values are printed with pr_ivec().
 + *
 + * Returns 0 on success and -1 when an XDR call fails.
 + *
 + * NOTE(review): the "return -1" after xdr_vector() does not free va.
 + */
 +static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
 +                        int n,int **v,FILE *list)
 +{
 +    bool_t res=0;
 +    int  dtc=xdr_datatype_int;
 +    int *vp,*va=NULL;
 +    int  nf,dt,i;
 +    
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && v != NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
 +                  st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    /* v == NULL means the caller does not want the data kept */
 +    if (list || !(sflags & (1<<ecpt)) || v == NULL)
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd,(char *)vp,nf,
 +                     (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +/* Transfer the single int *i for state entry (cptp,ecpt) by handing it
 + * to do_cpte_ints() as an array of length 1.
 + */
 +static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
 +                       int *i,FILE *list)
 +{
 +    return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
 +}
 +
 +/* Transfer n doubles for state entry (cptp,ecpt) through XDR.
 + * The stream layout is: element count, data type tag, then the data.
 + *
 + * A count that differs from n is a fatal error when list is NULL.
 + * A type tag other than xdr_datatype_double is always a fatal error.
 + * The data end up in *v (allocated here when *v is NULL) only when list
 + * is NULL and bit `ecpt` is set in sflags; otherwise they go to a
 + * temporary buffer that is freed before returning.
 + * When list is non-NULL the values are printed with pr_doubles().
 + *
 + * Returns 0 on success and -1 when an XDR call fails.
 + *
 + * NOTE(review): the "return -1" after xdr_vector() does not free va.
 + */
 +static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int n,double **v,FILE *list)
 +{
 +    bool_t res=0;
 +    int  dtc=xdr_datatype_double;
 +    double *vp,*va=NULL;
 +    int  nf,dt,i;
 +    
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                  st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd,(char *)vp,nf,
 +                     (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +/* Transfer the single double *r for state entry (cptp,ecpt) by handing it
 + * to do_cpte_doubles() as an array of length 1.
 + */
 +static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
 +                          double *r,FILE *list)
 +{
 +    return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
 +}
 +
 +
 +static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
 +                         int n,rvec **v,FILE *list)
 +{
 +    int n3;
 +
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                             n*DIM,NULL,(real **)v,list,ecprRVEC);
 +}
 +
 +static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
 +                          matrix v,FILE *list)
 +{
 +    real *vr;
 +    real ret;
 +
 +    vr = (real *)&(v[0][0]);
 +    ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                            DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
 +    
 +    if (list && ret == 0)
 +    {
 +        pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
 +    }
 +    
 +    return ret;
 +}
 +
++
++/* Transfer an n x n array of reals, held as n separately allocated rows
++ * v[0..n-1], for state entry (cptp,ecpt).  Every row is handed to
++ * do_cpte_reals_low() as n reals, without a list file.  When `list` is
++ * non-NULL, each row that transferred successfully is printed as
++ * "<entry name>[i]".
++ *
++ * If v is NULL a scratch row table is used instead and released again
++ * before returning.
++ *
++ * Returns 0 when all rows succeeded, otherwise the status of the last row
++ * that failed.
++ */
++static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
++                           int n, real **v,FILE *list)
++{
++    int    i;
++    int    ret,reti;
++    real **vscratch=NULL;
++    char   name[CPTSTRLEN];
++
++    ret = 0;
++    if (v==NULL)
++    {
++        /* No destination supplied: use a row table owned by this call */
++        snew(vscratch,n);
++        v = vscratch;
++    }
++    for (i=0;i<n;i++)
++    {
++        reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
++        if (list && reti == 0)
++        {
++            sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
++            pr_reals(list,0,name,v[i],n);
++        }
++        if (reti != 0)
++        {
++            /* Keep the failure so that the caller gets to see it */
++            ret = reti;
++        }
++    }
++    if (vscratch)
++    {
++        for (i=0;i<n;i++)
++        {
++            if (vscratch[i])
++            {
++                sfree(vscratch[i]);
++            }
++        }
++        sfree(vscratch);
++    }
++    return ret;
++}
++
 +/* Transfer n DIM x DIM matrices for state entry (cptp,ecpt).
 + *
 + * The matrix count goes through xdr_int() first; a count that differs
 + * from n is a fatal error when list is NULL.  The matrices are then
 + * copied into a flat temporary array, passed to do_cpte_reals_low() as
 + * nf*DIM*DIM reals, and copied back from that array afterwards.
 + * The destination is *v (allocated here when *v is NULL) only when list
 + * is NULL and bit `ecpt` is set in sflags; otherwise a temporary matrix
 + * array is used and freed before returning.
 + * When list is non-NULL and the transfer succeeded, every matrix is
 + * printed with pr_rvecs().
 + *
 + * Returns -1 when the count cannot be transferred, otherwise the status
 + * of do_cpte_reals_low().
 + */
 +static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
 +                            int n,matrix **v,FILE *list)
 +{
 +    bool_t res=0;
 +    matrix *vp,*va=NULL;
 +    real *vr;
 +    int  nf,i,j,k;
 +    int  ret;
 +
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    /* Flatten the matrices into one contiguous array of reals */
 +    snew(vr,nf*DIM*DIM);
 +    for(i=0; i<nf; i++)
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            for(k=0; k<DIM; k++)
 +            {
 +                vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
 +            }
 +        }
 +    }
 +    ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                            nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
 +    /* Copy back, whatever the status of the call above was */
 +    for(i=0; i<nf; i++)
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            for(k=0; k<DIM; k++)
 +            {
 +                vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
 +            }
 +        }
 +    }
 +    sfree(vr);
 +    
 +    if (list && ret == 0)
 +    {
 +        for(i=0; i<nf; i++)
 +        {
 +            pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +    
 +    return ret;
 +}
 +
 +/* Transfer the checkpoint header through XDR.
 + *
 + * The fields are processed in this order: the start magic number
 + * (CPT_MAGIC1), six strings (version, build time, build user, build
 + * machine, generating program, generation time), the checkpoint file
 + * version, and then the remaining integer/step/time fields.  Several of
 + * those are only present from a given file version on; when the file is
 + * older, the corresponding output argument is set to a default instead.
 + *
 + * Fatal errors are raised when the first int cannot be transferred, when
 + * the magic number is not CPT_MAGIC1, and when the file version is larger
 + * than cpt_version.  All other fields go through the *_err helpers.
 + * When `list` is non-NULL each field is printed as it is processed.
 + *
 + * When bRead is not set, the host name is obtained with gethostname()
 + * (or set to "unknown") and passed as the "generating host" string.
 + *
 + * NOTE(review): nlambda and flags_dfh are gated on file_version >= 12,
 + * the same version test as the older "generating host" field; confirm
 + * that no version-12 files exist that lack these two fields.
 + */
 +static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
 +                          char **version,char **btime,char **buser,char **bmach,
 +                          int *double_prec,
 +                          char **fprog,char **ftime,
 +                          int *eIntegrator,int *simulation_part,
 +                          gmx_large_int_t *step,double *t,
 +                          int *nnodes,int *dd_nc,int *npme,
 +                          int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
-             case estLAMBDA:  ret = do_cpte_real(xd,0,i,sflags,&state->lambda,list); break;
-             case estBOX:     ret = do_cpte_matrix(xd,0,i,sflags,state->box,list); break;
-             case estBOX_REL: ret = do_cpte_matrix(xd,0,i,sflags,state->box_rel,list); break;
-             case estBOXV:    ret = do_cpte_matrix(xd,0,i,sflags,state->boxv,list); break;
-             case estPRES_PREV: ret = do_cpte_matrix(xd,0,i,sflags,state->pres_prev,list); break;
-             case estSVIR_PREV:  ret = do_cpte_matrix(xd,0,i,sflags,state->svir_prev,list); break;
-             case estFVIR_PREV:  ret = do_cpte_matrix(xd,0,i,sflags,state->fvir_prev,list); break;
-             case estNH_XI:   ret = do_cpte_doubles(xd,0,i,sflags,nnht,&state->nosehoover_xi,list); break;
-             case estNH_VXI:  ret = do_cpte_doubles(xd,0,i,sflags,nnht,&state->nosehoover_vxi,list); break;
-             case estNHPRES_XI:   ret = do_cpte_doubles(xd,0,i,sflags,nnhtp,&state->nhpres_xi,list); break;
-             case estNHPRES_VXI:  ret = do_cpte_doubles(xd,0,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
-             case estTC_INT:  ret = do_cpte_doubles(xd,0,i,sflags,state->ngtc,&state->therm_integral,list); break;
-             case estVETA:    ret = do_cpte_real(xd,0,i,sflags,&state->veta,list); break;
-             case estVOL0:    ret = do_cpte_real(xd,0,i,sflags,&state->vol0,list); break;
-             case estX:       ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->x,list); break;
-             case estV:       ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->v,list); break;
-             case estSDX:     ret = do_cpte_rvecs(xd,0,i,sflags,state->natoms,&state->sd_X,list); break;
-             case estLD_RNG:  ret = do_cpte_ints(xd,0,i,sflags,state->nrng,rng_p,list); break;
-             case estLD_RNGI: ret = do_cpte_ints(xd,0,i,sflags,state->nrngi,rngi_p,list); break;
-             case estDISRE_INITF:  ret = do_cpte_real (xd,0,i,sflags,&state->hist.disre_initf,list); break;
-             case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd,0,i,sflags,&state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
-             case estORIRE_INITF:  ret = do_cpte_real (xd,0,i,sflags,&state->hist.orire_initf,list); break;
-             case estORIRE_DTAV:   ret = do_cpte_n_reals(xd,0,i,sflags,&state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
++                          int *nlambda, int *flags_state,
++                          int *flags_eks,int *flags_enh, int *flags_dfh,
 +                          FILE *list)
 +{
 +    bool_t res=0;
 +    int  magic;
 +    int  idum=0;
 +    int  i;
 +    char *fhost;
 +
 +    if (bRead)
 +    {
 +        magic = -1;
 +    }
 +    else
 +    {
 +        magic = CPT_MAGIC1;
 +    }
 +    res = xdr_int(xd,&magic);
 +    if (res == 0)
 +    {
 +        gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
 +    }
 +    if (magic != CPT_MAGIC1)
 +    {
 +        gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
 +                  "The checkpoint file is corrupted or not a checkpoint file",
 +                  magic,CPT_MAGIC1);
 +    }
 +    if (!bRead)
 +    {
 +        snew(fhost,255);
 +#ifdef HAVE_UNISTD_H
 +        if (gethostname(fhost,255) != 0)
 +        {
 +            sprintf(fhost,"unknown");
 +        }
 +#else
 +        sprintf(fhost,"unknown");
 +#endif  
 +    }
 +    do_cpt_string_err(xd,bRead,"GROMACS version"           ,version,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build time"        ,btime,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build user"        ,buser,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build machine"     ,bmach,list);
 +    do_cpt_string_err(xd,bRead,"generating program"        ,fprog,list);
 +    do_cpt_string_err(xd,bRead,"generation time"           ,ftime,list);
 +    *file_version = cpt_version;
 +    do_cpt_int_err(xd,"checkpoint file version",file_version,list);
 +    if (*file_version > cpt_version)
 +    {
 +        gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
 +    }
 +    if (*file_version >= 13)
 +    {
 +        do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
 +    }
 +    else
 +    {
 +        *double_prec = -1;
 +    }
 +    if (*file_version >= 12)
 +    {
 +        do_cpt_string_err(xd,bRead,"generating host"           ,&fhost,list);
 +        /* With a list file do_cpt_string_err() has already freed fhost */
 +        if (list == NULL)
 +        {
 +            sfree(fhost);
 +        }
 +    }
 +    do_cpt_int_err(xd,"#atoms"            ,natoms     ,list);
 +    do_cpt_int_err(xd,"#T-coupling groups",ngtc       ,list);
 +    if (*file_version >= 10) 
 +    {
 +        do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
 +    }
 +    else
 +    {
 +        *nhchainlength = 1;
 +    }
 +    if (*file_version >= 11)
 +    {
 +        do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
 +    }
 +    else
 +    {
 +        *nnhpres = 0;
 +    }
++    if (*file_version >= 12)
++    {
++        do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
++    }
++    else
++    {
++        *nlambda = 0;
++    }
 +    do_cpt_int_err(xd,"integrator"        ,eIntegrator,list);
 +      if (*file_version >= 3)
 +      {
 +              do_cpt_int_err(xd,"simulation part #", simulation_part,list);
 +      }
 +      else
 +      {
 +              *simulation_part = 1;
 +      }
 +    if (*file_version >= 5)
 +    {
 +        do_cpt_step_err(xd,"step"         ,step       ,list);
 +    }
 +    else
 +    {
 +        do_cpt_int_err(xd,"step"          ,&idum      ,list);
 +        *step = idum;
 +    }
 +    do_cpt_double_err(xd,"t"              ,t          ,list);
 +    do_cpt_int_err(xd,"#PP-nodes"         ,nnodes     ,list);
 +    /* Without a dd_nc array a dummy value of 1 is used for each dimension */
 +    idum = 1;
 +    do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
 +    do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
 +    do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
 +    do_cpt_int_err(xd,"#PME-only nodes",npme,list);
 +    do_cpt_int_err(xd,"state flags",flags_state,list);
 +      if (*file_version >= 4)
 +    {
 +        do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
 +        do_cpt_int_err(xd,"energy history flags",flags_enh,list);
 +    }
 +    else
 +    {
 +        /* Older files: the energy history flags are taken from the bits
 +         * above estORIRE_DTAV in the state flags, and those three bits
 +         * are then cleared from the state flags.
 +         */
 +        *flags_eks  = 0;
 +        *flags_enh   = (*flags_state >> (estORIRE_DTAV+1));
 +        *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
 +                                         (1<<(estORIRE_DTAV+2)) |
 +                                         (1<<(estORIRE_DTAV+3))));
 +    }
++      if (*file_version >= 12)
++    {
++        do_cpt_int_err(xd,"df history flags",flags_dfh,list);
++    } else {
++        *flags_dfh = 0;
++    }
 +}
 +
 +/* Transfer the end-of-file magic number (CPT_MAGIC2) for checkpoint files
 + * of version 2 or higher; older versions have no footer and nothing is
 + * done for them.
 + *
 + * A failing xdr_int() call is reported through cp_error().
 + * Returns -1 when the magic number does not equal CPT_MAGIC2, 0 otherwise.
 + * The bRead argument is not used.
 + */
 +static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
 +{
 +    bool_t res=0;
 +    int  magic;
 +    
 +    if (file_version >= 2)
 +    {
 +        magic = CPT_MAGIC2;
 +        res = xdr_int(xd,&magic);
 +        if (res == 0)
 +        {
 +            cp_error();
 +        }
 +        if (magic != CPT_MAGIC2)
 +        {
 +            return -1;
 +        }
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpt_state(XDR *xd,gmx_bool bRead,
 +                        int fflags,t_state *state,
 +                        gmx_bool bReadRNG,FILE *list)
 +{
 +    int  sflags;
 +    int  **rng_p,**rngi_p;
 +    int  i;
 +    int  ret;
 +    int  nnht,nnhtp;
 +
 +    ret = 0;
 +    
 +    nnht = state->nhchainlength*state->ngtc;
 +    nnhtp = state->nhchainlength*state->nnhpres;
 +
 +    if (bReadRNG)
 +    {
 +        rng_p  = (int **)&state->ld_rng;
 +        rngi_p = &state->ld_rngi;
 +    }
 +    else
 +    {
 +        /* Do not read the RNG data */
 +        rng_p  = NULL;
 +        rngi_p = NULL;
 +    }
++    /* We want the MC_RNG the same across all the nodes for now -- lambda MC is global */
 +
 +    sflags = state->flags;
 +    for(i=0; (i<estNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
-                       case eeksEKIN_N:     ret = do_cpte_int(xd,1,i,fflags,&ekins->ekin_n,list); break;
-                       case eeksEKINH :     ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
-                       case eeksEKINF:      ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
-                       case eeksEKINO:      ret = do_cpte_matrices(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
-             case eeksEKINTOTAL:  ret = do_cpte_matrix(xd,1,i,fflags,ekins->ekin_total,list); break;
-             case eeksEKINSCALEF: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
-             case eeksVSCALE:     ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
-             case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,i,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
-                       case eeksDEKINDL :   ret = do_cpte_real(xd,1,i,fflags,&ekins->dekindl,list); break;
-             case eeksMVCOS:      ret = do_cpte_real(xd,1,i,fflags,&ekins->mvcos,list); break;                 
++            case estLAMBDA:  ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
++            case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
++            case estBOX:     ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
++            case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
++            case estBOXV:    ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
++            case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
++            case estSVIR_PREV:  ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
++            case estFVIR_PREV:  ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
++            case estNH_XI:   ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
++            case estNH_VXI:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
++            case estNHPRES_XI:   ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
++            case estNHPRES_VXI:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
++            case estTC_INT:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
++            case estVETA:    ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
++            case estVOL0:    ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
++            case estX:       ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
++            case estV:       ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
++            case estSDX:     ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
++            case estLD_RNG:  ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
++            case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
++            case estMC_RNG:  ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
++            case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
++            case estDISRE_INITF:  ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
++            case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
++            case estORIRE_INITF:  ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
++            case estORIRE_DTAV:   ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
 +            default:
 +                gmx_fatal(FARGS,"Unknown state entry %d\n"
 +                          "You are probably reading a new checkpoint file with old code",i);
 +            }
 +        }
 +    }
 +    
 +    return ret;
 +}
 +
 +static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
 +                            int fflags,ekinstate_t *ekins,
 +                            FILE *list)
 +{
 +    int  i;
 +    int  ret;
 +
 +    ret = 0;
 +
 +    for(i=0; (i<eeksNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +                
-                 case eenhENERGY_N:     ret = do_cpte_int(xd,2,i,fflags,&enerhist->nener,list); break;
-                 case eenhENERGY_AVER:  ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
-                 case eenhENERGY_SUM:   ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
-                 case eenhENERGY_NSUM:  do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
-                 case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,2,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
-                 case eenhENERGY_NSUM_SIM:   do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
-                 case eenhENERGY_NSTEPS:     do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
-                 case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
-                 case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list); 
-                     if (bRead) /* now allocate memory for it */
-                     {
-                         snew(enerhist->dht->dh, enerhist->dht->nndh);
-                         snew(enerhist->dht->ndh, enerhist->dht->nndh);
-                         for(j=0;j<enerhist->dht->nndh;j++)
-                         {
-                             enerhist->dht->ndh[j] = 0;
-                             enerhist->dht->dh[j] = NULL;
-                         }
-                     }
-                 break;
-                 case eenhENERGY_DELTA_H_LIST: 
++                      case eeksEKIN_N:     ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
++                      case eeksEKINH :     ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
++                      case eeksEKINF:      ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
++                      case eeksEKINO:      ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
++            case eeksEKINTOTAL:  ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
++            case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
++            case eeksVSCALE:     ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
++            case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
++                      case eeksDEKINDL :   ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
++            case eeksMVCOS:      ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
 +            default:
 +                gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
 +                          "You are probably reading a new checkpoint file with old code",i);
 +            }
 +        }
 +    }
 +    
 +    return ret;
 +}
 +
 +
 +static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
 +                           int fflags,energyhistory_t *enerhist,
 +                           FILE *list)
 +{
 +    int  i;
 +    int  j;
 +    int  ret;
 +
 +    ret = 0;
 +
 +    if (bRead)
 +    {
 +        enerhist->nsteps     = 0;
 +        enerhist->nsum       = 0;
 +        enerhist->nsteps_sim = 0;
 +        enerhist->nsum_sim   = 0;
 +        enerhist->dht        = NULL;
 +
 +        if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
 +        {
 +            snew(enerhist->dht,1);
 +            enerhist->dht->ndh = NULL;
 +            enerhist->dht->dh = NULL;
 +            enerhist->dht->start_lambda_set=FALSE;
 +        }
 +    }
 +
 +    for(i=0; (i<eenhNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
-                         ret=do_cpte_n_reals(xd, 2, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list); 
++                      case eenhENERGY_N:     ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
++                      case eenhENERGY_AVER:  ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
++                      case eenhENERGY_SUM:   ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
++            case eenhENERGY_NSUM:  do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
++            case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
++            case eenhENERGY_NSUM_SIM:   do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
++            case eenhENERGY_NSTEPS:     do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
++            case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
++            case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
++                if (bRead) /* now allocate memory for it */
++                {
++                    snew(enerhist->dht->dh, enerhist->dht->nndh);
++                    snew(enerhist->dht->ndh, enerhist->dht->nndh);
 +                    for(j=0;j<enerhist->dht->nndh;j++)
 +                    {
-                     break;
-                 case eenhENERGY_DELTA_H_STARTTIME: 
-                     ret=do_cpte_double(xd, 2, i, fflags, &(enerhist->dht->start_time), list); break;
-                 case eenhENERGY_DELTA_H_STARTLAMBDA: 
-                     enerhist->dht->start_lambda_set=TRUE;
-                     ret=do_cpte_double(xd, 2, i, fflags, &(enerhist->dht->start_lambda), list); break;
-                 default:
-                     gmx_fatal(FARGS,"Unknown energy history entry %d\n"
-                               "You are probably reading a new checkpoint file with old code",i);
++                        enerhist->dht->ndh[j] = 0;
++                        enerhist->dht->dh[j] = NULL;
 +                    }
-     int  flags_eks,flags_enh,i;
++                }
++                break;
++            case eenhENERGY_DELTA_H_LIST:
++                for(j=0;j<enerhist->dht->nndh;j++)
++                {
++                    ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
++                }
++                break;
++            case eenhENERGY_DELTA_H_STARTTIME:
++                ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
++            case eenhENERGY_DELTA_H_STARTLAMBDA:
++                ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
++            default:
++                gmx_fatal(FARGS,"Unknown energy history entry %d\n"
++                          "You are probably reading a new checkpoint file with old code",i);
 +            }
 +        }
 +    }
 +
 +    if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
 +    {
 +        /* Assume we have an old file format and copy sum to sum_sim */
 +        srenew(enerhist->ener_sum_sim,enerhist->nener);
 +        for(i=0; i<enerhist->nener; i++)
 +        {
 +            enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
 +        }
 +        fflags |= (1<<eenhENERGY_SUM_SIM);
 +    }
 +    
 +    if ( (fflags & (1<<eenhENERGY_NSUM)) &&
 +        !(fflags & (1<<eenhENERGY_NSTEPS)))
 +    {
 +        /* Assume we have an old file format and copy nsum to nsteps */
 +        enerhist->nsteps = enerhist->nsum;
 +        fflags |= (1<<eenhENERGY_NSTEPS);
 +    }
 +    if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
 +        !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
 +    {
 +        /* Assume we have an old file format and copy nsum to nsteps */
 +        enerhist->nsteps_sim = enerhist->nsum_sim;
 +        fflags |= (1<<eenhENERGY_NSTEPS_SIM);
 +    }
 +
 +    return ret;
 +}
 +
++static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
++{
++    int  i,nlambda;
++    int  ret;
++
++    nlambda = dfhist->nlambda;
++    ret = 0;
++
++    for(i=0; (i<edfhNR && ret == 0); i++)
++    {
++        if (fflags & (1<<i))
++        {
++            switch (i)
++            {
++            case edfhBEQUIL:       ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
++            case edfhNATLAMBDA:    ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
++            case edfhWLHISTO:      ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
++            case edfhWLDELTA:      ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
++            case edfhSUMWEIGHTS:   ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
++            case edfhSUMDG:        ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
++            case edfhSUMMINVAR:    ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
++            case edfhSUMVAR:       ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
++            case edfhACCUMP:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
++            case edfhACCUMM:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
++            case edfhACCUMP2:      ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
++            case edfhACCUMM2:      ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
++            case edfhTIJ:          ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
++            case edfhTIJEMP:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
++
++            default:
++                gmx_fatal(FARGS,"Unknown df history entry %d\n"
++                          "You are probably reading a new checkpoint file with old code",i);
++            }
++        }
++    }
++
++    return ret;
++}
++
 +static int do_cpt_files(XDR *xd, gmx_bool bRead, 
 +                        gmx_file_position_t **p_outputfiles, int *nfiles, 
 +                        FILE *list, int file_version)
 +{
 +    int    i,j;
 +    gmx_off_t  offset;
 +    gmx_off_t  mask = 0xFFFFFFFFL;
 +    int    offset_high,offset_low;
 +    char   *buf;
 +    gmx_file_position_t *outputfiles;
 +
 +    if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
 +    {
 +        return -1;
 +    }
 +
 +    if(bRead)
 +    {
 +        snew(*p_outputfiles,*nfiles);
 +    }
 +
 +    outputfiles = *p_outputfiles;
 +
 +    for(i=0;i<*nfiles;i++)
 +    {
 +        /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
 +        if(bRead)
 +        {
 +            do_cpt_string_err(xd,bRead,"output filename",&buf,list);
 +            strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
 +            if(list==NULL)
 +            {
 +                sfree(buf);                   
 +            }
 +
 +            if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
 +            {
 +                return -1;
 +            }
 +#if (SIZEOF_GMX_OFF_T > 4)
 +            outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
 +#else
 +            outputfiles[i].offset = offset_low;
 +#endif
 +        }
 +        else
 +        {
 +            buf = outputfiles[i].filename;
 +            do_cpt_string_err(xd,bRead,"output filename",&buf,list);
 +            /* writing */
 +            offset      = outputfiles[i].offset;
 +            if (offset == -1)
 +            {
 +                offset_low  = -1;
 +                offset_high = -1;
 +            }
 +            else
 +            {
 +#if (SIZEOF_GMX_OFF_T > 4)
 +                offset_low  = (int) (offset & mask);
 +                offset_high = (int) ((offset >> 32) & mask);
 +#else
 +                offset_low  = offset;
 +                offset_high = 0;
 +#endif
 +            }
 +            if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
 +            {
 +                return -1;
 +            }
 +        }
 +        if (file_version >= 8)
 +        {
 +            if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
 +                           list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
 +            {
 +                return -1;
 +            }
 +        } 
 +        else 
 +        {
 +            outputfiles[i].chksum_size = -1;
 +        }
 +    }
 +    return 0;
 +}
 +
 +
 +void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
 +                      FILE *fplog,t_commrec *cr,
 +                      int eIntegrator,int simulation_part,
++                      gmx_bool bExpanded, int elamstats,
 +                      gmx_large_int_t step,double t,t_state *state)
 +{
 +    t_fileio *fp;
 +    int  file_version;
 +    char *version;
 +    char *btime;
 +    char *buser;
 +    char *bmach;
 +    int  double_prec;
 +    char *fprog;
 +    char *fntemp; /* the temporary checkpoint file name */
 +    time_t now;
 +    char timebuf[STRLEN];
 +    int  nppnodes,npmenodes,flag_64bit;
 +    char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
 +    gmx_file_position_t *outputfiles;
 +    int  noutputfiles;
 +    char *ftime;
-     
++    int  flags_eks,flags_enh,flags_dfh,i;
 +    t_fileio *ret;
 +              
 +    if (PAR(cr))
 +    {
 +        if (DOMAINDECOMP(cr))
 +        {
 +            nppnodes  = cr->dd->nnodes;
 +            npmenodes = cr->npmenodes;
 +        }
 +        else
 +        {
 +            nppnodes  = cr->nnodes;
 +            npmenodes = 0;
 +        }
 +    }
 +    else
 +    {
 +        nppnodes  = 1;
 +        npmenodes = 0;
 +    }
 +
 +    /* make the new temporary filename */
 +    snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
 +    strcpy(fntemp,fn);
 +    fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +    sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
 +    strcat(fntemp,suffix);
 +    strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
 +   
 +    time(&now);
 +    gmx_ctime_r(&now,timebuf,STRLEN);
 +
 +    if (fplog)
 +    { 
 +        fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
 +                gmx_step_str(step,buf),timebuf);
 +    }
 +    
 +    /* Get offsets for open files */
 +    gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
 +
 +    fp = gmx_fio_open(fntemp,"w");
 +      
 +    if (state->ekinstate.bUpToDate)
 +    {
 +        flags_eks =
 +            ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) | 
 +             (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) | 
 +             (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
 +    }
 +    else
 +    {
 +        flags_eks = 0;
 +    }
 +
 +    flags_enh = 0;
 +    if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
 +    {
 +        flags_enh |= (1<<eenhENERGY_N);
 +        if (state->enerhist.nsum > 0)
 +        {
 +            flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
 +                          (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
 +        }
 +        if (state->enerhist.nsum_sim > 0)
 +        {
 +            flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
 +                          (1<<eenhENERGY_NSUM_SIM));
 +        }
 +        if (state->enerhist.dht)
 +        {
 +            flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
 +                           (1<< eenhENERGY_DELTA_H_LIST) | 
 +                           (1<< eenhENERGY_DELTA_H_STARTTIME) |
 +                           (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
 +        }
 +    }
 +
-                   &state->nhchainlength, &state->flags,&flags_eks,&flags_enh,
++    if (bExpanded)
++    {
++        flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) |  (1<<edfhSUMDG)  |
++                     (1<<edfhTIJ) | (1<<edfhTIJEMP));
++        if (EWL(elamstats))
++        {
++            flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
++        }
++        if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
++        {
++            flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
++                          | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
++        }
++    } else {
++        flags_dfh = 0;
++    }
++
 +    version = gmx_strdup(VERSION);
 +    btime   = gmx_strdup(BUILD_TIME);
 +    buser   = gmx_strdup(BUILD_USER);
 +    bmach   = gmx_strdup(BUILD_MACHINE);
 +    double_prec = GMX_CPT_BUILD_DP;
 +    fprog   = gmx_strdup(Program());
 +
 +    ftime   = &(timebuf[0]);
 +    
 +    do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
 +                  &version,&btime,&buser,&bmach,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,&simulation_part,&step,&t,&nppnodes,
 +                  DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
 +                  &state->natoms,&state->ngtc,&state->nnhpres,
-                             int eIntegrator,gmx_large_int_t *step,double *t,
++                  &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
 +                  NULL);
 +    
 +    sfree(version);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bmach);
 +    sfree(fprog);
 +
 +    if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0)        ||
 +       (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
 +       (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0)  ||
++       (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0)  ||
 +       (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
 +                     file_version) < 0))
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
 +
 +    /* we really, REALLY, want to make sure to physically write the checkpoint, 
 +       and all the files it depends on, out to disk. Because we've
 +       opened the checkpoint with gmx_fio_open(), it's in our list
 +       of open files.  */
 +    ret=gmx_fio_all_output_fsync();
 +
 +    if (ret)
 +    {
 +        char buf[STRLEN];
 +        sprintf(buf,
 +                "Cannot fsync '%s'; maybe you are out of disk space?",
 +                gmx_fio_getname(ret));
 +
 +        if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
 +        {
 +            gmx_file(buf);
 +        }
 +        else
 +        {
 +            gmx_warning(buf);
 +        }
 +    }
 +
 +    if( gmx_fio_close(fp) != 0)
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    /* we don't move the checkpoint if the user specified they didn't want it,
 +       or if the fsyncs failed */
 +    if (!bNumberAndKeep && !ret)
 +    {
 +        if (gmx_fexist(fn))
 +        {
 +            /* Rename the previous checkpoint file */
 +            strcpy(buf,fn);
 +            buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +            strcat(buf,"_prev");
 +            strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
 +#ifndef GMX_FAHCORE
 +            /* we copy here so that if something goes wrong between now and
 +             * the rename below, there's always a state.cpt.
 +             * If renames are atomic (such as in POSIX systems),
 +             * this copying should be unnecessary.
 +             */
 +            gmx_file_copy(fn, buf, FALSE);
 +            /* We don't really care if this fails: 
 +             * there's already a new checkpoint.
 +             */
 +#else
 +            gmx_file_rename(fn, buf);
 +#endif
 +        }
 +        if (gmx_file_rename(fntemp, fn) != 0)
 +        {
 +            gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
 +        }
 +    }
 +
 +    sfree(outputfiles);
 +    sfree(fntemp);
 +
 +#ifdef GMX_FAHCORE
 +    /*code for alternate checkpointing scheme.  moved from top of loop over 
 +      steps */
 +    fcRequestCheckPoint();
 +    if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
 +    }
 +#endif /* end GMX_FAHCORE block */
 +}
 +
 +static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
 +{
 +    int i;
 +    
 +    fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
 +    fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
 +    fprintf(fplog,"  %24s    %11s    %11s\n","","simulation","checkpoint");
 +    for(i=0; i<estNR; i++)
 +    {
 +        if ((sflags & (1<<i)) || (fflags & (1<<i)))
 +        {
 +            fprintf(fplog,"  %24s    %11s    %11s\n",
 +                    est_names[i],
 +                    (sflags & (1<<i)) ? "  present  " : "not present",
 +                    (fflags & (1<<i)) ? "  present  " : "not present");
 +        }
 +    }
 +}
 +
 +static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
 +{
 +      FILE *fp = fplog ? fplog : stderr;
 +
 +    if (p != f)
 +    {
 +              fprintf(fp,"  %s mismatch,\n",type);
 +              fprintf(fp,"    current program: %d\n",p);
 +              fprintf(fp,"    checkpoint file: %d\n",f);
 +              fprintf(fp,"\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_string(FILE *fplog,const char *type,const char *p,
 +                         const char *f,gmx_bool *mm)
 +{
 +      FILE *fp = fplog ? fplog : stderr;
 +      
 +    if (strcmp(p,f) != 0)
 +    {
 +              fprintf(fp,"  %s mismatch,\n",type);
 +              fprintf(fp,"    current program: %s\n",p);
 +              fprintf(fp,"    checkpoint file: %s\n",f);
 +              fprintf(fp,"\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_match(FILE *fplog,
 +                        char *version,
 +                        char *btime,char *buser,char *bmach,int double_prec,
 +                        char *fprog,
 +                        t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
 +                        ivec dd_nc,ivec dd_nc_f)
 +{
 +    int  npp;
 +    gmx_bool mm;
 +    
 +    mm = FALSE;
 +    
 +    check_string(fplog,"Version"      ,VERSION      ,version,&mm);
 +    check_string(fplog,"Build time"   ,BUILD_TIME   ,btime  ,&mm);
 +    check_string(fplog,"Build user"   ,BUILD_USER   ,buser  ,&mm);
 +    check_string(fplog,"Build machine",BUILD_MACHINE,bmach  ,&mm);
 +    check_int   (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
 +    check_string(fplog,"Program name" ,Program()    ,fprog  ,&mm);
 +    
 +    check_int   (fplog,"#nodes"       ,cr->nnodes   ,npp_f+npme_f ,&mm);
 +    if (bPartDecomp)
 +    {
 +        dd_nc[XX] = 1;
 +        dd_nc[YY] = 1;
 +        dd_nc[ZZ] = 1;
 +    }
 +    if (cr->nnodes > 1)
 +    {
 +        check_int (fplog,"#PME-nodes"  ,cr->npmenodes,npme_f     ,&mm);
 +
 +        npp = cr->nnodes;
 +        if (cr->npmenodes >= 0)
 +        {
 +            npp -= cr->npmenodes;
 +        }
 +        if (npp == npp_f)
 +        {
 +            check_int (fplog,"#DD-cells[x]",dd_nc[XX]    ,dd_nc_f[XX],&mm);
 +            check_int (fplog,"#DD-cells[y]",dd_nc[YY]    ,dd_nc_f[YY],&mm);
 +            check_int (fplog,"#DD-cells[z]",dd_nc[ZZ]    ,dd_nc_f[ZZ],&mm);
 +        }
 +    }
 +    
 +    if (mm)
 +    {
 +              fprintf(stderr,
 +                              "Gromacs binary or parallel settings not identical to previous run.\n"
 +                              "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
 +                              fplog ? ",\n see the log file for details" : "");
 +              
 +        if (fplog)
 +        {
 +                      fprintf(fplog,
 +                                      "Gromacs binary or parallel settings not identical to previous run.\n"
 +                                      "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
 +              }
 +    }
 +}
 +
 +static void read_checkpoint(const char *fn,FILE **pfplog,
 +                            t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
-     int  natoms,ngtc,nnhpres,nhchainlength,fflags,flags_eks,flags_enh;
++                            int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
 +                            t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
 +                            int *simulation_part,
 +                            gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
 +{
 +    t_fileio *fp;
 +    int  i,j,rc;
 +    int  file_version;
 +    char *version,*btime,*buser,*bmach,*fprog,*ftime;
 +    int  double_prec;
 +      char filename[STRLEN],buf[STEPSTRSIZE];
 +    int  nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
 +    ivec dd_nc_f;
-                   &natoms,&ngtc,&nnhpres,&nhchainlength,
-                   &fflags,&flags_eks,&flags_enh,NULL);
++    int  natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
 +    int  d;
 +    int  ret;
 +    gmx_file_position_t *outputfiles;
 +    int  nfiles;
 +    t_fileio *chksum_file;
 +    FILE* fplog = *pfplog;
 +    unsigned char digest[16];
 +#ifndef GMX_NATIVE_WINDOWS
 +    struct flock fl;  /* don't initialize here: the struct order is OS 
 +                         dependent! */
 +#endif
 +
 +    const char *int_warn=
 +              "WARNING: The checkpoint file was generated with integrator %s,\n"
 +              "         while the simulation uses integrator %s\n\n";
 +    const char *sd_note=
 +        "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
 +        "      while the simulation uses %d SD or BD nodes,\n"
 +        "      continuation will be exact, except for the random state\n\n";
 +    
 +#ifndef GMX_NATIVE_WINDOWS
 +    fl.l_type=F_WRLCK;
 +    fl.l_whence=SEEK_SET;
 +    fl.l_start=0;
 +    fl.l_len=0;
 +    fl.l_pid=0;
 +#endif
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        gmx_fatal(FARGS,
 +                  "read_checkpoint not (yet) supported with particle decomposition");
 +    }
 +    
 +    fp = gmx_fio_open(fn,"r");
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bmach,&double_prec,&fprog,&ftime,
 +                  &eIntegrator_f,simulation_part,step,t,
 +                  &nppnodes_f,dd_nc_f,&npmenodes_f,
-                       ir->eI,&step,&t,state,bReadRNG,bReadEkin,
++                  &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
++                  &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
 +
 +    if (bAppendOutputFiles &&
 +        file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
 +    {
 +        gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
 +    }
 +    
 +    if (cr == NULL || MASTER(cr))
 +    {
 +        fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
 +                fn,ftime);
 +    }
 +      
 +      /* This will not be written if we do appending, since fplog is still NULL then */
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"Reading checkpoint file %s\n",fn);
 +        fprintf(fplog,"  file generated by:     %s\n",fprog);  
 +        fprintf(fplog,"  file generated at:     %s\n",ftime);  
 +        fprintf(fplog,"  GROMACS build time:    %s\n",btime);  
 +        fprintf(fplog,"  GROMACS build user:    %s\n",buser);  
 +        fprintf(fplog,"  GROMACS build machine: %s\n",bmach);  
 +        fprintf(fplog,"  GROMACS double prec.:  %d\n",double_prec);
 +        fprintf(fplog,"  simulation part #:     %d\n",*simulation_part);
 +        fprintf(fplog,"  step:                  %s\n",gmx_step_str(*step,buf));
 +        fprintf(fplog,"  time:                  %f\n",*t);  
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    if (natoms != state->natoms)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
 +    }
 +    if (ngtc != state->ngtc)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
 +    }
 +    if (nnhpres != state->nnhpres)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
 +    }
 +
++    if (nlambda != state->dfhist.nlambda)
++    {
++        gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
++    }
++
 +    init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
 +    /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
 +    
 +    if (eIntegrator_f != eIntegrator)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
 +        }
 +              if(bAppendOutputFiles)
 +              {
 +                      gmx_fatal(FARGS,
 +                                        "Output file appending requested, but input/checkpoint integrators do not match.\n"
 +                                        "Stopping the run to prevent you from ruining all your data...\n"
 +                                        "If you _really_ know what you are doing, try with the -noappend option.\n");
 +              }
 +        if (fplog)
 +        {
 +            fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
 +        }
 +    }
 +
 +    if (!PAR(cr))
 +    {
 +        nppnodes = 1;
 +        cr->npmenodes = 0;
 +    }
 +    else if (bPartDecomp)
 +    {
 +        nppnodes = cr->nnodes;
 +        cr->npmenodes = 0;
 +    }
 +    else if (cr->nnodes == nppnodes_f + npmenodes_f)
 +    {
 +        if (cr->npmenodes < 0)
 +        {
 +            cr->npmenodes = npmenodes_f;
 +        }
 +        nppnodes = cr->nnodes - cr->npmenodes;
 +        if (nppnodes == nppnodes_f)
 +        {
 +            for(d=0; d<DIM; d++)
 +            {
 +                if (dd_nc[d] == 0)
 +                {
 +                    dd_nc[d] = dd_nc_f[d];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* The number of PP nodes has not been set yet */
 +        nppnodes = -1;
 +    }
 +
 +    if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
 +    {
 +        /* Correct the RNG state size for the number of PP nodes.
 +         * Such assignments should all be moved to one central function.
 +         */
 +        state->nrng  = nppnodes*gmx_rng_n();
 +        state->nrngi = nppnodes;
 +    }
 +    
 +    *bReadRNG = TRUE;
 +    if (fflags != state->flags)
 +    {
 +              
 +        if (MASTER(cr))
 +        {
 +                      if(bAppendOutputFiles)
 +                      {
 +                              gmx_fatal(FARGS,
 +                                                "Output file appending requested, but input and checkpoint states are not identical.\n"
 +                                                "Stopping the run to prevent you from ruining all your data...\n"
 +                                                "You can try with the -noappend option, and get more info in the log file.\n");
 +                      }
 +                      
 +            if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
 +            {
 +                gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
 +            }
 +            else
 +            {
 +                fprintf(stderr,
 +                        "WARNING: The checkpoint state entries do not match the simulation,\n"
 +                        "         see the log file for details\n\n");
 +            }
 +        }
 +              
 +              if(fplog)
 +              {
 +                      print_flag_mismatch(fplog,state->flags,fflags);
 +              }
 +    }
 +    else
 +    {
 +        if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
 +            nppnodes != nppnodes_f)
 +        {
 +            *bReadRNG = FALSE;
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr,sd_note,nppnodes_f,nppnodes);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
 +            }
 +        }
 +        if (MASTER(cr))
 +        {
 +            check_match(fplog,version,btime,buser,bmach,double_prec,fprog,
 +                        cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
 +        }
 +    }
 +    ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
++    *init_fep_state = state->fep_state;  /* there should be a better way to do this than setting it here.
++                                            Investigate for 5.0. */
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state->ekinstate,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
 +                  ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
 +    
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state->enerhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    if (file_version < 6)
 +    {
 +        const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
 +
 +        fprintf(stderr,"\nWARNING: %s\n\n",warn);
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\nWARNING: %s\n\n",warn);
 +        }
 +        state->enerhist.nsum     = *step;
 +        state->enerhist.nsum_sim = *step;
 +    }
 +
++    ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
++                         flags_dfh,&state->dfhist,NULL);
++    if (ret)
++    {
++        cp_error();
++    }
++
 +      ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
 +      if (ret)
 +      {
 +              cp_error();
 +      }
 +                                         
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +    
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bmach);
 +      
 +      /* If the user wants to append to output files,
 +     * we use the file pointer positions of the output files stored
 +     * in the checkpoint file and truncate the files such that any frames
 +     * written after the checkpoint time are removed.
 +     * All files are md5sum checked such that we can be sure that
 +     * we do not truncate other (maybe important) files.
 +       */
 +    if (bAppendOutputFiles)
 +    {
 +        if (fn2ftp(outputfiles[0].filename)!=efLOG)
 +        {
 +            /* make sure first file is log file so that it is OK to use it for 
 +             * locking
 +             */
 +            gmx_fatal(FARGS,"The first output file should always be the log "
 +                      "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
 +        }
 +        for(i=0;i<nfiles;i++)
 +        {
 +            if (outputfiles[i].offset < 0)
 +            {
 +                gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
 +                    "is larger than 2 GB, but mdrun did not support large file"
 +                    " offsets. Can not append. Run mdrun with -noappend",
 +                    outputfiles[i].filename);
 +            }
 +#ifdef GMX_FAHCORE
 +            chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
 +
 +#else
 +            chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
 +
 +            /* lock log file */                
 +            if (i==0)
 +            {
 +                /* Note that there are systems where the lock operation
 +                 * will succeed, but a second process can also lock the file.
 +                 * We should probably try to detect this.
 +                 */
 +#ifndef GMX_NATIVE_WINDOWS
 +                if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
 +                    ==-1)
 +#else
 +                if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
 +#endif
 +                {
 +                    if (errno == ENOSYS)
 +                    {
 +                        if (!bForceAppend)
 +                        {
 +                            gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
 +                        }
 +                        else
 +                        {
 +                            fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
 +                            if (fplog)
 +                            {
 +                                fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
 +                            }
 +                        }
 +                    }
 +                    else if (errno == EACCES || errno == EAGAIN)
 +                    {
 +                        gmx_fatal(FARGS,"Failed to lock: %s. Already running "
 +                                  "simulation?", outputfiles[i].filename);
 +                    }
 +                    else
 +                    {
 +                        gmx_fatal(FARGS,"Failed to lock: %s. %s.",
 +                                  outputfiles[i].filename, strerror(errno));
 +                    }
 +                }
 +            }
 +            
 +            /* compute md5 chksum */ 
 +            if (outputfiles[i].chksum_size != -1)
 +            {
 +                if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
 +                                     digest) != outputfiles[i].chksum_size)  /*at the end of the call the file position is at the end of the file*/
 +                {
 +                    gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                              outputfiles[i].chksum_size, 
 +                              outputfiles[i].filename);
 +                }
 +            } 
 +            if (i==0)  /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
 +            {
 +                if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
 +                {
 +                      gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
 +                }
 +            }
 +#endif
 +            
 +            if (i==0) /*open log file here - so that lock is never lifted 
 +                        after chksum is calculated */
 +            {
 +                *pfplog = gmx_fio_getfp(chksum_file);
 +            }
 +            else
 +            {
 +                gmx_fio_close(chksum_file);
 +            }
 +#ifndef GMX_FAHCORE            
 +            /* compare md5 chksum */
 +            if (outputfiles[i].chksum_size != -1 &&
 +                memcmp(digest,outputfiles[i].chksum,16)!=0) 
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
 +                    for (j=0; j<16; j++)
 +                    {
 +                        fprintf(debug,"%02x",digest[j]);
 +                    }
 +                    fprintf(debug,"\n");
 +                }
 +                gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                          outputfiles[i].filename);
 +            }
 +#endif        
 +
 +              
 +            if (i!=0) /*log file is already seeked to correct position */
 +            {
 +#ifdef GMX_NATIVE_WINDOWS
 +                rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
 +#else            
 +                rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
 +#endif
 +                if(rc!=0)
 +                {
 +                    gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
 +                }
 +            }
 +        }
 +    }
 +
 +    sfree(outputfiles);
 +}
 +
 +
 +void load_checkpoint(const char *fn,FILE **fplog,
 +                     t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
 +                     t_inputrec *ir,t_state *state,
 +                     gmx_bool *bReadRNG,gmx_bool *bReadEkin,
 +                     gmx_bool bAppend,gmx_bool bForceAppend)
 +{   /* Continue a run from checkpoint file fn.
 +     *
 +     * The simulation master reads the checkpoint with read_checkpoint();
 +     * in a parallel run the values listed below are then broadcast, and
 +     * finally the input record ir is updated so that the run carries on
 +     * from the checkpointed step.
 +     *
 +     * bReadRNG and bReadEkin are outputs: they are handed to
 +     * read_checkpoint() and broadcast afterwards.
 +     */
 +    gmx_large_int_t step; /* step stored in the checkpoint */
 +    double t;             /* time stored in the checkpoint; not used after the read */
 +
 +    if (SIMMASTER(cr)) {
 +      /* Read the state from the checkpoint file; this also passes the
 +       * addresses of ir->fepvals->init_fep_state and ir->simulation_part
 +       * so that read_checkpoint() can fill them in.
 +       */
 +      read_checkpoint(fn,fplog,
 +                      cr,bPartDecomp,dd_nc,
-     int  flags_eks,flags_enh;
++                      ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
 +                      &ir->simulation_part,bAppend,bForceAppend);
 +    }
 +    if (PAR(cr)) {
 +      /* NOTE(review): simulation_part and init_fep_state are not in this
 +       * broadcast list although they are only read on the master above;
 +       * confirm the other ranks obtain them elsewhere.
 +       */
 +      gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
 +      gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
 +      gmx_bcast(sizeof(step),&step,cr);
 +      gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
 +      gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
 +    }
 +    ir->bContinuation    = TRUE;
 +    if (ir->nsteps >= 0)
 +    {
 +        /* Keep init_step + nsteps (the last step of the run) unchanged
 +         * when init_step is moved to the checkpointed step below.
 +         * A negative nsteps is left as it is.
 +         */
 +        ir->nsteps          += ir->init_step - step;
 +    }
 +    ir->init_step        = step;
 +      ir->simulation_part += 1; /* the continued run is the next part */
 +}
 +
 +static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
 +                                 gmx_large_int_t *step,double *t,t_state *state,
 +                                 gmx_bool bReadRNG,
 +                                 int *nfiles,gmx_file_position_t **outputfiles)
 +{   /* Read all sections of the checkpoint open on fp, in file order:
 +     * header, state, kinetic-energy state, energy history, free-energy
 +     * history, output file list and footer.
 +     *
 +     * simulation_part, step and t receive the corresponding header values.
 +     * The sizes and flags in *state are overwritten with the header values
 +     * before the state itself is read.
 +     * If outputfiles is not NULL, the file list and its length are returned
 +     * through outputfiles and nfiles; otherwise the list is read into a
 +     * local buffer and released again.
 +     * Any section reader returning non-zero leads to cp_error().
 +     */
 +    int  file_version;
 +    char *version,*btime,*buser,*bmach,*fprog,*ftime;
 +    int  double_prec;
 +    int  eIntegrator;
 +    int  nppnodes,npme;
 +    ivec dd_nc;
-                   &state->flags,&flags_eks,&flags_enh,NULL);
++    int  flags_eks,flags_enh,flags_dfh;
 +    int  nfiles_loc;
 +    gmx_file_position_t *files_loc=NULL; /* only used when the caller passes outputfiles == NULL */
 +    int  ret;
 +      
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bmach,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
 +                  &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
-     init_state(&state,0,0,0,0);
++                  &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
 +    ret =
 +        do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state->ekinstate,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state->enerhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
++    ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
++                          flags_dfh,&state->dfhist,NULL);
++    if (ret)
++    {
++        cp_error();
++    }
 +
 +    ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
 +                       outputfiles != NULL ? outputfiles : &files_loc,
 +                       outputfiles != NULL ? nfiles : &nfiles_loc,
 +                       NULL,file_version);
 +    if (files_loc != NULL)
 +    {
 +        /* The caller did not ask for the list: drop the local copy
 +         * before the return value is checked.
 +         */
 +        sfree(files_loc);
 +    }
 +      
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +      
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    /* NOTE(review): version is obtained from do_cpt_header() like the
 +     * strings below but is not released here - confirm whether it is
 +     * allocated and leaks.
 +     */
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bmach);
 +}
 +
 +void 
 +read_checkpoint_state(const char *fn,int *simulation_part,
 +                      gmx_large_int_t *step,double *t,t_state *state)
 +{
 +    t_fileio *fp;
 +    
 +    fp = gmx_fio_open(fn,"r");
 +    read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +}
 +
 +void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
 +{   /* Fill trajectory frame fr from the checkpoint that is read from fp.
 +     *
 +     * The checkpoint is read into a temporary state (the RNG state is not
 +     * requested); step, time, lambda, FEP state, box and, when present in
 +     * the checkpoint flags, coordinates and velocities are then handed
 +     * over to fr. Title, atoms and forces are marked as absent.
 +     */
 +    t_state state;
 +    int simulation_part;
 +    gmx_large_int_t step;
 +    double t;
 +    
-     fr->lambda  = state.lambda;
++    init_state(&state,0,0,0,0,0);
 +    
 +    read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
 +    
 +    fr->natoms  = state.natoms;
 +    fr->bTitle  = FALSE;
 +    fr->bStep   = TRUE;
 +    fr->step    = gmx_large_int_to_int(step,
 +                                    "conversion of checkpoint to trajectory");
 +    fr->bTime   = TRUE;
 +    fr->time    = t;
 +    fr->bLambda = TRUE;
-     int  flags_eks,flags_enh;
++    fr->lambda  = state.lambda[efptFEP];
++    fr->fep_state  = state.fep_state;
 +    fr->bAtoms  = FALSE;
 +    fr->bX      = (state.flags & (1<<estX));
 +    if (fr->bX)
 +    {
 +        /* Hand the coordinate array over to the frame; the state pointer
 +         * is cleared, presumably so that done_state() below does not
 +         * release it - confirm.
 +         */
 +        fr->x     = state.x;
 +        state.x   = NULL;
 +    }
 +    fr->bV      = (state.flags & (1<<estV));
 +    if (fr->bV)
 +    {
 +        /* Same hand-over for the velocities */
 +        fr->v     = state.v;
 +        state.v   = NULL;
 +    }
 +    fr->bF      = FALSE;
 +    fr->bBox    = (state.flags & (1<<estBOX));
 +    if (fr->bBox)
 +    {
 +        copy_mat(state.box,fr->box);
 +    }
 +    done_state(&state);
 +}
 +
 +void list_checkpoint(const char *fn,FILE *out)
 +{   /* Read checkpoint file fn section by section, forwarding out to
 +     * every do_cpt_* reader (presumably they print what they read to it -
 +     * confirm).
 +     *
 +     * A failure while reading the state or the kinetic-energy state ends
 +     * in cp_error(); a failure in a later section only stops further
 +     * reading and is reported with cp_warning(out).
 +     */
 +    t_fileio *fp;
 +    int  file_version;
 +    char *version,*btime,*buser,*bmach,*fprog,*ftime;
 +    int  double_prec;
 +    int  eIntegrator,simulation_part,nppnodes,npme;
 +    gmx_large_int_t step;
 +    double t;
 +    ivec dd_nc;
 +    t_state state;
-     init_state(&state,-1,-1,-1,-1);
++    int  flags_eks,flags_enh,flags_dfh;
 +    int  indent; /* not used in this function */
 +    int  i,j;    /* not used in this function */
 +    int  ret;
 +    gmx_file_position_t *outputfiles;
 +      int  nfiles;
 +      
-                   &state.flags,&flags_eks,&flags_enh,out);
++    init_state(&state,-1,-1,-1,-1,0);
 +
 +    fp = gmx_fio_open(fn,"r");
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bmach,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
 +                  &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
-             init_state(&state,0,0,0,0);
++                  &(state.dfhist.nlambda),&state.flags,
++                  &flags_eks,&flags_enh,&flags_dfh,out);
 +    ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state.ekinstate,out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    /* From here on a failure is no longer fatal: ret is carried along and
 +     * the remaining sections are skipped.
 +     */
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state.enerhist,out);
 +
++    if (ret == 0)
++    {
++        init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
++        ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
++                             flags_dfh,&state.dfhist,out);
++    }
 +    if (ret == 0)
 +    {
 +              /* NOTE(review): the return value of do_cpt_files() is not
 +               * stored, so a failure here goes unnoticed - confirm
 +               * whether that is intended.
 +               */
 +              do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
 +      }
 +      
 +    if (ret == 0)
 +    {
 +        ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    }
 +      
 +    if (ret)
 +    {
 +        cp_warning(out);
 +    }
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +    
 +    /* NOTE(review): neither the header strings nor outputfiles are
 +     * released in this function - confirm whether they leak.
 +     */
 +    done_state(&state);
 +}
 +
 +
 +static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
 +{
 +    int i;
 +
 +    /* Check if the output file name stored in the checkpoint file
 +     * is one of the output file names of mdrun.
 +     */
 +    i = 0;
 +    while (i < nfile &&
 +           !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
 +    {
 +        i++;
 +    }
 +    
 +    return (i < nfile && gmx_fexist(fnm_cp));
 +}
 +
 +/* This routine cannot print tons of data, since it is called before the log file is opened. */
 +gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
 +                                     gmx_large_int_t *cpt_step,t_commrec *cr,
 +                                     gmx_bool bAppendReq,
 +                                     int nfile,const t_filenm fnm[],
 +                                     const char *part_suffix,gmx_bool *bAddPart)
 +{
 +    t_fileio *fp;
 +    gmx_large_int_t step=0;
 +      double t;
 +    t_state state;
 +    int  nfiles;
 +    gmx_file_position_t *outputfiles;
 +    int  nexist,f;
 +    gmx_bool bAppend;
 +    char *fn,suf_up[STRLEN];
 +
 +    bAppend = FALSE;
 +
 +    if (SIMMASTER(cr)) {
 +        if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
 +        {
 +            *simulation_part = 0;
 +        }
 +        else 
 +        {
++            init_state(&state,0,0,0,0,0);
 +
 +            read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
 +                                 &nfiles,&outputfiles);
 +            if( gmx_fio_close(fp) != 0)
 +            {
 +                gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +            }
 +            done_state(&state);
 +
 +            if (bAppendReq)
 +            {
 +                nexist = 0;
 +                for(f=0; f<nfiles; f++)
 +                {
 +                    if (exist_output_file(outputfiles[f].filename,nfile,fnm))
 +                    {
 +                        nexist++;
 +                    }
 +                }
 +                if (nexist == nfiles)
 +                {
 +                    bAppend = bAppendReq;
 +                }
 +                else if (nexist > 0)
 +                {
 +                    fprintf(stderr,
 +                            "Output file appending has been requested,\n"
 +                            "but some output files listed in the checkpoint file %s\n"
 +                            "are not present or are named differently by the current program:\n",
 +                            filename);
 +                    fprintf(stderr,"output files present:");
 +                    for(f=0; f<nfiles; f++)
 +                    {
 +                        if (exist_output_file(outputfiles[f].filename,
 +                                              nfile,fnm))
 +                        {
 +                            fprintf(stderr," %s",outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr,"\n");
 +                    fprintf(stderr,"output files not present or named differently:");
 +                    for(f=0; f<nfiles; f++)
 +                    {
 +                        if (!exist_output_file(outputfiles[f].filename,
 +                                               nfile,fnm))
 +                        {
 +                            fprintf(stderr," %s",outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr,"\n");
 +                    
 +                    gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
 +                }
 +            }
 +            
 +            if (bAppend)
 +            {
 +                if (nfiles == 0)
 +                {
 +                    gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
 +                }
 +                fn = outputfiles[0].filename;
 +                if (strlen(fn) < 4 ||
 +                    gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
 +                {
 +                    gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
 +                }
 +                /* Set bAddPart to whether the suffix string '.part' is present
 +                 * in the log file name.
 +                 */
 +                strcpy(suf_up,part_suffix);
 +                upstring(suf_up);
 +                *bAddPart = (strstr(fn,part_suffix) != NULL ||
 +                             strstr(fn,suf_up) != NULL);
 +            }
 +
 +            sfree(outputfiles);
 +        }
 +    }
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
 +
 +        if (*simulation_part > 0 && bAppendReq)
 +        {
 +            gmx_bcast(sizeof(bAppend),&bAppend,cr);
 +            gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
 +        }
 +    }
 +    if (NULL != cpt_step)
 +    {
 +        *cpt_step = step;
 +    }
 +
 +    return bAppend;
 +}
index 308914f96188bbcc013a360581c41ba58fccc8b4,0000000000000000000000000000000000000000..ecfb806a144eaa3f8ef261cbb28da42ff7bcf621
mode 100644,000000..100644
--- /dev/null
@@@ -1,1146 -1,0 +1,1146 @@@
-       if (IR_NPT_TROTTER(ir)) 
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "futil.h"
 +#include "string2.h"
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "gmxfio.h"
 +#include "enxio.h"
 +#include "vec.h"
 +#include "xdrf.h"
 +#include "macros.h"
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +/* This number should be increased whenever the file format changes!
 + * It is the version written into new files; do_enxnms and do_eheader
 + * refuse to read a file that carries a higher version than this.
 + */
 +static const int enx_version = 5;
 +/*
 + * Human-readable descriptions of the kinds of data blocks an energy
 + * frame can carry.
 + * NOTE(review): presumably indexed by the block id enum (enxOR,
 + * enxDISRE, ...) declared in the enxio header - confirm the order matches.
 + */
 +const char *enx_block_id_name[] = {
 +    "Averaged orientation restraints",
 +    "Instantaneous orientation restraints",
 +    "Orientation restraint order tensor(s)",
 +    "Distance restraints",
 +    "Free energy data",
 +    "BAR histogram",
 +    "Delta H raw data"
 +};
 +
 +
 +/* Stuff for reading pre 4.1 energy files.
 + * This bookkeeping is filled in by do_enxnms and do_eheader and is used
 + * by convert_full_sums to turn whole-run sums into per-interval sums.
 + */
 +typedef struct {
 +    gmx_bool     bOldFileOpen;   /* Is this an open old file? */
 +    gmx_bool     bReadFirstStep; /* Did we read the first step? */
 +    int      first_step;     /* First step in the energy file */
 +    int      step_prev;      /* Previous step */
 +    int      nsum_prev;      /* Previous step sum length */
 +    t_energy *ener_prev;     /* Previous energy sums */
 +} ener_old_t;
 +/*
 + * State kept for one open energy file; open_enx allocates one of these
 + * and returns a pointer to it.
 + */
 +struct ener_file
 +{
 +    ener_old_t eo;   /* bookkeeping for old-format files, see ener_old_t */
 +    t_fileio *fio;   /* the underlying file handle */
 +    int framenr;     /* number of frames read so far, updated by do_enx */
 +    real frametime;  /* time of the last frame read, updated by do_enx */
 +};
 +
 +static void enxsubblock_init(t_enxsubblock *sb)
 +{
 +    sb->nr=0;
 +#ifdef GMX_DOUBLE
 +    sb->type=xdr_datatype_double;
 +#else
 +    sb->type=xdr_datatype_float;
 +#endif
 +    sb->fval = NULL;
 +    sb->dval = NULL;
 +    sb->ival = NULL;
 +    sb->lval = NULL;
 +    sb->cval = NULL;
 +    sb->sval = NULL;
 +    sb->fval_alloc = 0;
 +    sb->dval_alloc = 0;
 +    sb->ival_alloc = 0;
 +    sb->lval_alloc = 0;
 +    sb->cval_alloc = 0;
 +    sb->sval_alloc = 0;
 +}
 +
 +static void enxsubblock_free(t_enxsubblock *sb)
 +{
 +    if (sb->fval_alloc)
 +    {
 +        free(sb->fval);
 +        sb->fval_alloc=0;
 +        sb->fval=NULL;
 +    }
 +    if (sb->dval_alloc)
 +    {
 +        free(sb->dval);
 +        sb->dval_alloc=0;
 +        sb->dval=NULL;
 +    }
 +    if (sb->ival_alloc)
 +    {
 +        free(sb->ival);
 +        sb->ival_alloc=0;
 +        sb->ival=NULL;
 +    }
 +    if (sb->lval_alloc)
 +    {
 +        free(sb->lval);
 +        sb->lval_alloc=0;
 +        sb->lval=NULL;
 +    }
 +    if (sb->cval_alloc)
 +    {
 +        free(sb->cval);
 +        sb->cval_alloc=0;
 +        sb->cval=NULL;
 +    }
 +    if (sb->sval_alloc)
 +    {
 +        int i;
 +
 +        for(i=0;i<sb->sval_alloc;i++)
 +        {
 +            if (sb->sval[i])
 +            {
 +                free(sb->sval[i]);
 +            }
 +        }
 +        free(sb->sval);
 +        sb->sval_alloc=0;
 +        sb->sval=NULL;
 +    }
 +}
 +
 +/* allocate the appropriate amount of memory for the given type and nr */
 +static void enxsubblock_alloc(t_enxsubblock *sb)
 +{
 +    /* allocate the appropriate amount of memory */
 +    switch(sb->type)
 +    {
 +        case xdr_datatype_float:
 +            if (sb->nr > sb->fval_alloc)
 +            {
 +                srenew(sb->fval, sb->nr);
 +                sb->fval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_double:
 +            if (sb->nr > sb->dval_alloc)
 +            {
 +                srenew(sb->dval, sb->nr);
 +                sb->dval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_int:
 +            if (sb->nr > sb->ival_alloc)
 +            {
 +                srenew(sb->ival, sb->nr);
 +                sb->ival_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_large_int:
 +            if (sb->nr > sb->lval_alloc)
 +            {
 +                srenew(sb->lval, sb->nr);
 +                sb->lval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_char:
 +            if (sb->nr > sb->cval_alloc)
 +            {
 +                srenew(sb->cval, sb->nr);
 +                sb->cval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_string:
 +            if (sb->nr > sb->sval_alloc)
 +            {
 +                int i;
 +
 +                srenew(sb->sval, sb->nr);
 +                for(i=sb->sval_alloc;i<sb->nr;i++)
 +                {
 +                    sb->sval[i]=NULL;
 +                }
 +                sb->sval_alloc=sb->nr;
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown block type: this file is corrupted or from the future");
 +    }
 +}
 +
 +static void enxblock_init(t_enxblock *eb)
 +{
 +    eb->id=enxOR;
 +    eb->nsub=0;
 +    eb->sub=NULL;
 +    eb->nsub_alloc=0;
 +}
 +
 +static void enxblock_free(t_enxblock *eb)
 +{
 +    if (eb->nsub_alloc>0)
 +    {
 +        int i;
 +        for(i=0;i<eb->nsub_alloc;i++)
 +        {
 +            enxsubblock_free(&(eb->sub[i]));
 +        }
 +        free(eb->sub);
 +        eb->nsub_alloc=0;
 +        eb->sub=NULL;
 +    }
 +}
 +
 +void init_enxframe(t_enxframe *fr)
 +{
 +    fr->e_alloc=0;
 +    fr->ener=NULL;
 +
 +    /*fr->d_alloc=0;*/
 +    fr->ener=NULL;
 +
 +    /*fr->ndisre=0;*/
 +
 +    fr->nblock=0;
 +    fr->nblock_alloc=0;
 +    fr->block=NULL;
 +}
 +
 +
 +void free_enxframe(t_enxframe *fr)
 +{
 +  int b;
 +
 +  if (fr->e_alloc)
 +  {
 +    sfree(fr->ener);
 +  }
 +  for(b=0; b<fr->nblock_alloc; b++)
 +  {
 +      enxblock_free(&(fr->block[b]));
 +  }
 +  free(fr->block);
 +}
 +
 +void add_blocks_enxframe(t_enxframe *fr, int n)
 +{
 +    fr->nblock=n;
 +    if (n > fr->nblock_alloc)
 +    {
 +        int b;
 +
 +        srenew(fr->block, n);
 +        for(b=fr->nblock_alloc;b<fr->nblock;b++)
 +        {
 +            enxblock_init(&(fr->block[b]));
 +        }
 +        fr->nblock_alloc=n;
 +    }
 +}
 +
 +t_enxblock *find_block_id_enxframe(t_enxframe *ef, int id, t_enxblock *prev)
 +{
 +    gmx_off_t starti=0;
 +    gmx_off_t i;
 +
 +    if (prev)
 +    {
 +        starti=(prev - ef->block) + 1;
 +    }
 +    for(i=starti; i<ef->nblock; i++)
 +    {
 +        if (ef->block[i].id == id)
 +            return &(ef->block[i]);
 +    }
 +    return NULL;
 +}
 +
 +void add_subblocks_enxblock(t_enxblock *eb, int n)
 +{
 +    eb->nsub=n;
 +    if (eb->nsub > eb->nsub_alloc)
 +    {
 +        int b;
 +
 +        srenew(eb->sub, n);
 +        for(b=eb->nsub_alloc; b<n; b++)
 +        {
 +            enxsubblock_init(&(eb->sub[b]));
 +        } 
 +        eb->nsub_alloc=n;
 +    }
 +}
 +
 +static void enx_warning(const char *msg)
 +{
 +    if (getenv("GMX_ENX_NO_FATAL") != NULL)
 +    {
 +        gmx_warning(msg);
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"%s\n%s",
 +                  msg,
 +                  "If you want to use the correct frames before the corrupted frame and avoid this fatal error set the env.var. GMX_ENX_NO_FATAL");
 +    }
 +}
 +
 +static void edr_strings(XDR *xdr,gmx_bool bRead,int file_version,
 +                        int n,gmx_enxnm_t **nms)
 +{
 +    int  i;
 +    gmx_enxnm_t *nm;
 +
 +    if (*nms == NULL)
 +    {
 +        snew(*nms,n);
 +    }
 +    for(i=0; i<n; i++)
 +    {
 +        nm = &(*nms)[i];
 +        if (bRead)
 +        {
 +            if (nm->name)
 +            {
 +                sfree(nm->name);
 +                nm->name = NULL;
 +            }
 +            if (nm->unit)
 +            {
 +                sfree(nm->unit);
 +                nm->unit = NULL;
 +            }
 +        }
 +        if(!xdr_string(xdr,&(nm->name),STRLEN))
 +        {
 +            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        if (file_version >= 2)
 +        {
 +            if(!xdr_string(xdr,&(nm->unit),STRLEN))
 +            {
 +                gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +            }
 +        }
 +        else
 +        {
 +            nm->unit = strdup("kJ/mol");
 +        }
 +    }
 +}
 +
 +void do_enxnms(ener_file_t ef,int *nre,gmx_enxnm_t **nms)
 +{
 +    int  magic=-55555;
 +    XDR  *xdr;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  file_version;
 +    int  i;
 +   
 +    gmx_fio_checktype(ef->fio); 
 +
 +    xdr = gmx_fio_getxdr(ef->fio);
 +    
 +    if (!xdr_int(xdr,&magic))
 +    {
 +        if(!bRead)
 +        {
 +            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        *nre=0;
 +        return;
 +    }
 +    if (magic > 0)
 +    {
 +        /* Assume this is an old edr format */
 +        file_version = 1;
 +        *nre = magic;
 +        ef->eo.bOldFileOpen = TRUE;
 +        ef->eo.bReadFirstStep = FALSE;
 +        srenew(ef->eo.ener_prev,*nre);
 +    }
 +    else
 +    {
 +        ef->eo.bOldFileOpen=FALSE;
 +
 +        if (magic != -55555)
 +        {
 +            gmx_fatal(FARGS,"Energy names magic number mismatch, this is not a GROMACS edr file");
 +        }
 +        file_version = enx_version;
 +        xdr_int(xdr,&file_version);
 +        if (file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        xdr_int(xdr,nre);
 +    }
 +    if (file_version != enx_version)
 +    {
 +        fprintf(stderr,"Note: enx file_version %d, software version %d\n",
 +                file_version,enx_version);
 +    }
 +
 +    edr_strings(xdr,bRead,file_version,*nre,nms);
 +}
 +
 +static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
 +                       int nre_test,gmx_bool *bWrongPrecision,gmx_bool *bOK)
 +{
 +    int  magic=-7777777;
 +    real first_real_to_check;
 +    int  b,i,zero=0,dum=0;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  tempfix_nr=0;
 +    int  ndisre=0;
 +    int  startb=0;
 +#ifndef GMX_DOUBLE
 +    xdr_datatype dtreal=xdr_datatype_float; 
 +#else
 +    xdr_datatype dtreal=xdr_datatype_double; 
 +#endif
 +    
 +    if (bWrongPrecision)
 +    {
 +        *bWrongPrecision = FALSE;
 +    }
 +
 +    *bOK=TRUE;
 +    /* The original energy frame started with a real,
 +     * so we have to use a real for compatibility.
 +     * This is VERY DIRTY code, since do_eheader can be called
 +     * with the wrong precision set and then we could read r > -1e10,
 +     * while actually the intention was r < -1e10.
 +     * When nre_test >= 0, do_eheader should therefore terminate
 +     * before the number of i/o calls starts depending on what has been read
 +     * (which is the case for for instance the block sizes for variable
 +     * number of blocks, where this number is read before).
 +     */
 +    first_real_to_check = -2e10;
 +    if (!gmx_fio_do_real(ef->fio, first_real_to_check))
 +    {
 +        return FALSE;
 +    }
 +    if (first_real_to_check > -1e10)
 +    {
 +        /* Assume we are reading an old format */
 +        *file_version = 1;
 +        fr->t = first_real_to_check;
 +        if (!gmx_fio_do_int(ef->fio, dum))   *bOK = FALSE;
 +        fr->step = dum;
 +    }
 +    else
 +    {
 +        if (!gmx_fio_do_int(ef->fio, magic))       *bOK = FALSE;
 +        if (magic != -7777777)
 +        {
 +            enx_warning("Energy header magic number mismatch, this is not a GROMACS edr file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        *file_version = enx_version;
 +        if (!gmx_fio_do_int(ef->fio, *file_version)) *bOK = FALSE;
 +        if (*bOK && *file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        if (!gmx_fio_do_double(ef->fio, fr->t))       *bOK = FALSE;
 +        if (!gmx_fio_do_gmx_large_int(ef->fio, fr->step)) *bOK = FALSE;
 +        if (!bRead && fr->nsum == 1) {
 +            /* Do not store sums of length 1,
 +             * since this does not add information.
 +             */
 +            if (!gmx_fio_do_int(ef->fio, zero))      *bOK = FALSE;
 +        } else {
 +            if (!gmx_fio_do_int(ef->fio, fr->nsum))  *bOK = FALSE;
 +        }
 +        if (*file_version >= 3)
 +        {
 +            if (!gmx_fio_do_gmx_large_int(ef->fio, fr->nsteps)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->nsteps = max(1,fr->nsum);
 +        }
 +        if (*file_version >= 5)
 +        {
 +            if (!gmx_fio_do_double(ef->fio, fr->dt)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->dt = 0;
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->nre))     *bOK = FALSE;
 +    if (*file_version < 4)
 +    {
 +        if (!gmx_fio_do_int(ef->fio, ndisre))  *bOK = FALSE;
 +    }
 +    else
 +    {
 +        /* now reserved for possible future use */
 +        if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +    }
 +
 +    if (!gmx_fio_do_int(ef->fio, fr->nblock))  *bOK = FALSE;
 +    if (fr->nblock < 0) *bOK=FALSE;
 +
 +    if (ndisre!=0)
 +    {
 +        if (*file_version >= 4)
 +        {
 +            enx_warning("Distance restraint blocks in old style in new style file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        fr->nblock+=1;
 +    }
 +
 +
 +    /* Frames could have nre=0, so we can not rely only on the fr->nre check */
 +    if (bRead && nre_test >= 0 &&
 +        ((fr->nre > 0 && fr->nre != nre_test) ||
 +         fr->nre < 0 || ndisre < 0 || fr->nblock < 0))
 +    {
 +        *bWrongPrecision = TRUE;
 +        return *bOK;
 +    }
 +
 +    /* we now know what these should be, or we've already bailed out because
 +       of wrong precision */
 +    if ( *file_version==1 && (fr->t < 0 || fr->t > 1e20 || fr->step < 0 ) )
 +    {
 +        enx_warning("edr file with negative step number or unreasonable time (and without version number).");
 +        *bOK=FALSE;
 +        return FALSE;
 +    }
 +
 +
 +    if (*bOK && bRead)
 +    {
 +        add_blocks_enxframe(fr, fr->nblock);
 +    }
 +
 +    startb=0;
 +    if (ndisre>0)
 +    {
 +        /* sub[0] is the instantaneous data, sub[1] is time averaged */
 +        add_subblocks_enxblock(&(fr->block[0]), 2);
 +        fr->block[0].id=enxDISRE;
 +        fr->block[0].sub[0].nr=ndisre;
 +        fr->block[0].sub[1].nr=ndisre;
 +        fr->block[0].sub[0].type=dtreal;
 +        fr->block[0].sub[1].type=dtreal;
 +        startb++;
 +    }
 +
 +    /* read block header info */
 +    for(b=startb; b<fr->nblock; b++)
 +    {
 +        if (*file_version<4)
 +        {
 +            /* blocks in old version files always have 1 subblock that 
 +               consists of reals. */
 +            int nrint;
 +
 +            if (bRead)
 +            {
 +                add_subblocks_enxblock(&(fr->block[b]), 1);
 +            }
 +            else
 +            {
 +                if (fr->block[b].nsub != 1)
 +                {
 +                    gmx_incons("Writing an old version .edr file with too many subblocks");
 +                }
 +                if (fr->block[b].sub[0].type != dtreal)
 +                {
 +                    gmx_incons("Writing an old version .edr file the wrong subblock type");
 +                }
 +            }
 +            nrint = fr->block[b].sub[0].nr;
 +            
 +            if (!gmx_fio_do_int(ef->fio, nrint))
 +            {
 +                *bOK = FALSE;
 +            }
 +            fr->block[b].id          = b - startb;
 +            fr->block[b].sub[0].nr   = nrint;
 +            fr->block[b].sub[0].type = dtreal;
 +        }
 +        else
 +        {
 +            int i;
 +            /* in the new version files, the block header only contains
 +               the ID and the number of subblocks */
 +            int nsub=fr->block[b].nsub;
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, fr->block[b].id);
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, nsub);
 +
 +            fr->block[b].nsub=nsub;
 +            if (bRead)
 +                add_subblocks_enxblock(&(fr->block[b]), nsub);
 +
 +            /* read/write type & size for each subblock */
 +            for(i=0;i<nsub;i++)
 +            {
 +                t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +                int typenr=sub->type;
 +
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, typenr);
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, sub->nr);
 +
 +                sub->type = (xdr_datatype)typenr;
 +            }
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->e_size))  *bOK = FALSE;
 +
 +    /* now reserved for possible future use */
 +    if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +
 +    /* Do a dummy int to keep the format compatible with the old code */
 +    if (!gmx_fio_do_int(ef->fio, dum))         *bOK = FALSE;
 +    
 +    if (*bOK && *file_version == 1 && nre_test < 0)
 +    {
 +#if 0
 +        if (fp >= ener_old_nalloc)
 +        {
 +            gmx_incons("Problem with reading old format energy files");
 +        }
 +#endif
 +        
 +        if (!ef->eo.bReadFirstStep)
 +        {
 +            ef->eo.bReadFirstStep = TRUE;
 +            ef->eo.first_step     = fr->step;
 +            ef->eo.step_prev      = fr->step;
 +            ef->eo.nsum_prev      = 0;
 +        }
 +        
 +        fr->nsum   = fr->step - ef->eo.first_step + 1;
 +        fr->nsteps = fr->step - ef->eo.step_prev;
 +        fr->dt     = 0;
 +    }
 +      
 +    return *bOK;
 +}
 +
 +void free_enxnms(int n,gmx_enxnm_t *nms)
 +{
 +    int i;
 +
 +    for(i=0; i<n; i++)
 +    {
 +        sfree(nms[i].name);
 +        sfree(nms[i].unit);
 +    }
 +
 +    sfree(nms);
 +}
 +
 +void close_enx(ener_file_t ef)
 +{
 +    if(gmx_fio_close(ef->fio) != 0)
 +    {
 +        gmx_file("Cannot close energy file; it might be corrupt, or maybe you are out of disk space?");
 +    }
 +}
 +
 +static gmx_bool empty_file(const char *fn)
 +{
 +    FILE *fp;
 +    char dum;
 +    int  ret;
 +    gmx_bool bEmpty;
 +    
 +    fp = gmx_fio_fopen(fn,"r");
 +    ret = fread(&dum,sizeof(dum),1,fp);
 +    bEmpty = feof(fp);
 +    gmx_fio_fclose(fp);
 +    
 +    return bEmpty;
 +}
 +
 +
 +ener_file_t open_enx(const char *fn,const char *mode)
 +{
 +    int        nre,i;
 +    gmx_enxnm_t *nms=NULL;
 +    int        file_version=-1;
 +    t_enxframe *fr;
 +    gmx_bool       bWrongPrecision,bOK=TRUE;
 +    struct ener_file *ef;
 +
 +    snew(ef,1);
 +
 +    if (mode[0]=='r') {
 +        ef->fio=gmx_fio_open(fn,mode);
 +        gmx_fio_checktype(ef->fio);
 +        gmx_fio_setprecision(ef->fio,FALSE);
 +        do_enxnms(ef,&nre,&nms);
 +        snew(fr,1);
 +        do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +        if(!bOK)
 +        {
 +            gmx_file("Cannot read energy file header. Corrupt file?");
 +        }
 +
 +        /* Now check whether this file is in single precision */
 +        if (!bWrongPrecision &&
 +            ((fr->e_size && (fr->nre == nre) && 
 +              (nre*4*(long int)sizeof(float) == fr->e_size)) ) )
 +        {
 +            fprintf(stderr,"Opened %s as single precision energy file\n",fn);
 +            free_enxnms(nre,nms);
 +        }
 +        else
 +        {
 +            gmx_fio_rewind(ef->fio);
 +            gmx_fio_checktype(ef->fio);
 +            gmx_fio_setprecision(ef->fio,TRUE);
 +            do_enxnms(ef,&nre,&nms);
 +            do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +            if(!bOK)
 +            {
 +                gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +            }
 +
 +            if (((fr->e_size && (fr->nre == nre) && 
 +                            (nre*4*(long int)sizeof(double) == fr->e_size)) ))
 +                fprintf(stderr,"Opened %s as double precision energy file\n",
 +                        fn);
 +            else {
 +                if (empty_file(fn))
 +                    gmx_fatal(FARGS,"File %s is empty",fn);
 +                else
 +                    gmx_fatal(FARGS,"Energy file %s not recognized, maybe different CPU?",
 +                              fn);
 +            }
 +            free_enxnms(nre,nms);
 +        }
 +        free_enxframe(fr);
 +        sfree(fr);
 +        gmx_fio_rewind(ef->fio);
 +    }
 +    else 
 +        ef->fio = gmx_fio_open(fn,mode);
 +
 +    ef->framenr=0;
 +    ef->frametime=0;
 +    return ef;
 +}
 +
 +t_fileio *enx_file_pointer(const ener_file_t ef)
 +{
 +    return ef->fio;
 +}
 +/* Convert the whole-run sums stored in a frame of an old-format file
 + * into sums over the interval since the previous frame, and update the
 + * bookkeeping in ener_old (ener_prev, nsum_prev, step_prev).
 + * do_enx calls this after reading a frame when the old-file flag of the
 + * energy file is set.
 + */
 +static void convert_full_sums(ener_old_t *ener_old,t_enxframe *fr)
 +{
 +    int nstep_all;
 +    int ne,ns,i;
 +    double esum_all,eav_all;
 +    
 +    if (fr->nsum > 0)
 +    {
 +        ne = 0;
 +        ns = 0;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            if (fr->ener[i].e    != 0) ne++; /* terms with a non-zero energy */
 +            if (fr->ener[i].esum != 0) ns++; /* terms with a non-zero sum */
 +        }
 +        if (ne > 0 && ns == 0)
 +        {
 +            /* We do not have all energy sums */
 +            fr->nsum = 0;
 +        }
 +    }
 +    
 +    /* Convert old full simulation sums to sums between energy frames.
 +     * This is only done when the sum length equals the number of steps
 +     * since the first frame and the previous frame had usable sums.
 +     */
 +    nstep_all = fr->step - ener_old->first_step + 1;
 +    if (fr->nsum > 1 && fr->nsum == nstep_all && ener_old->nsum_prev > 0)
 +    {
 +        /* Set the new sum length: the frame step difference */
 +        fr->nsum = fr->step - ener_old->step_prev;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            esum_all = fr->ener[i].esum; /* keep the whole-run values: */
 +            eav_all  = fr->ener[i].eav;  /* they become ener_prev below */
 +            fr->ener[i].esum = esum_all - ener_old->ener_prev[i].esum;
 +            fr->ener[i].eav  = eav_all  - ener_old->ener_prev[i].eav
 +                - dsqr(ener_old->ener_prev[i].esum/(nstep_all - fr->nsum)
 +                       - esum_all/nstep_all)*
 +                (nstep_all - fr->nsum)*nstep_all/(double)fr->nsum;
 +            ener_old->ener_prev[i].esum = esum_all;
 +            ener_old->ener_prev[i].eav  = eav_all;
 +        }
 +        ener_old->nsum_prev = nstep_all;
 +    }
 +    else if (fr->nsum > 0)
 +    {
 +        if (fr->nsum != nstep_all)
 +        {
 +            fprintf(stderr,"\nWARNING: something is wrong with the energy sums, will not use exact averages\n");
 +            ener_old->nsum_prev = 0; /* blocks the conversion for the next frame */
 +        }
 +        else
 +        {
 +            ener_old->nsum_prev = nstep_all;
 +        }
 +        /* Copy all sums to ener_prev; the frame itself is left as read */
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            ener_old->ener_prev[i].esum = fr->ener[i].esum;
 +            ener_old->ener_prev[i].eav  = fr->ener[i].eav;
 +        }
 +    }
 +    
 +    ener_old->step_prev = fr->step;
 +}
 +/* Read or write one complete energy frame (header, energy terms and all
 + * data blocks), depending on the mode in which ef was opened.
 + * When reading, the buffers in fr are grown as needed and the frame
 + * counter and last frame time kept in ef are updated.
 + * Returns TRUE on success. When reading, FALSE means that no further
 + * frame could be read or that the frame was incomplete. Write failures
 + * are reported through gmx_file or gmx_fatal.
 + */
 +gmx_bool do_enx(ener_file_t ef,t_enxframe *fr)
 +{
 +    int       file_version=-1;
 +    int       i,b;
 +    gmx_bool      bRead,bOK,bOK1,bSane;
 +    real      tmp1,tmp2,rdum;
 +    char      buf[22];
 +    /*int       d_size;*/
 +    
 +    bOK = TRUE;
 +    bRead = gmx_fio_getread(ef->fio);
 +    if (!bRead)
 +    {  
 +        fr->e_size = fr->nre*sizeof(fr->ener[0].e)*4; /* the size open_enx checks for when probing the precision */
 +        /*d_size = fr->ndisre*(sizeof(real)*2);*/
 +    }
 +    gmx_fio_checktype(ef->fio);
 +
 +    if (!do_eheader(ef,&file_version,fr,-1,NULL,&bOK)) /* -1: no term count check */
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\rLast energy frame read %d time %8.3f         ",
 +                    ef->framenr-1,ef->frametime);
 +            if (!bOK) /* bOK still TRUE here means the file simply ended */
 +            {
 +                fprintf(stderr,
 +                        "\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                        ef->framenr,fr->t);
 +            }
 +        }
 +        else
 +        {
 +            gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +        }
 +        return FALSE;
 +    }
 +    if (bRead)
 +    {
 +        if ((ef->framenr <   20 || ef->framenr %   10 == 0) && /* report progress less often as the frame count grows */
 +            (ef->framenr <  200 || ef->framenr %  100 == 0) &&
 +            (ef->framenr < 2000 || ef->framenr % 1000 == 0))
 +        {
 +            fprintf(stderr,"\rReading energy frame %6d time %8.3f         ",
 +                    ef->framenr,fr->t);
 +        }
 +        ef->framenr++;
 +        ef->frametime = fr->t;
 +    }
 +    /* Check sanity of this header: a frame should carry energy terms or
 +     * at least one block with subblocks, and a non-negative step.
 +     * A failing check only produces a warning.
 +     */
 +    bSane = fr->nre > 0 ;
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        bSane = bSane || (fr->block[b].nsub > 0);
 +    }
 +    if (!((fr->step >= 0) && bSane))
 +    {
 +        fprintf(stderr,"\nWARNING: there may be something wrong with energy file %s\n",
 +                gmx_fio_getname(ef->fio));
 +        fprintf(stderr,"Found: step=%s, nre=%d, nblock=%d, time=%g.\n"
 +                "Trying to skip frame expect a crash though\n",
 +                gmx_step_str(fr->step,buf),fr->nre,fr->nblock,fr->t);
 +    }
 +    if (bRead && fr->nre > fr->e_alloc)
 +    {
 +        srenew(fr->ener,fr->nre);
 +        for(i=fr->e_alloc; (i<fr->nre); i++) /* clear only the newly added entries */
 +        {
 +            fr->ener[i].e    = 0;
 +            fr->ener[i].eav  = 0;
 +            fr->ener[i].esum = 0;
 +        }
 +        fr->e_alloc = fr->nre;
 +    }
 +    
 +    for(i=0; i<fr->nre; i++)
 +    {
 +        bOK = bOK && gmx_fio_do_real(ef->fio, fr->ener[i].e);
 +        
 +        /* Do not store sums of length 1,
 +         * since this does not add information.
 +         */
 +        if (file_version == 1 ||
 +            (bRead && fr->nsum > 0) || fr->nsum > 1)
 +        {
 +            tmp1 = fr->ener[i].eav;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp1);
 +            if (bRead)
 +                fr->ener[i].eav = tmp1;
 +            
 +            /* This is to save only in single precision (unless compiled in DP) */
 +            tmp2 = fr->ener[i].esum;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp2);
 +            if (bRead)
 +                fr->ener[i].esum = tmp2;
 +            
 +            if (file_version == 1)
 +            {
 +                /* Old, unused real */
 +                rdum = 0;
 +                bOK = bOK && gmx_fio_do_real(ef->fio, rdum);
 +            }
 +        }
 +    }
 +    
 +    /* Here we can not check for file_version==1, since one could have
 +     * continued an old format simulation with a new one with mdrun -append.
 +     */
 +    if (bRead && ef->eo.bOldFileOpen)
 +    {
 +        /* Convert old full simulation sums to sums between energy frames */
 +        convert_full_sums(&(ef->eo),fr);
 +    }
 +    /* read the blocks */
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        /* now read the subblocks. */
 +        int nsub=fr->block[b].nsub; /* shortcut */
 +        int i;
 +
 +        for(i=0;i<nsub;i++)
 +        {
 +            t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +
 +            if (bRead)
 +            {
 +                enxsubblock_alloc(sub); /* type and nr were set by do_eheader */
 +            }
 +
 +            /* read/write data */
 +            bOK1=TRUE;
 +            switch (sub->type)
 +            {
 +                case xdr_datatype_float:
 +                    bOK1=gmx_fio_ndo_float(ef->fio, sub->fval, sub->nr); 
 +                    break;
 +                case xdr_datatype_double:
 +                    bOK1=gmx_fio_ndo_double(ef->fio, sub->dval, sub->nr); 
 +                    break;
 +                case xdr_datatype_int:
 +                    bOK1=gmx_fio_ndo_int(ef->fio, sub->ival, sub->nr);
 +                    break;
 +                case xdr_datatype_large_int:
 +                    bOK1=gmx_fio_ndo_gmx_large_int(ef->fio, sub->lval, sub->nr);
 +                    break;
 +                case xdr_datatype_char:
 +                    bOK1=gmx_fio_ndo_uchar(ef->fio, sub->cval, sub->nr);
 +                    break;
 +                case xdr_datatype_string:
 +                    bOK1=gmx_fio_ndo_string(ef->fio, sub->sval, sub->nr);
 +                    break;
 +                default:
 +                    gmx_incons("Reading unknown block data type: this file is corrupted or from the future");
 +            }
 +            bOK = bOK && bOK1;
 +        }
 +    }
 +    
 +    if(!bRead)
 +    {
 +        if( gmx_fio_flush(ef->fio) != 0) /* flush after every frame written */
 +        {
 +            gmx_file("Cannot write energy file; maybe you are out of disk space?");
 +        }
 +    }
 +    
 +    if (!bOK)
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\nLast energy frame read %d",
 +                    ef->framenr-1);
 +            fprintf(stderr,"\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                    ef->framenr,fr->t);
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS,"could not write energies");
 +        }
 +        return FALSE; 
 +    }
 +    
 +    return TRUE;
 +}
 +
 +static real find_energy(const char *name, int nre, gmx_enxnm_t *enm,
 +                        t_enxframe *fr)
 +{
 +    int i;
 +    
 +    for(i=0; i<nre; i++)
 +    {
 +        if (strcmp(enm[i].name,name) == 0)
 +        {
 +            return  fr->ener[i].e;
 +        }
 +    }
 +    
 +    gmx_fatal(FARGS,"Could not find energy term named '%s'",name);
 +    
 +    return 0;
 +}
 +
 +/* Restore coupling variables of a simulation state from energy file fn.
 + * The frame with time t is located (fatal error when it is not found)
 + * and, depending on the coupling settings in ir, the following terms
 + * are copied from it into state:
 + *  - box velocities, for Parrinello-Rahman pressure coupling
 + *  - Nose-Hoover thermostat variables (Xi, vXi), one set per
 + *    temperature coupling group named in groups
 + *  - Nose-Hoover barostat variables, for the Trotter NPT/NPH integrators
 + * Terms are looked up by name, so the names must match what was written.
 + */
 +void get_enx_state(const char *fn, real t, gmx_groups_t *groups, t_inputrec *ir,
 +                   t_state *state)
 +{
 +  /* Should match the names in mdebin.c */
 +  static const char *boxvel_nm[] = {
 +  "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
 +  "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
 +  };
 +  
 +  static const char *pcouplmu_nm[] = { /* NOTE(review): not referenced in this function */
 +    "Pcoupl-Mu-XX", "Pcoupl-Mu-YY", "Pcoupl-Mu-ZZ",
 +    "Pcoupl-Mu-YX", "Pcoupl-Mu-ZX", "Pcoupl-Mu-ZY"
 +  };
 +  static const char *baro_nm[] = {
 +    "Barostat"
 +  };
 +
 +
 +  int ind0[] = { XX,YY,ZZ,YY,ZZ,ZZ }; /* ind0[i],ind1[i]: matrix element that boxvel_nm[i] refers to */
 +  int ind1[] = { XX,YY,ZZ,XX,XX,YY };
 +  int nre,nfr,i,j,ni,npcoupl;
 +  char       buf[STRLEN];
 +  const char *bufi;
 +  gmx_enxnm_t *enm=NULL;
 +  t_enxframe *fr;
 +  ener_file_t in;
 +
 +  in = open_enx(fn,"r");
 +  do_enxnms(in,&nre,&enm);
 +  snew(fr,1);
 +  nfr = 0;
 +  while ((nfr==0 || fr->t != t) && do_enx(in,fr)) { /* read on until the frame time equals t exactly */
 +    nfr++;
 +  }
 +  close_enx(in);
 +  fprintf(stderr,"\n");
 +
 +  if (nfr == 0 || fr->t != t)
 +    gmx_fatal(FARGS,"Could not find frame with time %f in '%s'",t,fn);
 +  
 +  npcoupl = TRICLINIC(ir->compress) ? 6 : 3; /* also the off-diagonal elements when triclinic */
 +  if (ir->epc == epcPARRINELLORAHMAN) {
 +    clear_mat(state->boxv);
 +    for(i=0; i<npcoupl; i++) {
 +      state->boxv[ind0[i]][ind1[i]] =
 +      find_energy(boxvel_nm[i],nre,enm,fr);
 +    }
 +    fprintf(stderr,"\nREAD %d BOX VELOCITIES FROM %s\n\n",npcoupl,fn);
 +  }
 +
 +  if (ir->etc == etcNOSEHOOVER) 
 +  {
 +      char cns[20];
 +
 +      cns[0] = '\0'; /* stays empty unless the Trotter branch below fills it */
 +
 +      for(i=0; i<state->ngtc; i++) {
 +          ni = groups->grps[egcTC].nm_ind[i];
 +          bufi = *(groups->grpname[ni]);
 +          for(j=0; (j<state->nhchainlength); j++) 
 +          {
 +              if (IR_NVT_TROTTER(ir))
 +              {
 +                  sprintf(cns,"-%d",j); /* chain index becomes part of the term name */
 +              }
 +              sprintf(buf,"Xi%s-%s",cns,bufi);
 +              state->nosehoover_xi[i] = find_energy(buf,nre,enm,fr); /* NOTE(review): index is i for every j, so only the last chain element survives - confirm against the layout of nosehoover_xi */
 +              sprintf(buf,"vXi%s-%s",cns,bufi);
 +              state->nosehoover_vxi[i] = find_energy(buf,nre,enm,fr); /* NOTE(review): same indexing question as for nosehoover_xi */
 +          }
 +
 +      }
 +      fprintf(stderr,"\nREAD %d NOSE-HOOVER Xi chains FROM %s\n\n",state->ngtc,fn);
 +
++      if (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir))
 +      {
 +          for(i=0; i<state->nnhpres; i++) {
 +              bufi = baro_nm[0]; /* All barostat DOF's together for now */
 +              for(j=0; (j<state->nhchainlength); j++) 
 +              {
 +                  sprintf(buf,"Xi-%d-%s",j,bufi); 
 +                  state->nhpres_xi[i] = find_energy(buf,nre,enm,fr); /* NOTE(review): indexed by i only, as in the thermostat loop - confirm */
 +                  sprintf(buf,"vXi-%d-%s",j,bufi);
 +                  state->nhpres_vxi[i] = find_energy(buf,nre,enm,fr);
 +              }
 +          }
 +          fprintf(stderr,"\nREAD %d NOSE-HOOVER BAROSTAT Xi chains FROM %s\n\n",state->nnhpres,fn);
 +      }
 +  } 
 +
 +  free_enxnms(nre,enm);
 +  free_enxframe(fr);
 +  sfree(fr);
 +}
 +
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 51bb616569ae8addeff57e298c42b88e0bc9941d,0000000000000000000000000000000000000000..22101ee6361b23de57bf832262da0cb738610f1a
mode 100644,000000..100644
--- /dev/null
@@@ -1,1511 -1,0 +1,1588 @@@
-     PI("andersen-seed",ir->andersen_seed);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +/* This file is completely threadsafe - please keep it that way! */
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "names.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "vec.h"
 +#include "macros.h"
 +
 +
 +int pr_indent(FILE *fp,int n)
 +{
 +  int i;
 +
 +  for (i=0; i<n; i++) (void) fprintf(fp," ");
 +  return n;
 +}
 +
 +int available(FILE *fp,void *p,int indent,const char *title)
 +{
 +  if (!p) {
 +    if (indent > 0)
 +      pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s: not available\n",title);
 +  }
 +  return (p!=NULL);
 +}
 +
 +int pr_title(FILE *fp,int indent,const char *title)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s:\n",title);
 +  return (indent+INDENT);
 +}
 +
 +int pr_title_n(FILE *fp,int indent,const char *title,int n)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%d):\n",title,n);
 +  return (indent+INDENT);
 +}
 +
 +int pr_title_nxn(FILE *fp,int indent,const char *title,int n1,int n2)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%dx%d):\n",title,n1,n2);
 +  return (indent+INDENT);
 +}
 +
 +void pr_ivec(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%d\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +void pr_ivec_block(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +    
 +    if (available(fp,vec,indent,title))
 +    {
 +        indent=pr_title_n(fp,indent,title,n);
 +        i = 0;
 +        while (i < n)
 +        {
 +            j = i+1;
 +            while (j < n && vec[j] == vec[j-1]+1)
 +            {
 +                j++;
 +            }
 +            /* Print consecutive groups of 3 or more as blocks */
 +            if (j - i < 3)
 +            {
 +                while(i < j)
 +                {
 +                    (void) pr_indent(fp,indent);
 +                    (void) fprintf(fp,"%s[%d]=%d\n",
 +                                   title,bShowNumbers?i:-1,vec[i]);
 +                    i++;
 +                }
 +            }
 +            else
 +            {
 +                (void) pr_indent(fp,indent);
 +                (void) fprintf(fp,"%s[%d,...,%d] = {%d,...,%d}\n",
 +                               title,
 +                               bShowNumbers?i:-1,
 +                               bShowNumbers?j-1:-1,
 +                               vec[i],vec[j-1]); 
 +                i = j;
 +            }
 +        }
 +    }
 +}
 +
 +void pr_bvec(FILE *fp,int indent,const char *title,gmx_bool vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%s\n",title,bShowNumbers?i:-1,
 +                       EBOOL(vec[i]));
 +        }
 +    }
 +}
 +
 +void pr_ivecs(FILE *fp,int indent,const char *title,ivec vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_nxn(fp,indent,title,n,DIM);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={",title,bShowNumbers?i:-1);
 +          for (j=0; j<DIM; j++)
 +            {
 +              if (j!=0) (void) fprintf(fp,", ");
 +              fprintf(fp,"%d",vec[i][j]);
 +            }
 +          (void) fprintf(fp,"}\n");
 +        }
 +    }
 +}
 +
 +void pr_rvec(FILE *fp,int indent,const char *title,real vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          pr_indent(fp,indent);
 +          fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +void pr_dvec(FILE *fp,int indent,const char *title,double vec[],int n, gmx_bool bShowNumbers)
 +{
 +      int i;
 +      
 +      if (available(fp,vec,indent,title))
 +    {  
 +              indent=pr_title_n(fp,indent,title,n);
 +              for (i=0; i<n; i++)
 +        {
 +                      pr_indent(fp,indent);
 +                      fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +
 +/*
 +void pr_mat(FILE *fp,int indent,char *title,matrix m)
 +{
 +  int i,j;
 +  
 +  if (available(fp,m,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for(i=0; i<n; i++) {
 +      pr_indent(fp,indent);
 +      fprintf(fp,"%s[%d]=%12.5e %12.5e %12.5e\n",
 +            title,bShowNumbers?i:-1,m[i][XX],m[i][YY],m[i][ZZ]);
 +    }
 +  }
 +}
 +*/
 +
 +void pr_rvecs_len(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +      if (j != 0) 
 +        (void) fprintf(fp,", ");
 +      (void) fprintf(fp,"%12.5e",vec[i][j]);
 +      }
 +      (void) fprintf(fp,"} len=%12.5e\n",norm(vec[i]));
 +    }
 +  }
 +}
 +
 +void pr_rvecs(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  const char *fshort = "%12.5e";
 +  const char *flong  = "%15.8e";
 +  const char *format;
 +  int i,j;
 +
 +  if (getenv("LONGFORMAT") != NULL)
 +    format = flong;
 +  else
 +    format = fshort;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +      if (j != 0) 
 +        (void) fprintf(fp,", ");
 +      (void) fprintf(fp,format,vec[i][j]);
 +      }
 +      (void) fprintf(fp,"}\n");
 +    }
 +  }
 +}
 +
 +
 +void pr_reals(FILE *fp,int indent,const char *title,real *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +void pr_doubles(FILE *fp,int indent,const char *title,double *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +static void pr_int(FILE *fp,int indent,const char *title,int i)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %d\n",title,i);
 +}
 +
 +static void pr_gmx_large_int(FILE *fp,int indent,const char *title,gmx_large_int_t i)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,gmx_step_str(i,buf));
 +}
 +
 +static void pr_real(FILE *fp,int indent,const char *title,real r)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,r);
 +}
 +
 +static void pr_double(FILE *fp,int indent,const char *title,double d)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,d);
 +}
 +
 +static void pr_str(FILE *fp,int indent,const char *title,const char *s)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,s);
 +}
 +
 +void pr_qm_opts(FILE *fp,int indent,const char *title,t_grpopts *opts)
 +{
 +  int i,m,j;
 +
 +  fprintf(fp,"%s:\n",title);
 +  
 +  pr_int(fp,indent,"ngQM",opts->ngQM);
 +  if (opts->ngQM > 0) {
 +    pr_ivec(fp,indent,"QMmethod",opts->QMmethod,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMbasis",opts->QMbasis,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMcharge",opts->QMcharge,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMmult",opts->QMmult,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bSH",opts->bSH,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASorbitals",opts->CASorbitals,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASelectrons",opts->CASelectrons,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"SAsteps",opts->SAsteps,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bOPT",opts->bOPT,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bTS",opts->bTS,opts->ngQM,FALSE);
 +  }
 +}
 +
 +static void pr_grp_opts(FILE *out,int indent,const char *title,t_grpopts *opts,
 +                      gmx_bool bMDPformat)
 +{
 +  int i,m,j;
 +
 +  if (!bMDPformat)
 +    fprintf(out,"%s:\n",title);
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"nrdf%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->nrdf[i]);
 +  fprintf(out,"\n");
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"ref-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->ref_t[i]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"tau-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->tau_t[i]);
 +  fprintf(out,"\n");  
 +  
 +  /* Pretty-print the simulated annealing info */
 +  fprintf(out,"anneal%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10s",EANNEAL(opts->annealing[i]));
 +  fprintf(out,"\n");  
 + 
 +  fprintf(out,"ann-npoints%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10d",opts->anneal_npoints[i]);
 +  fprintf(out,"\n");  
 + 
 +  for(i=0; (i<opts->ngtc); i++) {
 +    if(opts->anneal_npoints[i]>0) {
 +      fprintf(out,"ann. times [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +      fprintf(out,"  %10.1f",opts->anneal_time[i][j]);
 +      fprintf(out,"\n");  
 +      fprintf(out,"ann. temps [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +      fprintf(out,"  %10.1f",opts->anneal_temp[i][j]);
 +      fprintf(out,"\n");  
 +    }
 +  }
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"acc:\t");
 +  for(i=0; (i<opts->ngacc); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10g",opts->acc[i][m]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"nfreeze:");
 +  for(i=0; (i<opts->ngfrz); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10s",opts->nFreeze[i][m] ? "Y" : "N");
 +  fprintf(out,"\n");
 +
 +
 +  for(i=0; (i<opts->ngener); i++) {
 +    pr_indent(out,indent);
 +    fprintf(out,"energygrp-flags[%3d]:",i);
 +    for(m=0; (m<opts->ngener); m++)
 +      fprintf(out," %d",opts->egp_flags[opts->ngener*i+m]);
 +    fprintf(out,"\n");
 +  }
 +
 +  fflush(out);
 +}
 +
 +static void pr_matrix(FILE *fp,int indent,const char *title,rvec *m,
 +                    gmx_bool bMDPformat)
 +{
 +  if (bMDPformat)
 +    fprintf(fp,"%-10s    = %g %g %g %g %g %g\n",title,
 +          m[XX][XX],m[YY][YY],m[ZZ][ZZ],m[XX][YY],m[XX][ZZ],m[YY][ZZ]);
 +  else
 +    pr_rvecs(fp,indent,title,m,DIM);
 +}
 +
 +static void pr_cosine(FILE *fp,int indent,const char *title,t_cosines *cos,
 +                    gmx_bool bMDPformat)
 +{
 +  int j;
 +  
 +  if (bMDPformat) {
 +    fprintf(fp,"%s = %d\n",title,cos->n);
 +  }
 +  else {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"n = %d\n",cos->n);
 +    if (cos->n > 0) {
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"a =");
 +      for(j=0; (j<cos->n); j++)
 +      fprintf(fp," %e",cos->a[j]);
 +      fprintf(fp,"\n");
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"phi =");
 +      for(j=0; (j<cos->n); j++)
 +      fprintf(fp," %e",cos->phi[j]);
 +      fprintf(fp,"\n");
 +    }
 +  }
 +}
 +
 +#define PS(t,s) pr_str(fp,indent,t,s)
 +#define PI(t,s) pr_int(fp,indent,t,s)
 +#define PSTEP(t,s) pr_gmx_large_int(fp,indent,t,s)
 +#define PR(t,s) pr_real(fp,indent,t,s)
 +#define PD(t,s) pr_double(fp,indent,t,s)
 +
 +static void pr_pullgrp(FILE *fp,int indent,int g,t_pullgrp *pg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"pull-group %d:\n",g);
 +  indent += 2;
 +  pr_ivec_block(fp,indent,"atom",pg->ind,pg->nat,TRUE);
 +  pr_rvec(fp,indent,"weight",pg->weight,pg->nweight,TRUE);
 +  PI("pbcatom",pg->pbcatom);
 +  pr_rvec(fp,indent,"vec",pg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"init",pg->init,DIM,TRUE);
 +  PR("rate",pg->rate);
 +  PR("k",pg->k);
 +  PR("kB",pg->kB);
 +}
 +
++/* Print the simulated-tempering settings in simtemp: the low and high
++ * temperature, the scaling type, and the n_lambda temperatures.
++ * bMDPformat is accepted but not used here.
++ */
++static void pr_simtempvals(FILE *fp,int indent,t_simtemp *simtemp, int n_lambda, gmx_bool bMDPformat)
++{
++    pr_real(fp,indent,"simtemp_low",simtemp->simtemp_low);
++    pr_real(fp,indent,"simtemp_high",simtemp->simtemp_high);
++    pr_str(fp,indent,"simulated-tempering-scaling",
++           ESIMTEMP(simtemp->eSimTempScale));
++    pr_rvec(fp,indent,"simulated tempering temperatures",
++            simtemp->temperatures,n_lambda,TRUE);
++}
++
++/* Print the expanded-ensemble settings in expand. After the general
++ * settings, only the weight-equilibration parameter that belongs to the
++ * selected criterion (expand->elmceq) is printed, followed by the n_lambda
++ * initial lambda weights. bMDPformat is accepted but not used here.
++ */
++static void pr_expandedvals(FILE *fp,int indent,t_expanded *expand, int n_lambda, gmx_bool bMDPformat)
++{
++
++    PI("nstexpanded", expand->nstexpanded);
++    PS("lambda-stats", elamstats_names[expand->elamstats]);
++    PS("lambda-mc-move", elmcmove_names[expand->elmcmove]);
++    PI("lmc-repeats",expand->lmc_repeats);
++    PI("lmc-gibbsdelta",expand->gibbsdeltalam);
++    PI("lmc-nstart",expand->lmc_forced_nstart);
++    PS("symmetrized-transition-matrix", EBOOL(expand->bSymmetrizedTMatrix));
++    PI("nst-transition-matrix",expand->nstTij);
++    PI("mininum-var-min",expand->minvarmin); /*default is reasonable */
++    PI("weight-c-range",expand->c_range); /* default is just C=0 */
++    PR("wl-scale",expand->wl_scale);
++    PR("init-wl-delta",expand->init_wl_delta);
++    PR("wl-ratio",expand->wl_ratio);
++    PS("bWLoneovert",EBOOL(expand->bWLoneovert));
++    PI("lmc-seed",expand->lmc_seed);
++    PR("mc-temperature",expand->mc_temp);
++    PS("lmc-weights-equil",elmceq_names[expand->elmceq]);
++
++    /* Only the parameter of the active equilibration criterion is shown */
++    switch (expand->elmceq)
++    {
++    case elmceqNUMATLAM:
++        PI("weight-equil-number-all-lambda",expand->equil_n_at_lam);
++        break;
++    case elmceqSAMPLES:
++        PI("weight-equil-number-samples",expand->equil_samples);
++        break;
++    case elmceqSTEPS:
++        PI("weight-equil-number-steps",expand->equil_steps);
++        break;
++    case elmceqWLDELTA:
++        PR("weight-equil-wl-delta",expand->equil_wl_delta);
++        break;
++    case elmceqRATIO:
++        PR("weight-equil-count-ratio",expand->equil_ratio);
++        break;
++    default:
++        break;
++    }
++
++    /* NOTE(review): pr_rvec() indents its own output, so this extra
++     * pr_indent() looks like it doubles the indent of the heading - confirm
++     * whether that is intended.
++     */
++    pr_indent(fp,indent);
++    pr_rvec(fp,indent,"init-lambda-weights",expand->init_lambda_weights,n_lambda,TRUE);
++    PS("init-weights",EBOOL(expand->bInit_weights));
++}
++
++/* Print the free-energy perturbation settings in fep.
++ *
++ * "n-lambdas" is printed only outside MDP format. When n_lambda is positive,
++ * an "all-lambdas" heading is followed by one row per lambda component
++ * (efptNR rows, labelled from efpt_names) with n_lambda values each. The
++ * soft-core and dH/dlambda output settings follow.
++ */
++static void pr_fepvals(FILE *fp,int indent,t_lambda *fep, gmx_bool bMDPformat)
++{
++    int i,j;
++
++    PI("nstdhdl",fep->nstdhdl);
++    PI("init-fep_state",fep->init_fep_state);
++    PR("init-lambda",fep->init_lambda);
++    PR("delta-lambda",fep->delta_lambda);
++    if (!bMDPformat)
++    {
++        PI("n-lambdas",fep->n_lambda);
++    }
++    if (fep->n_lambda > 0)
++    {
++        pr_indent(fp,indent);
++        fprintf(fp,"all-lambdas%s\n",bMDPformat ? " = " : ":");
++        for(i=0; i<efptNR; i++) {
++            fprintf(fp,"%18s = ",efpt_names[i]);
++            for(j=0; j<fep->n_lambda; j++)
++            {
++                fprintf(fp,"  %10g",fep->all_lambda[i][j]);
++            }
++            fprintf(fp,"\n");
++        }
++    }
++
++    PR("sc-alpha",fep->sc_alpha);
++    PS("bScCoul",EBOOL(fep->bScCoul));
++    PS("bScPrintEnergy",EBOOL(fep->bPrintEnergy));
++    PI("sc-power",fep->sc_power);
++    PR("sc-r-power",fep->sc_r_power);
++    PR("sc-sigma",fep->sc_sigma);
++    PR("sc-sigma_min",fep->sc_sigma_min);
++    PS("separate-dhdl-file", SEPDHDLFILETYPE(fep->separate_dhdl_file));
++    PS("dhdl-derivatives", DHDLDERIVATIVESTYPE(fep->dhdl_derivatives));
++    PI("dh-hist-size", fep->dh_hist_size);
++    PD("dh-hist-spacing", fep->dh_hist_spacing);
++}
++
 +static void pr_pull(FILE *fp,int indent,t_pull *pull)
 +{
 +  int g;
 +
 +  PS("pull-geometry",EPULLGEOM(pull->eGeom));
 +  pr_ivec(fp,indent,"pull-dim",pull->dim,DIM,TRUE);
 +  PR("pull-r1",pull->cyl_r1);
 +  PR("pull-r0",pull->cyl_r0);
 +  PR("pull-constr-tol",pull->constr_tol);
 +  PI("pull-nstxout",pull->nstxout);
 +  PI("pull-nstfout",pull->nstfout);
 +  PI("pull-ngrp",pull->ngrp);
 +  for(g=0; g<pull->ngrp+1; g++)
 +    pr_pullgrp(fp,indent,g,&pull->grp[g]);
 +}
 +
 +static void pr_rotgrp(FILE *fp,int indent,int g,t_rotgrp *rotg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"rotation_group %d:\n",g);
 +  indent += 2;
 +  PS("type",EROTGEOM(rotg->eType));
 +  PS("massw",EBOOL(rotg->bMassW));
 +  pr_ivec_block(fp,indent,"atom",rotg->ind,rotg->nat,TRUE);
 +  pr_rvecs(fp,indent,"x_ref",rotg->x_ref,rotg->nat);
 +  pr_rvec(fp,indent,"vec",rotg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"pivot",rotg->pivot,DIM,TRUE);
 +  PR("rate",rotg->rate);
 +  PR("k",rotg->k);
 +  PR("slab_dist",rotg->slab_dist);
 +  PR("min_gaussian",rotg->min_gaussian);
 +  PR("epsilon",rotg->eps);
 +  PS("fit_method",EROTFIT(rotg->eFittype));
 +  PI("potfitangle_nstep",rotg->PotAngle_nstep);
 +  PR("potfitangle_step",rotg->PotAngle_step);
 +}
 +
 +static void pr_rot(FILE *fp,int indent,t_rot *rot)
 +{
 +  int g;
 +
 +  PI("rot_nstrout",rot->nstrout);
 +  PI("rot_nstsout",rot->nstsout);
 +  PI("rot_ngrp",rot->ngrp);
 +  for(g=0; g<rot->ngrp; g++)
 +    pr_rotgrp(fp,indent,g,&rot->grp[g]);
 +}
 +
 +void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
 +                 gmx_bool bMDPformat)
 +{
 +  const char *infbuf="inf";
 +  int  i;
 +  
 +  if (available(fp,ir,indent,title)) {
 +    if (!bMDPformat)
 +      indent=pr_title(fp,indent,title);
 +    PS("integrator",EI(ir->eI));
 +    PSTEP("nsteps",ir->nsteps);
 +    PSTEP("init-step",ir->init_step);
 +    PS("ns-type",ENS(ir->ns_type));
 +    PI("nstlist",ir->nstlist);
 +    PI("ndelta",ir->ndelta);
 +    PI("nstcomm",ir->nstcomm);
 +    PS("comm-mode",ECOM(ir->comm_mode));
 +    PI("nstlog",ir->nstlog);
 +    PI("nstxout",ir->nstxout);
 +    PI("nstvout",ir->nstvout);
 +    PI("nstfout",ir->nstfout);
 +    PI("nstcalcenergy",ir->nstcalcenergy);
 +    PI("nstenergy",ir->nstenergy);
 +    PI("nstxtcout",ir->nstxtcout);
 +    PR("init-t",ir->init_t);
 +    PR("delta-t",ir->delta_t);
 +    
 +    PR("xtcprec",ir->xtcprec);
 +    PI("nkx",ir->nkx);
 +    PI("nky",ir->nky);
 +    PI("nkz",ir->nkz);
 +    PI("pme-order",ir->pme_order);
 +    PR("ewald-rtol",ir->ewald_rtol);
 +    PR("ewald-geometry",ir->ewald_geometry);
 +    PR("epsilon-surface",ir->epsilon_surface);
 +    PS("optimize-fft",EBOOL(ir->bOptFFT));
 +    PS("ePBC",EPBC(ir->ePBC));
 +    PS("bPeriodicMols",EBOOL(ir->bPeriodicMols));
 +    PS("bContinuation",EBOOL(ir->bContinuation));
 +    PS("bShakeSOR",EBOOL(ir->bShakeSOR));
 +    PS("etc",ETCOUPLTYPE(ir->etc));
++    PS("bPrintNHChains",EBOOL(ir->bPrintNHChains));
 +    PI("nsttcouple",ir->nsttcouple);
 +    PS("epc",EPCOUPLTYPE(ir->epc));
 +    PS("epctype",EPCOUPLTYPETYPE(ir->epct));
 +    PI("nstpcouple",ir->nstpcouple);
 +    PR("tau-p",ir->tau_p);
 +    pr_matrix(fp,indent,"ref-p",ir->ref_p,bMDPformat);
 +    pr_matrix(fp,indent,"compress",ir->compress,bMDPformat);
 +    PS("refcoord-scaling",EREFSCALINGTYPE(ir->refcoord_scaling));
 +    if (bMDPformat)
 +      fprintf(fp,"posres-com  = %g %g %g\n",ir->posres_com[XX],
 +            ir->posres_com[YY],ir->posres_com[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-com",ir->posres_com,DIM,TRUE);
 +    if (bMDPformat)
 +      fprintf(fp,"posres-comB = %g %g %g\n",ir->posres_comB[XX],
 +            ir->posres_comB[YY],ir->posres_comB[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-comB",ir->posres_comB,DIM,TRUE);
-         
 +    PR("rlist",ir->rlist);
 +    PR("rlistlong",ir->rlistlong);
 +    PR("rtpi",ir->rtpi);
 +    PS("coulombtype",EELTYPE(ir->coulombtype));
 +    PR("rcoulomb-switch",ir->rcoulomb_switch);
 +    PR("rcoulomb",ir->rcoulomb);
 +    PS("vdwtype",EVDWTYPE(ir->vdwtype));
 +    PR("rvdw-switch",ir->rvdw_switch);
 +    PR("rvdw",ir->rvdw);
 +    if (ir->epsilon_r != 0)
 +      PR("epsilon-r",ir->epsilon_r);
 +    else
 +      PS("epsilon-r",infbuf);
 +    if (ir->epsilon_rf != 0)
 +      PR("epsilon-rf",ir->epsilon_rf);
 +    else
 +      PS("epsilon-rf",infbuf);
 +    PR("tabext",ir->tabext);
 +    PS("implicit-solvent",EIMPLICITSOL(ir->implicit_solvent));
 +    PS("gb-algorithm",EGBALGORITHM(ir->gb_algorithm));
 +    PR("gb-epsilon-solvent",ir->gb_epsilon_solvent);
 +    PI("nstgbradii",ir->nstgbradii);
 +    PR("rgbradii",ir->rgbradii);
 +    PR("gb-saltconc",ir->gb_saltconc);
 +    PR("gb-obc-alpha",ir->gb_obc_alpha);
 +    PR("gb-obc-beta",ir->gb_obc_beta);
 +    PR("gb-obc-gamma",ir->gb_obc_gamma);
 +    PR("gb-dielectric-offset",ir->gb_dielectric_offset);
 +    PS("sa-algorithm",ESAALGORITHM(ir->gb_algorithm));
 +    PR("sa-surface-tension",ir->sa_surface_tension);
-     PR("init-lambda",ir->init_lambda);
-     PR("delta-lambda",ir->delta_lambda);
-     if (!bMDPformat)
-     {
-         PI("n-foreign-lambda",ir->n_flambda);
 +    PS("DispCorr",EDISPCORR(ir->eDispCorr));
++    PS("bSimTemp",EBOOL(ir->bSimTemp));
++    if (ir->bSimTemp) {
++        pr_simtempvals(fp,indent,ir->simtempvals,ir->fepvals->n_lambda,bMDPformat);
++    }
 +    PS("free-energy",EFEPTYPE(ir->efep));
-     if (ir->n_flambda > 0)
-     {
-         pr_indent(fp,indent);
-         fprintf(fp,"foreign-lambda%s",bMDPformat ? " = " : ":");
-         for(i=0; i<ir->n_flambda; i++)
-         {
-             fprintf(fp,"  %10g",ir->flambda[i]);
-         }
-         fprintf(fp,"\n");
++    if (ir->efep != efepNO || ir->bSimTemp) {
++        pr_fepvals(fp,indent,ir->fepvals,bMDPformat);
 +    }
-     PR("sc-alpha",ir->sc_alpha);
-     PI("sc-power",ir->sc_power);
-     PR("sc-sigma",ir->sc_sigma);
-     PR("sc-sigma-min",ir->sc_sigma_min);
-     PI("nstdhdl", ir->nstdhdl);
-     PS("separate-dhdl-file", SEPDHDLFILETYPE(ir->separate_dhdl_file));
-     PS("dhdl-derivatives", DHDLDERIVATIVESTYPE(ir->dhdl_derivatives));
-     PI("dh-hist-size", ir->dh_hist_size);
-     PD("dh-hist-spacing", ir->dh_hist_spacing);
++    if (ir->bExpanded) {
++        pr_expandedvals(fp,indent,ir->expandedvals,ir->fepvals->n_lambda,bMDPformat);
 +    }
-     fprintf(fp,"theta=%15.8e, ktheta=%15.8e, r13=%15.8e, kUB=%15.8e\n",
-           iparams->u_b.theta,iparams->u_b.ktheta,iparams->u_b.r13,iparams->u_b.kUB);
 +
 +    PI("nwall",ir->nwall);
 +    PS("wall-type",EWALLTYPE(ir->wall_type));
 +    PI("wall-atomtype[0]",ir->wall_atomtype[0]);
 +    PI("wall-atomtype[1]",ir->wall_atomtype[1]);
 +    PR("wall-density[0]",ir->wall_density[0]);
 +    PR("wall-density[1]",ir->wall_density[1]);
 +    PR("wall-ewald-zfac",ir->wall_ewald_zfac);
 +
 +    PS("pull",EPULLTYPE(ir->ePull));
 +    if (ir->ePull != epullNO)
 +      pr_pull(fp,indent,ir->pull);
 +    
 +    PS("rotation",EBOOL(ir->bRot));
 +    if (ir->bRot)
 +      pr_rot(fp,indent,ir->rot);
 +
 +    PS("disre",EDISRETYPE(ir->eDisre));
 +    PS("disre-weighting",EDISREWEIGHTING(ir->eDisreWeighting));
 +    PS("disre-mixed",EBOOL(ir->bDisreMixed));
 +    PR("dr-fc",ir->dr_fc);
 +    PR("dr-tau",ir->dr_tau);
 +    PR("nstdisreout",ir->nstdisreout);
 +    PR("orires-fc",ir->orires_fc);
 +    PR("orires-tau",ir->orires_tau);
 +    PR("nstorireout",ir->nstorireout);
 +
 +    PR("dihre-fc",ir->dihre_fc);
 +    
 +    PR("em-stepsize",ir->em_stepsize);
 +    PR("em-tol",ir->em_tol);
 +    PI("niter",ir->niter);
 +    PR("fc-stepsize",ir->fc_stepsize);
 +    PI("nstcgsteep",ir->nstcgsteep);
 +    PI("nbfgscorr",ir->nbfgscorr);
 +
 +    PS("ConstAlg",ECONSTRTYPE(ir->eConstrAlg));
 +    PR("shake-tol",ir->shake_tol);
 +    PI("lincs-order",ir->nProjOrder);
 +    PR("lincs-warnangle",ir->LincsWarnAngle);
 +    PI("lincs-iter",ir->nLincsIter);
 +    PR("bd-fric",ir->bd_fric);
 +    PI("ld-seed",ir->ld_seed);
 +    PR("cos-accel",ir->cos_accel);
 +    pr_matrix(fp,indent,"deform",ir->deform,bMDPformat);
 +
 +    PS("adress",EBOOL(ir->bAdress));
 +    if (ir->bAdress){
 +        PS("adress_type",EADRESSTYPE(ir->adress->type));
 +        PR("adress_const_wf",ir->adress->const_wf);
 +        PR("adress_ex_width",ir->adress->ex_width);
 +        PR("adress_hy_width",ir->adress->hy_width);
 +        PS("adress_interface_correction",EADRESSICTYPE(ir->adress->icor));
 +        PS("adress_site",EADRESSSITETYPE(ir->adress->site));
 +        PR("adress_ex_force_cap",ir->adress->ex_forcecap);
 +        PS("adress_do_hybridpairs", EBOOL(ir->adress->do_hybridpairs));
 +
 +        pr_rvec(fp,indent,"adress_reference_coords",ir->adress->refs,DIM,TRUE);
 +    }
 +    PI("userint1",ir->userint1);
 +    PI("userint2",ir->userint2);
 +    PI("userint3",ir->userint3);
 +    PI("userint4",ir->userint4);
 +    PR("userreal1",ir->userreal1);
 +    PR("userreal2",ir->userreal2);
 +    PR("userreal3",ir->userreal3);
 +    PR("userreal4",ir->userreal4);
 +    pr_grp_opts(fp,indent,"grpopts",&(ir->opts),bMDPformat);
 +    pr_cosine(fp,indent,"efield-x",&(ir->ex[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-xt",&(ir->et[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-y",&(ir->ex[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-yt",&(ir->et[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-z",&(ir->ex[ZZ]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-zt",&(ir->et[ZZ]),bMDPformat);
 +    PS("bQMMM",EBOOL(ir->bQMMM));
 +    PI("QMconstraints",ir->QMconstraints);
 +    PI("QMMMscheme",ir->QMMMscheme);
 +    PR("scalefactor",ir->scalefactor);
 +    pr_qm_opts(fp,indent,"qm-opts",&(ir->opts));
 +  }
 +}
 +#undef PS
 +#undef PR
 +#undef PI
 +
 +static void pr_harm(FILE *fp,t_iparams *iparams,const char *r,const char *kr)
 +{
 +  fprintf(fp,"%sA=%12.5e, %sA=%12.5e, %sB=%12.5e, %sB=%12.5e\n",
 +        r,iparams->harmonic.rA,kr,iparams->harmonic.krA,
 +        r,iparams->harmonic.rB,kr,iparams->harmonic.krB);
 +}
 +
 +void pr_iparams(FILE *fp,t_functype ftype,t_iparams *iparams)
 +{
 +  int i;
 +  real VA[4],VB[4],*rbcA,*rbcB;
 +
 +  switch (ftype) {
 +  case F_ANGLES:
 +  case F_G96ANGLES:
 +    pr_harm(fp,iparams,"th","ct");
 +    break;
 +  case F_CROSS_BOND_BONDS:
 +    fprintf(fp,"r1e=%15.8e, r2e=%15.8e, krr=%15.8e\n",
 +          iparams->cross_bb.r1e,iparams->cross_bb.r2e,
 +          iparams->cross_bb.krr);
 +    break;
 +  case F_CROSS_BOND_ANGLES:
 +    fprintf(fp,"r1e=%15.8e, r1e=%15.8e, r3e=%15.8e, krt=%15.8e\n",
 +          iparams->cross_ba.r1e,iparams->cross_ba.r2e,
 +          iparams->cross_ba.r3e,iparams->cross_ba.krt);
 +    break;
 +  case F_LINEAR_ANGLES:
 +    fprintf(fp,"klinA=%15.8e, aA=%15.8e, klinB=%15.8e, aB=%15.8e\n",
 +            iparams->linangle.klinA,iparams->linangle.aA,
 +            iparams->linangle.klinB,iparams->linangle.aB);
 +    break;
 +  case F_UREY_BRADLEY:
-     fprintf(fp,"b0=%15.8e, cb=%15.8e, beta=%15.8e\n",
-           iparams->morse.b0,iparams->morse.cb,iparams->morse.beta);
++      fprintf(fp,"thetaA=%15.8e, kthetaA=%15.8e, r13A=%15.8e, kUBA=%15.8e, thetaB=%15.8e, kthetaB=%15.8e, r13B=%15.8e, kUBB=%15.8e\n",iparams->u_b.thetaA,iparams->u_b.kthetaA,iparams->u_b.r13A,iparams->u_b.kUBA,iparams->u_b.thetaB,iparams->u_b.kthetaB,iparams->u_b.r13B,iparams->u_b.kUBB);
 +    break;
 +  case F_QUARTIC_ANGLES:
 +    fprintf(fp,"theta=%15.8e",iparams->qangle.theta);
 +    for(i=0; i<5; i++)
 +      fprintf(fp,", c%c=%15.8e",'0'+i,iparams->qangle.c[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_BHAM:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->bham.a,iparams->bham.b,iparams->bham.c);
 +    break;
 +  case F_BONDS:
 +  case F_G96BONDS:
 +  case F_HARMONIC:
 +    pr_harm(fp,iparams,"b0","cb");
 +    break;
 +  case F_IDIHS:
 +    pr_harm(fp,iparams,"xi","cx");
 +    break;
 +  case F_MORSE:
-     fprintf(fp,"label=%d, power=%4d phi=%15.8e, dphi=%15.8e, kfac=%15.8e)\n",
-           iparams->dihres.label,iparams->dihres.power,
-           iparams->dihres.phi,iparams->dihres.dphi,iparams->dihres.kfac);
++    fprintf(fp,"b0A=%15.8e, cbA=%15.8e, betaA=%15.8e, b0B=%15.8e, cbB=%15.8e, betaB=%15.8e\n",
++            iparams->morse.b0A,iparams->morse.cbA,iparams->morse.betaA,
++            iparams->morse.b0B,iparams->morse.cbB,iparams->morse.betaB);
 +    break;
 +  case F_CUBICBONDS:
 +    fprintf(fp,"b0=%15.8e, kb=%15.8e, kcub=%15.8e\n",
 +          iparams->cubic.b0,iparams->cubic.kb,iparams->cubic.kcub);
 +    break;
 +  case F_CONNBONDS:
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FENEBONDS:
 +    fprintf(fp,"bm=%15.8e, kb=%15.8e\n",iparams->fene.bm,iparams->fene.kb);
 +    break;
 +  case F_RESTRBONDS:
 +      fprintf(fp,"lowA=%15.8e, up1A=%15.8e, up2A=%15.8e, kA=%15.8e, lowB=%15.8e, up1B=%15.8e, up2B=%15.8e, kB=%15.8e,\n",
 +              iparams->restraint.lowA,iparams->restraint.up1A,
 +              iparams->restraint.up2A,iparams->restraint.kA,
 +              iparams->restraint.lowB,iparams->restraint.up1B,
 +              iparams->restraint.up2B,iparams->restraint.kB);
 +      break;
 +  case F_TABBONDS:
 +  case F_TABBONDSNC:
 +  case F_TABANGLES:
 +  case F_TABDIHS:
 +    fprintf(fp,"tab=%d, kA=%15.8e, kB=%15.8e\n",
 +          iparams->tab.table,iparams->tab.kA,iparams->tab.kB);
 +    break;
 +  case F_POLARIZATION:
 +    fprintf(fp,"alpha=%15.8e\n",iparams->polarize.alpha);
 +    break;
 +  case F_ANHARM_POL:
 +    fprintf(fp,"alpha=%15.8e drcut=%15.8e khyp=%15.8e\n",
 +            iparams->anharm_polarize.alpha,
 +            iparams->anharm_polarize.drcut,
 +            iparams->anharm_polarize.khyp);
 +    break;
 +  case F_THOLE_POL:
 +    fprintf(fp,"a=%15.8e, alpha1=%15.8e, alpha2=%15.8e, rfac=%15.8e\n",
 +          iparams->thole.a,iparams->thole.alpha1,iparams->thole.alpha2,
 +          iparams->thole.rfac);
 +    break;
 +  case F_WATER_POL:
 +    fprintf(fp,"al_x=%15.8e, al_y=%15.8e, al_z=%15.8e, rOH=%9.6f, rHH=%9.6f, rOD=%9.6f\n",
 +          iparams->wpol.al_x,iparams->wpol.al_y,iparams->wpol.al_z,
 +          iparams->wpol.rOH,iparams->wpol.rHH,iparams->wpol.rOD);
 +    break;
 +  case F_LJ:
 +    fprintf(fp,"c6=%15.8e, c12=%15.8e\n",iparams->lj.c6,iparams->lj.c12);
 +    break;
 +  case F_LJ14:
 +    fprintf(fp,"c6A=%15.8e, c12A=%15.8e, c6B=%15.8e, c12B=%15.8e\n",
 +          iparams->lj14.c6A,iparams->lj14.c12A,
 +          iparams->lj14.c6B,iparams->lj14.c12B);
 +    break;
 +  case F_LJC14_Q:
 +    fprintf(fp,"fqq=%15.8e, qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljc14.fqq,
 +          iparams->ljc14.qi,iparams->ljc14.qj,
 +          iparams->ljc14.c6,iparams->ljc14.c12);
 +    break;
 +  case F_LJC_PAIRS_NB:
 +    fprintf(fp,"qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljcnb.qi,iparams->ljcnb.qj,
 +          iparams->ljcnb.c6,iparams->ljcnb.c12);
 +    break;
 +  case F_PDIHS:
 +  case F_PIDIHS:
 +  case F_ANGRES:
 +  case F_ANGRESZ:
 +    fprintf(fp,"phiA=%15.8e, cpA=%15.8e, phiB=%15.8e, cpB=%15.8e, mult=%d\n",
 +          iparams->pdihs.phiA,iparams->pdihs.cpA,
 +          iparams->pdihs.phiB,iparams->pdihs.cpB,
 +          iparams->pdihs.mult);
 +    break;
 +  case F_DISRES:
 +    fprintf(fp,"label=%4d, type=%1d, low=%15.8e, up1=%15.8e, up2=%15.8e, fac=%15.8e)\n",
 +          iparams->disres.label,iparams->disres.type,
 +          iparams->disres.low,iparams->disres.up1,
 +          iparams->disres.up2,iparams->disres.kfac);
 +    break;
 +  case F_ORIRES:
 +    fprintf(fp,"ex=%4d, label=%d, power=%4d, c=%15.8e, obs=%15.8e, kfac=%15.8e)\n",
 +          iparams->orires.ex,iparams->orires.label,iparams->orires.power,
 +          iparams->orires.c,iparams->orires.obs,iparams->orires.kfac);
 +    break;
 +  case F_DIHRES:
++      fprintf(fp,"phiA=%15.8e, dphiA=%15.8e, kfacA=%15.8e, phiB=%15.8e, dphiB=%15.8e, kfacB=%15.8e\n",
++              iparams->dihres.phiA,iparams->dihres.dphiA,iparams->dihres.kfacA,
++              iparams->dihres.phiB,iparams->dihres.dphiB,iparams->dihres.kfacB);
 +    break;
 +  case F_POSRES:
 +    fprintf(fp,"pos0A=(%15.8e,%15.8e,%15.8e), fcA=(%15.8e,%15.8e,%15.8e), pos0B=(%15.8e,%15.8e,%15.8e), fcB=(%15.8e,%15.8e,%15.8e)\n",
 +          iparams->posres.pos0A[XX],iparams->posres.pos0A[YY],
 +          iparams->posres.pos0A[ZZ],iparams->posres.fcA[XX],
 +          iparams->posres.fcA[YY],iparams->posres.fcA[ZZ],
 +          iparams->posres.pos0B[XX],iparams->posres.pos0B[YY],
 +          iparams->posres.pos0B[ZZ],iparams->posres.fcB[XX],
 +          iparams->posres.fcB[YY],iparams->posres.fcB[ZZ]);
 +    break;
 +  case F_RBDIHS:
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcA[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcB[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FOURDIHS:
 +    /* Use the OPLS -> Ryckaert-Bellemans formula backwards to get the
 +     * OPLS potential constants back.
 +     */
 +    rbcA = iparams->rbdihs.rbcA;
 +    rbcB = iparams->rbdihs.rbcB;
 +
 +    VA[3] = -0.25*rbcA[4];
 +    VA[2] = -0.5*rbcA[3];
 +    VA[1] = 4.0*VA[3]-rbcA[2];
 +    VA[0] = 3.0*VA[2]-2.0*rbcA[1];
 +
 +    VB[3] = -0.25*rbcB[4];
 +    VB[2] = -0.5*rbcB[3];
 +    VB[1] = 4.0*VB[3]-rbcB[2];
 +    VB[0] = 3.0*VB[2]-2.0*rbcB[1];
 +
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourA[%d]=%15.8e",i==0?"":", ",i,VA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourB[%d]=%15.8e",i==0?"":", ",i,VB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +   
 +  case F_CONSTR:
 +  case F_CONSTRNC:
 +    fprintf(fp,"dA=%15.8e, dB=%15.8e\n",iparams->constr.dA,iparams->constr.dB);
 +    break;
 +  case F_SETTLE:
 +    fprintf(fp,"doh=%15.8e, dhh=%15.8e\n",iparams->settle.doh,
 +          iparams->settle.dhh);
 +    break;
 +  case F_VSITE2:
 +    fprintf(fp,"a=%15.8e\n",iparams->vsite.a);
 +    break;
 +  case F_VSITE3:
 +  case F_VSITE3FD:
 +  case F_VSITE3FAD:
 +    fprintf(fp,"a=%15.8e, b=%15.8e\n",iparams->vsite.a,iparams->vsite.b);
 +    break;
 +  case F_VSITE3OUT:
 +  case F_VSITE4FD:
 +  case F_VSITE4FDN:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->vsite.a,iparams->vsite.b,iparams->vsite.c);
 +    break;
 +  case F_VSITEN:
 +    fprintf(fp,"n=%2d, a=%15.8e\n",iparams->vsiten.n,iparams->vsiten.a);
 +    break;
 +  case F_GB12:
 +  case F_GB13:
 +  case F_GB14:
 +    fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n",iparams->gb.sar,iparams->gb.st,iparams->gb.pi,iparams->gb.gbr,iparams->gb.bmlt);
 +    break;              
 +  case F_CMAP:
 +    fprintf(fp, "cmapA=%1d, cmapB=%1d\n",iparams->cmap.cmapA, iparams->cmap.cmapB);
 +    break;              
 +  default:
 +    gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
 +            ftype,interaction_function[ftype].name,__FILE__,__LINE__);
 +  }
 +}
 +
 +void pr_ilist(FILE *fp,int indent,const char *title,
 +              t_functype *functype,t_ilist *ilist, gmx_bool bShowNumbers)
 +{
 +    int i,j,k,type,ftype;
 +    t_iatom *iatoms;
 +    
 +    if (available(fp,ilist,indent,title) && ilist->nr > 0)
 +    {  
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"nr: %d\n",ilist->nr);
 +        if (ilist->nr > 0) {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"iatoms:\n");
 +            iatoms=ilist->iatoms;
 +            for (i=j=0; i<ilist->nr;) {
 +#ifndef DEBUG
 +                (void) pr_indent(fp,indent+INDENT);
 +                type=*(iatoms++);
 +                ftype=functype[type];
 +                (void) fprintf(fp,"%d type=%d (%s)",
 +                               bShowNumbers?j:-1,bShowNumbers?type:-1,
 +                               interaction_function[ftype].name);
 +                j++;
 +                for (k=0; k<interaction_function[ftype].nratoms; k++)
 +                    (void) fprintf(fp," %u",*(iatoms++));
 +                (void) fprintf(fp,"\n");
 +                i+=1+interaction_function[ftype].nratoms;
 +#else
 +                fprintf(fp,"%5d%5d\n",i,iatoms[i]);
 +                i++;
 +#endif
 +            }
 +        }
 +    }
 +}
 +
 +static void pr_cmap(FILE *fp, int indent, const char *title,
 +                    gmx_cmap_t *cmap_grid, gmx_bool bShowNumbers)
 +{
 +    int i,j,nelem;
 +    real dx,idx;
 +      
 +    dx    = 360.0 / cmap_grid->grid_spacing;
 +    nelem = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
 +      
 +    if(available(fp,cmap_grid,indent,title))
 +    {
 +        fprintf(fp,"%s\n",title);
 +              
 +        for(i=0;i<cmap_grid->ngrid;i++)
 +        {
 +            idx = -180.0;
 +            fprintf(fp,"%8s %8s %8s %8s\n","V","dVdx","dVdy","d2dV");
 +                      
 +            fprintf(fp,"grid[%3d]={\n",bShowNumbers?i:-1);
 +                      
 +            for(j=0;j<nelem;j++)
 +            {
 +                if( (j%cmap_grid->grid_spacing)==0)
 +                {
 +                    fprintf(fp,"%8.1f\n",idx);
 +                    idx+=dx;
 +                }
 +                              
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+1]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+2]);
 +                fprintf(fp,"%8.3f\n",cmap_grid->cmapdata[i].cmap[j*4+3]);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +      
 +}
 +
 +void pr_ffparams(FILE *fp,int indent,const char *title,
 +                 gmx_ffparams_t *ffparams,
 +                 gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  indent=pr_title(fp,indent,title);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"atnr=%d\n",ffparams->atnr);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"ntypes=%d\n",ffparams->ntypes);
 +  for (i=0; i<ffparams->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                     bShowNumbers?i:-1,
 +                     interaction_function[ffparams->functype[i]].name);
 +      pr_iparams(fp,ffparams->functype[i],&ffparams->iparams[i]);
 +  }
 +  (void) pr_double(fp,indent,"reppow",ffparams->reppow);
 +  (void) pr_real(fp,indent,"fudgeQQ",ffparams->fudgeQQ);
 +  pr_cmap(fp,indent,"cmap",&ffparams->cmap_grid,bShowNumbers);
 +}
 +
 +void pr_idef(FILE *fp,int indent,const char *title,t_idef *idef, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  if (available(fp,idef,indent,title)) {  
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"atnr=%d\n",idef->atnr);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"ntypes=%d\n",idef->ntypes);
 +    for (i=0; i<idef->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                   bShowNumbers?i:-1,
 +                   interaction_function[idef->functype[i]].name);
 +      pr_iparams(fp,idef->functype[i],&idef->iparams[i]);
 +    }
 +    (void) pr_real(fp,indent,"fudgeQQ",idef->fudgeQQ);
 +
 +    for(j=0; (j<F_NRE); j++)
 +      pr_ilist(fp,indent,interaction_function[j].longname,
 +               idef->functype,&idef->il[j],bShowNumbers);
 +  }
 +}
 +
 +static int pr_block_title(FILE *fp,int indent,const char *title,t_block *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +    }
 +  return indent;
 +}
 +
 +static int pr_blocka_title(FILE *fp,int indent,const char *title,t_blocka *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nra=%d\n",block->nra);
 +    }
 +  return indent;
 +}
 +
 +static void low_pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block, gmx_bool bShowNumbers)
 +{
 +  int i;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      for (i=0; i<=block->nr; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->index[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->index[i]);
 +        }
 +      for (i=0; i<block->nra; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->a[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->a[i]);
 +        }
 +    }
 +}
 +
 +void pr_block(FILE *fp,int indent,const char *title,t_block *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_block_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={}\n",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d]={%d..%d}\n",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            start=end;
 +          }
 +    }
 +}
 +
 +void pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d][%d..%d]={",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            for (j=start; j<end; j++)
 +              {
 +                if (j>start) size+=fprintf(fp,", ");
 +                if ((size)>(USE_WIDTH))
 +                  {
 +                    (void) fprintf(fp,"\n");
 +                    size=pr_indent(fp,indent+INDENT);
 +                  }
 +                size+=fprintf(fp,"%u",block->a[j]);
 +              }
 +            (void) fprintf(fp,"}\n");
 +            start=end;
 +          }
 +      if ((end!=block->nra)||(!ok)) 
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"tables inconsistent, dumping complete tables:\n");
 +          low_pr_blocka(fp,indent,title,block,bShowNumbers);
 +        }
 +    }
 +}
 +
 +static void pr_strings(FILE *fp,int indent,const char *title,char ***nm,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]));
 +        }
 +    }
 +}
 +
 +static void pr_strings2(FILE *fp,int indent,const char *title,
 +                      char ***nm,char ***nmB,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\",nameB=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]),*(nmB[i]));
 +        }
 +    }
 +}
 +
 +static void pr_resinfo(FILE *fp,int indent,const char *title,t_resinfo *resinfo,int n, gmx_bool bShowNumbers)
 +{
 +    int i;
 +    
 +    if (available(fp,resinfo,indent,title))
 +    {  
 +        indent=pr_title_n(fp,indent,title,n);
 +        for (i=0; i<n; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            (void) fprintf(fp,"%s[%d]={name=\"%s\", nr=%d, ic='%c'}\n",
 +                           title,bShowNumbers?i:-1,
 +                           *(resinfo[i].name),resinfo[i].nr,
 +                           (resinfo[i].ic == '\0') ? ' ' : resinfo[i].ic);
 +        }
 +    }
 +}
 +
 +static void pr_atom(FILE *fp,int indent,const char *title,t_atom *atom,int n)
 +{
 +  int i,j;
 +  
 +  if (available(fp,atom,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      fprintf(fp,"%s[%6d]={type=%3d, typeB=%3d, ptype=%8s, m=%12.5e, "
 +              "q=%12.5e, mB=%12.5e, qB=%12.5e, resind=%5d, atomnumber=%3d}\n",
 +              title,i,atom[i].type,atom[i].typeB,ptype_str[atom[i].ptype],
 +              atom[i].m,atom[i].q,atom[i].mB,atom[i].qB,
 +              atom[i].resind,atom[i].atomnumber);
 +    }
 +  }
 +}
 +
 +static void pr_grps(FILE *fp,int indent,const char *title,t_grps grps[],
 +                  char **grpname[], gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +
 +    for(i=0; (i<egcNR); i++)
 +    {
 +        fprintf(fp,"%s[%-12s] nr=%d, name=[",title,gtypes[i],grps[i].nr);
 +        for(j=0; (j<grps[i].nr); j++)
 +        {
 +            fprintf(fp," %s",*(grpname[grps[i].nm_ind[j]]));
 +        }
 +        fprintf(fp,"]\n");
 +    }
 +}
 +
 +static void pr_groups(FILE *fp,int indent,const char *title,
 +                      gmx_groups_t *groups,
 +                      gmx_bool bShowNumbers)
 +{
 +    int grpnr[egcNR];
 +    int nat_max,i,g;
 +
 +    pr_grps(fp,indent,"grp",groups->grps,groups->grpname,bShowNumbers);
 +    pr_strings(fp,indent,"grpname",groups->grpname,groups->ngrpname,bShowNumbers);
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"groups          ");
 +    for(g=0; g<egcNR; g++)
 +    {
 +       printf(" %5.5s",gtypes[g]);
 +    }
 +    printf("\n");
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"allocated       ");
 +    nat_max = 0;
 +    for(g=0; g<egcNR; g++)
 +    {
 +        printf(" %5d",groups->ngrpnr[g]);
 +        nat_max = max(nat_max,groups->ngrpnr[g]);
 +    }
 +    printf("\n");
 +
 +    if (nat_max == 0)
 +    {
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"groupnr[%5s] =","*");
 +        for(g=0; g<egcNR; g++)
 +        {
 +            fprintf(fp,"  %3d ",0);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +    else
 +    {
 +        for(i=0; i<nat_max; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"groupnr[%5d] =",i);
 +            for(g=0; g<egcNR; g++)
 +            {
 +                fprintf(fp,"  %3d ",
 +                        groups->grpnr[g] ? groups->grpnr[g][i] : 0);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +}
 +
 +void pr_atoms(FILE *fp,int indent,const char *title,t_atoms *atoms, 
 +            gmx_bool bShownumbers)
 +{
 +  if (available(fp,atoms,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_atom(fp,indent,"atom",atoms->atom,atoms->nr);
 +      pr_strings(fp,indent,"atom",atoms->atomname,atoms->nr,bShownumbers);
 +      pr_strings2(fp,indent,"type",atoms->atomtype,atoms->atomtypeB,atoms->nr,bShownumbers);
 +      pr_resinfo(fp,indent,"residue",atoms->resinfo,atoms->nres,bShownumbers);
 +    }
 +}
 +
 +
 +/* Print the per-atom-type tables, one line per type: radius, volume,
 + * gb_radius, surftens, atomnumber and S_hct.
 + * With bShowNumbers false the type subscript is printed as -1.
 + */
 +void pr_atomtypes(FILE *fp,int indent,const char *title,t_atomtypes *atomtypes, 
 +                gmx_bool bShowNumbers)
 +{
 +  int i;
 +  if (available(fp,atomtypes,indent,title)) 
 +  {
 +    indent=pr_title(fp,indent,title);
 +    for(i=0;i<atomtypes->nr;i++) {
 +      pr_indent(fp,indent);
 +      /* NOTE(review): the format ends in ")}" although no '(' is opened;
 +       * left untouched because the text is part of the dump output.
 +       */
 +              fprintf(fp,
 +                              "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e)}\n",
 +                              bShowNumbers?i:-1,atomtypes->radius[i],atomtypes->vol[i],
 +                              atomtypes->gb_radius[i],
 +                              atomtypes->surftens[i],atomtypes->atomnumber[i],atomtypes->S_hct[i]);
 +    }
 +  }
 +}
 +
 +static void pr_moltype(FILE *fp,int indent,const char *title,
 +                       gmx_moltype_t *molt,int n,
 +                       gmx_ffparams_t *ffparams,
 +                       gmx_bool bShowNumbers)
 +{
 +    int j;
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(molt->name));
 +    pr_atoms(fp,indent,"atoms",&(molt->atoms),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&molt->cgs, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&molt->excls, bShowNumbers);
 +    for(j=0; (j<F_NRE); j++) {
 +        pr_ilist(fp,indent,interaction_function[j].longname,
 +                 ffparams->functype,&molt->ilist[j],bShowNumbers);
 +    }
 +}
 +
 +static void pr_molblock(FILE *fp,int indent,const char *title,
 +                        gmx_molblock_t *molb,int n,
 +                        gmx_moltype_t *molt,
 +                        gmx_bool bShowNumbers)
 +{
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%-20s = %d \"%s\"\n",
 +                   "moltype",molb->type,*(molt[molb->type].name));
 +    pr_int(fp,indent,"#molecules",molb->nmol);
 +    pr_int(fp,indent,"#atoms_mol",molb->natoms_mol);
 +    pr_int(fp,indent,"#posres_xA",molb->nposres_xA);
 +    if (molb->nposres_xA > 0) {
 +        pr_rvecs(fp,indent,"posres_xA",molb->posres_xA,molb->nposres_xA);
 +    }
 +    pr_int(fp,indent,"#posres_xB",molb->nposres_xB);
 +    if (molb->nposres_xB > 0) {
 +        pr_rvecs(fp,indent,"posres_xB",molb->posres_xB,molb->nposres_xB);
 +    }
 +}
 +
 +void pr_mtop(FILE *fp,int indent,const char *title,gmx_mtop_t *mtop,
 +             gmx_bool bShowNumbers)
 +{
 +    int mt,mb;
 +
 +    if (available(fp,mtop,indent,title)) {
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        (void) fprintf(fp,"name=\"%s\"\n",*(mtop->name));
 +        pr_int(fp,indent,"#atoms",mtop->natoms);
 +        for(mb=0; mb<mtop->nmolblock; mb++) {
 +            pr_molblock(fp,indent,"molblock",&mtop->molblock[mb],mb,
 +                        mtop->moltype,bShowNumbers);
 +        }
 +        pr_ffparams(fp,indent,"ffparams",&(mtop->ffparams),bShowNumbers);
 +        pr_atomtypes(fp,indent,"atomtypes",&(mtop->atomtypes),bShowNumbers);
 +        for(mt=0; mt<mtop->nmoltype; mt++) {
 +            pr_moltype(fp,indent,"moltype",&mtop->moltype[mt],mt,
 +                       &mtop->ffparams,bShowNumbers);
 +        }
 +        pr_groups(fp,indent,"groups",&mtop->groups,bShowNumbers);
 +    }
 +}
 +
 +void pr_top(FILE *fp,int indent,const char *title,t_topology *top, gmx_bool bShowNumbers)
 +{
 +  if (available(fp,top,indent,title)) {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(top->name));
 +    pr_atoms(fp,indent,"atoms",&(top->atoms),bShowNumbers);
 +    pr_atomtypes(fp,indent,"atomtypes",&(top->atomtypes),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&top->cgs, bShowNumbers);
 +    pr_block(fp,indent,"mols",&top->mols, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&top->excls, bShowNumbers);
 +    pr_idef(fp,indent,"idef",&top->idef,bShowNumbers);
 +  }
 +}
 +
 +void pr_header(FILE *fp,int indent,const char *title,t_tpxheader *sh)
 +{
 +  char buf[22];
 +    
 +  if (available(fp,sh,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bIr    = %spresent\n",sh->bIr?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bBox   = %spresent\n",sh->bBox?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bTop   = %spresent\n",sh->bTop?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bX     = %spresent\n",sh->bX?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bV     = %spresent\n",sh->bV?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bF     = %spresent\n",sh->bF?"":"not ");
 +      
 +      pr_indent(fp,indent);
 +      fprintf(fp,"natoms = %d\n",sh->natoms);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"lambda = %e\n",sh->lambda);
 +    }
 +}
 +
 +/* Print the nodeid, nnodes and npmenodes fields of a communication
 + * record under a "commrec:" heading. cr is dereferenced without a
 + * prior check.
 + */
 +void pr_commrec(FILE *fp,int indent,t_commrec *cr)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"commrec:\n");
 +  /* The fields are indented two columns relative to the heading */
 +  indent+=2;
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nodeid    = %d\n",cr->nodeid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nnodes    = %d\n",cr->nnodes);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"npmenodes = %d\n",cr->npmenodes);
 +  /*
 +  pr_indent(fp,indent);
 +  fprintf(fp,"threadid  = %d\n",cr->threadid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nthreads  = %d\n",cr->nthreads);
 +  */
 +}
index e610c2617d9fe44f1d97afcad478c8d8d5777c7d,0000000000000000000000000000000000000000..3878e0ac0e17b1db45232f6666f5afab9c1ed2d2
mode 100644,000000..100644
--- /dev/null
@@@ -1,753 -1,0 +1,830 @@@
-   memset(ir,0,(size_t)sizeof(*ir));
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +/* This file is completely threadsafe - keep it that way! */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "smalloc.h"
 +#include "symtab.h"
 +#include "vec.h"
 +#include "pbc.h"
 +#include "macros.h"
 +#include <string.h>
 +
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +/* The source code in this file should be thread-safe. 
 +      Please keep it that way. */
 +
 +
 +
 +static gmx_bool bOverAllocDD=FALSE;
 +#ifdef GMX_THREAD_MPI
 +static tMPI_Thread_mutex_t over_alloc_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +
 +
 +/* Set the file-scope flag bOverAllocDD that over_alloc_dd() consults.
 + * When built with GMX_THREAD_MPI the write is done while holding
 + * over_alloc_mutex; readers do not take the mutex.
 + */
 +void set_over_alloc_dd(gmx_bool set)
 +{
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&over_alloc_mutex);
 +    /* we just make sure that we don't set this at the same time. 
 +       We don't worry too much about reading this rarely-set variable */
 +#endif    
 +    bOverAllocDD = set;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&over_alloc_mutex);
 +#endif    
 +}
 +
 +int over_alloc_dd(int n)
 +{
 +  if (bOverAllocDD)
 +    return OVER_ALLOC_FAC*n + 100;
 +  else
 +    return n;
 +}
 +
 +/* Convert step to int and return the converted value.
 + * When warn is not NULL and step lies outside [INT_MIN, INT_MAX], a
 + * warning naming warn, the original value and the converted value is
 + * written to stderr. With warn == NULL the conversion is silent.
 + */
 +int gmx_large_int_to_int(gmx_large_int_t step,const char *warn)
 +{
 +  int i;
 +
 +  /* The cast is done unconditionally; the range check below only
 +   * controls whether a warning is printed.
 +   */
 +  i = (int)step;
 +
 +  if (warn != NULL && (step < INT_MIN || step > INT_MAX)) {
 +    fprintf(stderr,"\nWARNING during %s:\n",warn);
 +    fprintf(stderr,"step value ");
 +    fprintf(stderr,gmx_large_int_pfmt,step);
 +    fprintf(stderr," does not fit in int, converted to %d\n\n",i);
 +  }
 +
 +  return i;
 +}
 +
 +/* Format i into buf using gmx_large_int_pfmt and return buf.
 + * No length is passed, so the caller must supply a buffer that is large
 + * enough for the formatted value and its terminating NUL.
 + */
 +char *gmx_step_str(gmx_large_int_t i,char *buf)
 +{
 +  sprintf(buf,gmx_large_int_pfmt,i);
 +
 +  return buf;
 +}
 +
 +void init_block(t_block *block)
 +{
 +  int i;
 +
 +  block->nr           = 0;
 +  block->nalloc_index = 1;
 +  snew(block->index,block->nalloc_index);
 +  block->index[0]     = 0;
 +}
 +
 +void init_blocka(t_blocka *block)
 +{
 +  int i;
 +
 +  block->nr           = 0;
 +  block->nra          = 0;
 +  block->nalloc_index = 1;
 +  snew(block->index,block->nalloc_index);
 +  block->index[0]     = 0;
 +  block->nalloc_a     = 0;
 +  block->a            = NULL;
 +}
 +
 +void init_atom(t_atoms *at)
 +{
 +  int i;
 +
 +  at->nr       = 0;
 +  at->nres     = 0;
 +  at->atom     = NULL;
 +  at->resinfo  = NULL;
 +  at->atomname = NULL;
 +  at->atomtype = NULL;
 +  at->atomtypeB= NULL;
 +  at->pdbinfo  = NULL;
 +}
 +
 +void init_atomtypes(t_atomtypes *at)
 +{
 +  at->nr = 0;
 +  at->radius = NULL;
 +  at->vol = NULL;
 +  at->atomnumber = NULL;
 +  at->gb_radius = NULL;
 +  at->S_hct = NULL;
 +}
 +
 +void init_groups(gmx_groups_t *groups)
 +{
 +  int g;
 +
 +  groups->ngrpname = 0;
 +  groups->grpname  = NULL;
 +  for(g=0; (g<egcNR); g++) {
 +    groups->grps[g].nm_ind = NULL;
 +    groups->ngrpnr[g] = 0;
 +    groups->grpnr[g]  = NULL;
 +  }
 +
 +}
 +
 +void init_mtop(gmx_mtop_t *mtop)
 +{
 +  mtop->name = NULL;
 +  mtop->nmoltype = 0;
 +  mtop->moltype = NULL;
 +  mtop->nmolblock = 0;
 +  mtop->molblock = NULL;
 +  mtop->maxres_renum = 0;
 +  mtop->maxresnr = -1;
 +  init_groups(&mtop->groups);
 +  init_block(&mtop->mols);
 +  open_symtab(&mtop->symtab);
 +}
 +
 +void init_top (t_topology *top)
 +{
 +  int i;
 +  
 +  top->name = NULL;
 +  init_atom (&(top->atoms));
 +  init_atomtypes(&(top->atomtypes));
 +  init_block(&top->cgs);
 +  init_block(&top->mols);
 +  init_blocka(&top->excls);
 +  open_symtab(&top->symtab);
 +}
 +
 +/* Zero the whole input record and allocate one element each for its
 + * fepvals, expandedvals and simtempvals members.
 + */
 +void init_inputrec(t_inputrec *ir)
 +{
-  {
++    memset(ir,0,(size_t)sizeof(*ir));
++    snew(ir->fepvals,1);
++    snew(ir->expandedvals,1);
++    snew(ir->simtempvals,1);
 +}
 +
 +void stupid_fill_block(t_block *grp,int natom,gmx_bool bOneIndexGroup)
 +{
 +  int i;
 +
 +  if (bOneIndexGroup) {
 +    grp->nalloc_index = 2;
 +    snew(grp->index,grp->nalloc_index);
 +    grp->index[0]=0;
 +    grp->index[1]=natom;
 +    grp->nr=1;
 +  }
 +  else {
 +    grp->nalloc_index = natom+1;
 +    snew(grp->index,grp->nalloc_index);
 +    snew(grp->index,natom+1);
 +    for(i=0; (i<=natom); i++)
 +      grp->index[i]=i;
 +    grp->nr=natom;
 +  }
 +}
 +
 +void stupid_fill_blocka(t_blocka *grp,int natom)
 +{
 +  int i;
 +
 +  grp->nalloc_a = natom;
 +  snew(grp->a,grp->nalloc_a);
 +  for(i=0; (i<natom); i++)
 +    grp->a[i]=i;
 +  grp->nra=natom;
 +  
 +  grp->nalloc_index = natom + 1;
 +  snew(grp->index,grp->nalloc_index);
 +  for(i=0; (i<=natom); i++)
 +    grp->index[i]=i;
 +  grp->nr=natom;
 +}
 +
 +void copy_blocka(const t_blocka *src,t_blocka *dest)
 +{
 +  int i;
 +
 +  dest->nr = src->nr;
 +  dest->nalloc_index = dest->nr + 1;
 +  snew(dest->index,dest->nalloc_index);
 +  for(i=0; i<dest->nr+1; i++) {
 +    dest->index[i] = src->index[i];
 +  }
 +  dest->nra = src->nra;
 +  dest->nalloc_a = dest->nra + 1;
 +  snew(dest->a,dest->nalloc_a);
 +  for(i=0; i<dest->nra+1; i++) {
 +    dest->a[i] = src->a[i];
 +  }
 +}
 +
 +void done_block(t_block *block)
 +{
 +  block->nr    = 0;
 +  sfree(block->index);
 +  block->nalloc_index = 0;
 +}
 +
 +void done_blocka(t_blocka *block)
 +{
 +  block->nr    = 0;
 +  block->nra   = 0;
 +  sfree(block->index);
 +  if (block->a)
 +    sfree(block->a);
 +  block->nalloc_index = 0;
 +  block->nalloc_a = 0;
 +}
 +
 +void done_atom (t_atoms *at)
 +{
 +  at->nr       = 0;
 +  at->nres     = 0;
 +  sfree(at->atom);
 +  sfree(at->resinfo);
 +  sfree(at->atomname);
 +  sfree(at->atomtype);
 +  sfree(at->atomtypeB);
 +}
 +
 +void done_atomtypes(t_atomtypes *atype)
 +{
 +  atype->nr = 0;
 +  sfree(atype->radius);
 +  sfree(atype->vol);
 +  sfree(atype->surftens);
 +  sfree(atype->atomnumber);
 +  sfree(atype->gb_radius);
 +  sfree(atype->S_hct);
 +}
 +
 +void done_moltype(gmx_moltype_t *molt)
 +{
 +  int f;
 +  
 +  done_atom(&molt->atoms);
 +  done_block(&molt->cgs);
 +  done_blocka(&molt->excls);
 +
 +  for(f=0; f<F_NRE; f++) {
 +    sfree(molt->ilist[f].iatoms);
 +    molt->ilist[f].nalloc = 0;
 +  }
 +}
 +
 +void done_molblock(gmx_molblock_t *molb)
 +{
 +  if (molb->nposres_xA > 0) {
 +    molb->nposres_xA = 0;
 +    free(molb->posres_xA);
 +  }
 +  if (molb->nposres_xB > 0) {
 +    molb->nposres_xB = 0;
 +    free(molb->posres_xB);
 +  }
 +}
 +
 +void done_mtop(gmx_mtop_t *mtop,gmx_bool bDoneSymtab)
 +{
 +  int i;
 +
 +  if (bDoneSymtab) {
 +    done_symtab(&mtop->symtab);
 +  }
 +
 +  sfree(mtop->ffparams.functype);
 +  sfree(mtop->ffparams.iparams);
 +
 +  for(i=0; i<mtop->nmoltype; i++) {
 +    done_moltype(&mtop->moltype[i]);
 +  }
 +  sfree(mtop->moltype);
 +  for(i=0; i<mtop->nmolblock; i++) {
 +    done_molblock(&mtop->molblock[i]);
 +  }
 +  sfree(mtop->molblock);
 +  done_block(&mtop->mols);
 +}
 +
 +void done_top(t_topology *top)
 +{
 +  int f;
 +  
 +  sfree(top->idef.functype);
 +  sfree(top->idef.iparams);
 +  for (f = 0; f < F_NRE; ++f)
 +  {
 +      sfree(top->idef.il[f].iatoms);
 +      top->idef.il[f].iatoms = NULL;
 +      top->idef.il[f].nalloc = 0;
 +  }
 +
 +  done_atom (&(top->atoms));
 +
 +  /* For GB */
 +  done_atomtypes(&(top->atomtypes));
 +
 +  done_symtab(&(top->symtab));
 +  done_block(&(top->cgs));
 +  done_block(&(top->mols));
 +  done_blocka(&(top->excls));
 +}
 +
 +static void done_pullgrp(t_pullgrp *pgrp)
 +{
 +  sfree(pgrp->ind);
 +  sfree(pgrp->ind_loc);
 +  sfree(pgrp->weight);
 +  sfree(pgrp->weight_loc);
 +}
 +
 +static void done_pull(t_pull *pull)
 +{
 +  int i;
 +
 +  for(i=0; i<pull->ngrp+1; i++) {
 +    done_pullgrp(pull->grp);
 +    done_pullgrp(pull->dyna);
 +  }
 +}
 +
 +void done_inputrec(t_inputrec *ir)
 +{
 +  int m;
 +  
 +  for(m=0; (m<DIM); m++) {
 +    if (ir->ex[m].a)   sfree(ir->ex[m].a);
 +    if (ir->ex[m].phi) sfree(ir->ex[m].phi);
 +    if (ir->et[m].a)   sfree(ir->et[m].a);
 +    if (ir->et[m].phi) sfree(ir->et[m].phi);
 +  }
 +
 +  sfree(ir->opts.nrdf);
 +  sfree(ir->opts.ref_t);
 +  sfree(ir->opts.annealing); 
 +  sfree(ir->opts.anneal_npoints); 
 +  sfree(ir->opts.anneal_time); 
 +  sfree(ir->opts.anneal_temp); 
 +  sfree(ir->opts.tau_t);
 +  sfree(ir->opts.acc);
 +  sfree(ir->opts.nFreeze);
 +  sfree(ir->opts.QMmethod);
 +  sfree(ir->opts.QMbasis);
 +  sfree(ir->opts.QMcharge);
 +  sfree(ir->opts.QMmult);
 +  sfree(ir->opts.bSH);
 +  sfree(ir->opts.CASorbitals);
 +  sfree(ir->opts.CASelectrons);
 +  sfree(ir->opts.SAon);
 +  sfree(ir->opts.SAoff);
 +  sfree(ir->opts.SAsteps);
 +  sfree(ir->opts.bOPT);
 +  sfree(ir->opts.bTS);
 +
 +  if (ir->pull) {
 +    done_pull(ir->pull);
 +    sfree(ir->pull);
 +  }
 +}
 +
 +static void zero_ekinstate(ekinstate_t *eks)
 +{
 +  eks->ekin_n         = 0;
 +  eks->ekinh          = NULL;
 +  eks->ekinf          = NULL;
 +  eks->ekinh_old      = NULL;
 +  eks->ekinscalef_nhc = NULL;
 +  eks->ekinscaleh_nhc = NULL;
 +  eks->vscale_nhc     = NULL;
 +  eks->dekindl        = 0;
 +  eks->mvcos          = 0;
 +}
 +
 +void init_energyhistory(energyhistory_t * enerhist)
 +{
 +    enerhist->nener = 0;
 +
 +    enerhist->ener_ave     = NULL;
 +    enerhist->ener_sum     = NULL;
 +    enerhist->ener_sum_sim = NULL;
 +    enerhist->dht          = NULL;
 +
 +    enerhist->nsteps     = 0;
 +    enerhist->nsum       = 0;
 +    enerhist->nsteps_sim = 0;
 +    enerhist->nsum_sim   = 0;
 +
 +    enerhist->dht = NULL;
 +}
 +
 +static void done_delta_h_history(delta_h_history_t *dht)
 +{
 +    int i;
 +
 +    for(i=0; i<dht->nndh; i++)
 +    {
 +        sfree(dht->dh[i]);
 +    }
 +    sfree(dht->dh);
 +    sfree(dht->ndh);
 +}
 +
 +void done_energyhistory(energyhistory_t * enerhist)
 +{
 +    sfree(enerhist->ener_ave);
 +    sfree(enerhist->ener_sum);
 +    sfree(enerhist->ener_sum_sim);
 +
 +    if (enerhist->dht != NULL)
 +    {
 +        done_delta_h_history(enerhist->dht);
 +        sfree(enerhist->dht);
 +    }
 +}
 +
 +/* Store the coupling-group counts in state and allocate the matching arrays.
 + *
 + * state->ngtc, state->nnhpres and state->nhchainlength are set from the
 + * arguments.
 + * When ngtc > 0, nosehoover_xi and nosehoover_vxi get nhchainlength*ngtc
 + * entries and therm_integral gets ngtc entries, all set to 0.0; otherwise
 + * the three pointers are set to NULL.
 + * When nnhpres > 0, nhpres_xi and nhpres_vxi get nhchainlength*nnhpres
 + * entries set to 0.0; otherwise both pointers are set to NULL.
 + * Any arrays previously referenced by state are not freed here.
 + */
 +void init_gtc_state(t_state *state, int ngtc, int nnhpres, int nhchainlength)
 +{
 +    int i,j;
 +
 +    state->ngtc = ngtc;
 +    state->nnhpres = nnhpres;
 +    state->nhchainlength = nhchainlength;
 +    if (state->ngtc > 0)
 +    {
 +        snew(state->nosehoover_xi,state->nhchainlength*state->ngtc); 
 +        snew(state->nosehoover_vxi,state->nhchainlength*state->ngtc);
 +        snew(state->therm_integral,state->ngtc);
 +        /* Entry j of group i is stored at index i*nhchainlength + j */
 +        for(i=0; i<state->ngtc; i++)
 +        {
 +            for (j=0;j<state->nhchainlength;j++)
- void init_state(t_state *state, int natoms, int ngtc, int nnhpres, int nhchainlength)
++            {
 +                state->nosehoover_xi[i*state->nhchainlength + j]  = 0.0;
 +                state->nosehoover_vxi[i*state->nhchainlength + j]  = 0.0;
 +            }
 +        }
 +        for(i=0; i<state->ngtc; i++) {
 +            state->therm_integral[i]  = 0.0;
 +        }
 +    }
 +    else
 +    {
 +        state->nosehoover_xi  = NULL;
 +        state->nosehoover_vxi = NULL;
 +        state->therm_integral = NULL;
 +    }
 +
 +    if (state->nnhpres > 0)
 +    {
 +        snew(state->nhpres_xi,state->nhchainlength*nnhpres); 
 +        snew(state->nhpres_vxi,state->nhchainlength*nnhpres);
 +        /* Same i*nhchainlength + j layout as the arrays above */
 +        for(i=0; i<nnhpres; i++) 
 +        {
 +            for (j=0;j<state->nhchainlength;j++) 
 +            {
 +                state->nhpres_xi[i*nhchainlength + j]  = 0.0;
 +                state->nhpres_vxi[i*nhchainlength + j]  = 0.0;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        state->nhpres_xi  = NULL;
 +        state->nhpres_vxi = NULL;
 +    }
 +}
 +
 +
++/* Initialise a t_state for natoms atoms.
++ *
++ * Sets the scalar members and the box/virial matrices to zero, allocates
++ * state->lambda with efptNR entries (all 0), sets up the coupling arrays
++ * through init_gtc_state, allocates x and v with natoms entries when
++ * natoms > 0 (NULL otherwise), and puts the ekinstate, energy history and
++ * free-energy history (nlambda states, wl_delta 0) in their initial state.
++ * sd_X, cg_p and cg_gl start out NULL.
++ */
++void init_state(t_state *state, int natoms, int ngtc, int nnhpres, int nhchainlength, int nlambda)
 +{
 +  int i;
 +
 +  state->natoms = natoms;
 +  state->nrng   = 0;
 +  state->flags  = 0;
 +  /* lambda is allocated right below; this store only clears the pointer first */
 +  state->lambda = 0;
++  snew(state->lambda,efptNR);
++  for (i=0;i<efptNR;i++)
++  {
++      state->lambda[i] = 0;
++  }
 +  state->veta   = 0;
 +  clear_mat(state->box);
 +  clear_mat(state->box_rel);
 +  clear_mat(state->boxv);
 +  clear_mat(state->pres_prev);
 +  clear_mat(state->svir_prev);
 +  clear_mat(state->fvir_prev);
 +  init_gtc_state(state,ngtc,nnhpres,nhchainlength);
 +  state->nalloc = state->natoms;
 +  if (state->nalloc > 0) {
 +    snew(state->x,state->nalloc);
 +    snew(state->v,state->nalloc);
 +  } else {
 +    state->x = NULL;
 +    state->v = NULL;
 +  }
 +  state->sd_X = NULL;
 +  state->cg_p = NULL;
 +
 +  zero_ekinstate(&state->ekinstate);
 +
 +  init_energyhistory(&state->enerhist);
 +
++  init_df_history(&state->dfhist,nlambda,0);
++
 +  state->ddp_count = 0;
 +  state->ddp_count_cg_gl = 0;
 +  state->cg_gl = NULL;
 +  state->cg_gl_nalloc = 0;
 +}
 +
 +/* Free nosehoover_xi, x, v, sd_X, cg_p and cg_gl when they are non-NULL
 + * and reset nalloc and cg_gl_nalloc to zero.
 + *
 + * NOTE(review): init_state/init_gtc_state also allocate nosehoover_vxi,
 + * therm_integral, nhpres_xi, nhpres_vxi, lambda and the dfhist arrays;
 + * none of those are released here. Whether another routine frees them is
 + * not visible from this file section - confirm before relying on this
 + * function for a full teardown.
 + * The freed pointers are not reset to NULL.
 + */
 +void done_state(t_state *state)
 +{
 +  if (state->nosehoover_xi) sfree(state->nosehoover_xi);
 +  if (state->x) sfree(state->x);
 +  if (state->v) sfree(state->v);
 +  if (state->sd_X) sfree(state->sd_X);
 +  if (state->cg_p) sfree(state->cg_p);
 +  state->nalloc = 0;
 +  if (state->cg_gl) sfree(state->cg_gl);
 +  state->cg_gl_nalloc = 0;
 +}
 +
 +/* Store or restore box components relative to b[XX][XX].
 + *
 + * Visits rows d = YY..ZZ and columns d2 = XX..YY (when ir->epct is
 + * epctSEMIISOTROPIC) or XX..ZZ (otherwise). A component is handled only
 + * when ir->deform[d][d2] is zero and the exclusion test below does not
 + * apply.
 + *   bInit == TRUE : box_rel[d][d2] = b[d][d2]/b[XX][XX]
 + *   bInit == FALSE: b[d][d2]       = b[XX][XX]*box_rel[d][d2]
 + * There is no guard against b[XX][XX] == 0 in the bInit branch.
 + */
 +static void do_box_rel(t_inputrec *ir,matrix box_rel,matrix b,gmx_bool bInit)
 +{
 +  int d,d2;
 +
 +  for(d=YY; d<=ZZ; d++) {
 +    for(d2=XX; d2<=(ir->epct==epctSEMIISOTROPIC ? YY : ZZ); d2++) {
 +      /* We need to check if this box component is deformed
 +       * or if deformation of another component might cause
 +       * changes in this component due to box corrections.
 +       */
 +      /* The second clause excludes component [ZZ][XX] when deform[ZZ][YY]
 +       * is non-zero and either b[YY][XX] or deform[YY][XX] is non-zero.
 +       */
 +      if (ir->deform[d][d2] == 0 &&
 +        !(d == ZZ && d2 == XX && ir->deform[d][YY] != 0 &&
 +          (b[YY][d2] != 0 || ir->deform[YY][d2] != 0))) {
 +      if (bInit) {
 +        box_rel[d][d2] = b[d][d2]/b[XX][XX];
 +      } else {
 +        b[d][d2] = b[XX][XX]*box_rel[d][d2];
 +      }
 +      }
 +    }
 +  }
 +}
 +
 +void set_box_rel(t_inputrec *ir,t_state *state)
 +{
 +  /* Make sure the box obeys the restrictions before we fix the ratios */
 +  correct_box(NULL,0,state->box,NULL);
 +
 +  clear_mat(state->box_rel);
 +
 +  if (PRESERVE_SHAPE(*ir))
 +    do_box_rel(ir,state->box_rel,state->box,TRUE);
 +}
 +
 +void preserve_box_shape(t_inputrec *ir,matrix box_rel,matrix b)
 +{
 +  if (PRESERVE_SHAPE(*ir))
 +    do_box_rel(ir,box_rel,b,FALSE);
 +}
 +
 +void add_t_atoms(t_atoms *atoms,int natom_extra,int nres_extra)
 +{
 +    int i;
 +    
 +    if (natom_extra > 0) 
 +    {
 +        srenew(atoms->atomname,atoms->nr+natom_extra);
 +        srenew(atoms->atom,atoms->nr+natom_extra);
 +        if (NULL != atoms->pdbinfo)
 +            srenew(atoms->pdbinfo,atoms->nr+natom_extra);
 +        if (NULL != atoms->atomtype)
 +            srenew(atoms->atomtype,atoms->nr+natom_extra);
 +        if (NULL != atoms->atomtypeB)
 +            srenew(atoms->atomtypeB,atoms->nr+natom_extra);
 +        for(i=atoms->nr; (i<atoms->nr+natom_extra); i++) {
 +            atoms->atomname[i] = NULL;
 +            memset(&atoms->atom[i],0,sizeof(atoms->atom[i]));
 +            if (NULL != atoms->pdbinfo)
 +                memset(&atoms->pdbinfo[i],0,sizeof(atoms->pdbinfo[i]));
 +            if (NULL != atoms->atomtype)
 +                atoms->atomtype[i] = NULL;
 +            if (NULL != atoms->atomtypeB)
 +                atoms->atomtypeB[i] = NULL;
 +        }
 +        atoms->nr += natom_extra;
 +    }
 +    if (nres_extra > 0)
 +    {
 +        srenew(atoms->resinfo,atoms->nres+nres_extra);
 +        for(i=atoms->nres; (i<atoms->nres+nres_extra); i++) {
 +            memset(&atoms->resinfo[i],0,sizeof(atoms->resinfo[i]));
 +        }
 +        atoms->nres += nres_extra;
 +    }
 +}
 +
 +void init_t_atoms(t_atoms *atoms, int natoms, gmx_bool bPdbinfo)
 +{
 +  atoms->nr=natoms;
 +  atoms->nres=0;
 +  snew(atoms->atomname,natoms);
 +  atoms->atomtype=NULL;
 +  atoms->atomtypeB=NULL;
 +  snew(atoms->resinfo,natoms);
 +  snew(atoms->atom,natoms);
 +  if (bPdbinfo)
 +    snew(atoms->pdbinfo,natoms);
 +  else
 +    atoms->pdbinfo=NULL;
 +}
 +
 +t_atoms *copy_t_atoms(t_atoms *src)
 +{
 +  t_atoms *dst;
 +  int i;
 +    
 +  snew(dst,1);
 +  init_t_atoms(dst,src->nr,(NULL != src->pdbinfo));
 +  dst->nr = src->nr;
 +  if (NULL != src->atomname)
 +      snew(dst->atomname,src->nr);
 +  if (NULL != src->atomtype)
 +      snew(dst->atomtype,src->nr);
 +  if (NULL != src->atomtypeB)
 +      snew(dst->atomtypeB,src->nr);
 +  for(i=0; (i<src->nr); i++) {
 +    dst->atom[i] = src->atom[i];
 +    if (NULL != src->pdbinfo)
 +      dst->pdbinfo[i] = src->pdbinfo[i];
 +    if (NULL != src->atomname)
 +        dst->atomname[i]  = src->atomname[i];
 +    if (NULL != src->atomtype)
 +        dst->atomtype[i] = src->atomtype[i];
 +    if (NULL != src->atomtypeB)
 +        dst->atomtypeB[i] = src->atomtypeB[i];
 +  }  
 +  dst->nres = src->nres;
 +  for(i=0; (i<src->nres); i++) {
 +    dst->resinfo[i] = src->resinfo[i];
 +  }  
 +  return dst;
 +}
 +
 +void t_atoms_set_resinfo(t_atoms *atoms,int atom_ind,t_symtab *symtab,
 +                         const char *resname,int resnr,unsigned char ic,
 +                         int chainnum, char chainid)
 +{
 +  t_resinfo *ri;
 +
 +  ri = &atoms->resinfo[atoms->atom[atom_ind].resind];
 +  ri->name  = put_symtab(symtab,resname);
 +  ri->rtp   = NULL;
 +  ri->nr    = resnr;
 +  ri->ic    = ic;
 +  ri->chainnum = chainnum;
 +  ri->chainid = chainid;
 +}
 +
 +void free_t_atoms(t_atoms *atoms,gmx_bool bFreeNames)
 +{
 +  int i;
 +
 +  if (bFreeNames) {
 +    for(i=0; i<atoms->nr; i++) {
 +      sfree(*atoms->atomname[i]);
 +      *atoms->atomname[i]=NULL;
 +    }
 +    for(i=0; i<atoms->nres; i++) {
 +      sfree(*atoms->resinfo[i].name);
 +      *atoms->resinfo[i].name=NULL;
 +    }
 +  }
 +  sfree(atoms->atomname);
 +  /* Do we need to free atomtype and atomtypeB as well ? */
 +  sfree(atoms->resinfo);
 +  sfree(atoms->atom);
 +  if (atoms->pdbinfo)
 +    sfree(atoms->pdbinfo);
 +  atoms->nr=0; 
 +  atoms->nres=0;
 +}     
 +
 +real max_cutoff(real cutoff1,real cutoff2)
 +{
 +    if (cutoff1 == 0 || cutoff2 == 0)
 +    {
 +        return 0;
 +    }
 +    else
 +    {
 +        return max(cutoff1,cutoff2);
 +    }
 +}
++
++/* Initialise a df_history_t for nlambda states.
++ * bEquil is cleared, nlambda and wl_delta are stored, the per-state
++ * arrays get nlambda entries and the Tij, Tij_empirical and accum_*
++ * matrices get nlambda rows of nlambda entries each.
++ */
++extern void init_df_history(df_history_t *dfhist, int nlambda, real wl_delta)
++{
++    int row;
++
++    dfhist->bEquil   = 0;
++    dfhist->nlambda  = nlambda;
++    dfhist->wl_delta = wl_delta;
++
++    /* One entry per state */
++    snew(dfhist->sum_weights,nlambda);
++    snew(dfhist->sum_dg,nlambda);
++    snew(dfhist->sum_minvar,nlambda);
++    snew(dfhist->sum_variance,nlambda);
++    snew(dfhist->n_at_lam,nlambda);
++    snew(dfhist->wl_histo,nlambda);
++
++    /* Transition matrices: row pointers first, then each row */
++    snew(dfhist->Tij,nlambda);
++    snew(dfhist->Tij_empirical,nlambda);
++    for (row=0; row<nlambda; row++)
++    {
++        snew(dfhist->Tij[row],nlambda);
++        snew(dfhist->Tij_empirical[row],nlambda);
++    }
++
++    /* Accumulator matrices, same layout */
++    snew(dfhist->accum_p,nlambda);
++    snew(dfhist->accum_m,nlambda);
++    snew(dfhist->accum_p2,nlambda);
++    snew(dfhist->accum_m2,nlambda);
++    for (row=0; row<nlambda; row++)
++    {
++        snew(dfhist->accum_p[row],nlambda);
++        snew(dfhist->accum_m[row],nlambda);
++        snew(dfhist->accum_p2[row],nlambda);
++        snew(dfhist->accum_m2[row],nlambda);
++    }
++}
++
++/* Make df_dest a deep copy of df_source.
++ *
++ * df_dest is (re)initialised with init_df_history, which allocates all of
++ * its arrays for df_source->nlambda states; any arrays df_dest referenced
++ * before the call are not freed here.
++ * The per-state arrays are copied element by element. The Tij,
++ * Tij_empirical and accum_p/m/p2/m2 matrices are copied entry by entry:
++ * their [i] elements are row pointers, so assigning them directly would
++ * leak the rows just allocated for df_dest and make both histories share
++ * (and later double-free) the same rows.
++ */
++extern void copy_df_history(df_history_t *df_dest, df_history_t *df_source)
++{
++    int i,j;
++
++    init_df_history(df_dest,df_source->nlambda,df_source->wl_delta);
++    df_dest->nlambda = df_source->nlambda;
++    df_dest->bEquil = df_source->bEquil;
++    for (i=0;i<df_dest->nlambda;i++)
++    {
++        df_dest->sum_weights[i]  = df_source->sum_weights[i];
++        df_dest->sum_dg[i]       = df_source->sum_dg[i];
++        df_dest->sum_minvar[i]   = df_source->sum_minvar[i];
++        df_dest->sum_variance[i] = df_source->sum_variance[i];
++        df_dest->n_at_lam[i]     = df_source->n_at_lam[i];
++        df_dest->wl_histo[i]     = df_source->wl_histo[i];
++    }
++
++    for (i=0;i<df_dest->nlambda;i++)
++    {
++        for (j=0;j<df_dest->nlambda;j++)
++        {
++            df_dest->Tij[i][j]            = df_source->Tij[i][j];
++            df_dest->Tij_empirical[i][j]  = df_source->Tij_empirical[i][j];
++            df_dest->accum_p[i][j]        = df_source->accum_p[i][j];
++            df_dest->accum_m[i][j]        = df_source->accum_m[i][j];
++            df_dest->accum_p2[i][j]       = df_source->accum_p2[i][j];
++            df_dest->accum_m2[i][j]       = df_source->accum_m2[i][j];
++        }
++    }
++}
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index abf5ccb76e6b1f3ae30a1f9b176fad43ee71d397,0000000000000000000000000000000000000000..bce7d676bd35bafc0d6940115af7372ce5164a4e
mode 100644,000000..100644
--- /dev/null
@@@ -1,1149 -1,0 +1,1163 @@@
-         lambda += delta_step*ir->delta_lambda;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "confio.h"
 +#include "constr.h"
 +#include "copyrite.h"
 +#include "invblock.h"
 +#include "main.h"
 +#include "mdrun.h"
 +#include "nrnb.h"
 +#include "smalloc.h"
 +#include "vec.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "txtdump.h"
 +#include "domdec.h"
 +#include "pdbio.h"
 +#include "partdec.h"
 +#include "splitter.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "macros.h"
 +
 +/* Constraint bookkeeping shared by the LINCS, SHAKE and SETTLE code paths */
 +typedef struct gmx_constr {
 +  int              ncon_tot;     /* The total number of constraints    */
 +  int              nflexcon;     /* The number of flexible constraints */
 +  int              n_at2con_mt;  /* The size of at2con = #moltypes     */
 +  t_blocka         *at2con_mt;   /* A list of atoms to constraints     */
 +  gmx_lincsdata_t  lincsd;       /* LINCS data                         */
 +  gmx_shakedata_t  shaked;       /* SHAKE data                         */
 +  gmx_settledata_t settled;      /* SETTLE data                        */
 +  int              nblocks;      /* The number of SHAKE blocks         */
 +  int              *sblock;      /* The SHAKE blocks                   */
 +  int              sblock_nalloc;/* The allocation size of sblock      */
 +  real             *lagr;        /* Lagrange multipliers for SHAKE     */
 +  int              lagr_nalloc;  /* The allocation size of lagr        */
 +  int              maxwarn;      /* The maximum number of warnings     */
 +  int              warncount_lincs;  /* Warning counter handed to constrain_lincs */
 +  int              warncount_settle; /* Incremented on each failed SETTLE call, checked against maxwarn */
 +  gmx_edsam_t      ed;           /* The essential dynamics data        */
 +
 +  gmx_mtop_t       *warn_mtop;   /* Only used for printing warnings    */
 +} t_gmx_constr;
 +
 +/* Sort record for one constraint, used when ordering constraints by block */
 +typedef struct {
 +  atom_id iatom[3];  /* The three ilist entries of the constraint, copied verbatim */
 +  atom_id blocknr;   /* Block number looked up for iatom[1]                      */
 +} t_sortblock;
 +
 +/* Fill a t_vetavars with the scaling factors used during constraining.
 + *
 + * alpha is 1 + DIM/nrdf[0] when nrdf[0] > 0 and bPscal is set, else 1.
 + * rscale and vscale are exp(g)*series_sinhx(g) with g = 0.5*veta*delta_t
 + * and g = -0.25*alpha*veta*delta_t respectively; rvscale is their product.
 + * vars->veta is set to vetanew (veta itself only enters the factors).
 + * When constr_deriv is set, vscale_nhc is allocated with ir->opts.ngtc
 + * entries, filled with 1 (ekind == NULL or !bPscal) or with the per-group
 + * ekind->tcstat[i].vscale_nhc values; otherwise vscale_nhc is NULL.
 + * The allocation is released by free_vetavars.
 + * Returns vars.
 + */
 +static void *init_vetavars(t_vetavars *vars,
 +                           gmx_bool constr_deriv,
 +                           real veta,real vetanew, t_inputrec *ir, gmx_ekindata_t *ekind, gmx_bool bPscal) 
 +{
 +    double g;
 +    int i;
 +
 +    /* first, set the alpha integrator variable */
 +    if ((ir->opts.nrdf[0] > 0) && bPscal) 
 +    {
 +        vars->alpha = 1.0 + DIM/((double)ir->opts.nrdf[0]);  
 +    } else {
 +        vars->alpha = 1.0;
 +    }
 +    g = 0.5*veta*ir->delta_t;
 +    vars->rscale = exp(g)*series_sinhx(g);
 +    g = -0.25*vars->alpha*veta*ir->delta_t;
 +    vars->vscale = exp(g)*series_sinhx(g);
 +    vars->rvscale = vars->vscale*vars->rscale;
 +    vars->veta = vetanew;
 +
 +    if (constr_deriv)
 +    {
 +        snew(vars->vscale_nhc,ir->opts.ngtc);
 +        if ((ekind==NULL) || (!bPscal))
 +        {
 +            for (i=0;i<ir->opts.ngtc;i++)
 +            {
 +                vars->vscale_nhc[i] = 1;
 +            }
 +        }
 +        else
 +        {
 +            for (i=0;i<ir->opts.ngtc;i++)
 +            {
 +                vars->vscale_nhc[i] = ekind->tcstat[i].vscale_nhc;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        vars->vscale_nhc = NULL;
 +    }
 +
 +    return vars;
 +}
 +
 +static void free_vetavars(t_vetavars *vars) 
 +{
 +    if (vars->vscale_nhc != NULL)
 +    {
 +        sfree(vars->vscale_nhc);
 +    }
 +}
 +
 +static int pcomp(const void *p1, const void *p2)
 +{
 +  int     db;
 +  atom_id min1,min2,max1,max2;
 +  t_sortblock *a1=(t_sortblock *)p1;
 +  t_sortblock *a2=(t_sortblock *)p2;
 +  
 +  db=a1->blocknr-a2->blocknr;
 +  
 +  if (db != 0)
 +    return db;
 +    
 +  min1=min(a1->iatom[1],a1->iatom[2]);
 +  max1=max(a1->iatom[1],a1->iatom[2]);
 +  min2=min(a2->iatom[1],a2->iatom[2]);
 +  max2=max(a2->iatom[1],a2->iatom[2]);
 +  
 +  if (min1 == min2)
 +    return max1-max2;
 +  else
 +    return min1-min2;
 +}
 +
 +static int icomp(const void *p1, const void *p2)
 +{
 +  atom_id *a1=(atom_id *)p1;
 +  atom_id *a2=(atom_id *)p2;
 +
 +  return (*a1)-(*a2);
 +}
 +
 +int n_flexible_constraints(struct gmx_constr *constr)
 +{
 +  int nflexcon;
 +
 +  if (constr)
 +    nflexcon = constr->nflexcon;
 +  else
 +    nflexcon = 0;
 +
 +  return nflexcon;
 +}
 +
 +void too_many_constraint_warnings(int eConstrAlg,int warncount)
 +{
 +  const char *abort="- aborting to avoid logfile runaway.\n"
 +    "This normally happens when your system is not sufficiently equilibrated,"
 +    "or if you are changing lambda too fast in free energy simulations.\n";
 +  
 +  gmx_fatal(FARGS,
 +          "Too many %s warnings (%d)\n"
 +          "If you know what you are doing you can %s"
 +          "set the environment variable GMX_MAXCONSTRWARN to -1,\n"
 +          "but normally it is better to fix the problem",
 +          (eConstrAlg == econtLINCS) ? "LINCS" : "SETTLE",warncount,
 +          (eConstrAlg == econtLINCS) ?
 +          "adjust the lincs warning threshold in your mdp file\nor " : "\n");
 +}
 +
 +/* Write the coordinates x to a pdb file for inspecting constraint failures.
 + *
 + * The file name is "<fn>.pdb", or "<fn>_n<sim_nodeid>.pdb" when PAR(cr)
 + * holds. With domain decomposition the atom range is replaced by
 + * 0..dd_ac1 from dd_get_constraint_range, local indices in
 + * [dd->nat_home, dd_ac0) are skipped and atoms are labelled through
 + * dd->gatindex. Coordinates are multiplied by 10 on output; atom and
 + * residue numbers are wrapped modulo 100000 and 10000.
 + * The return value of gmx_fio_fopen is not checked here.
 + */
 +static void write_constr_pdb(const char *fn,const char *title,
 +                             gmx_mtop_t *mtop,
 +                             int start,int homenr,t_commrec *cr,
 +                             rvec x[],matrix box)
 +{
 +    char fname[STRLEN],format[STRLEN];
 +    FILE *out;
 +    int  dd_ac0=0,dd_ac1=0,i,ii,resnr;
 +    gmx_domdec_t *dd;
 +    char *anm,*resnm;
 +  
 +    dd = NULL;
 +    if (PAR(cr))
 +    {
 +        sprintf(fname,"%s_n%d.pdb",fn,cr->sim_nodeid);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd = cr->dd;
 +            dd_get_constraint_range(dd,&dd_ac0,&dd_ac1);
 +            start = 0;
 +            homenr = dd_ac1;
 +        }
 +    }
 +    else
 +    {
 +        sprintf(fname,"%s.pdb",fn);
 +    }
 +    sprintf(format,"%s\n",get_pdbformat());
 +    
 +    out = gmx_fio_fopen(fname,"w");
 +    
 +    fprintf(out,"TITLE     %s\n",title);
 +    gmx_write_pdb_box(out,-1,box);
 +    for(i=start; i<start+homenr; i++)
 +    {
 +        if (dd != NULL)
 +        {
 +            /* Skip the local indices between the home atoms and dd_ac0 */
 +            if (i >= dd->nat_home && i < dd_ac0)
 +            {
 +                continue;
 +            }
 +            ii = dd->gatindex[i];
 +        }
 +        else
 +        {
 +            ii = i;
 +        }
 +        gmx_mtop_atominfo_global(mtop,ii,&anm,&resnr,&resnm);
 +        fprintf(out,format,"ATOM",(ii+1)%100000,
 +                anm,resnm,' ',resnr%10000,' ',
 +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ]);
 +    }
 +    fprintf(out,"TER\n");
 +
 +    gmx_fio_fclose(out);
 +}
 +                           
 +static void dump_confs(FILE *fplog,gmx_large_int_t step,gmx_mtop_t *mtop,
 +                     int start,int homenr,t_commrec *cr,
 +                     rvec x[],rvec xprime[],matrix box)
 +{
 +  char buf[256],buf2[22];
 + 
 +  char *env=getenv("GMX_SUPPRESS_DUMP");
 +  if (env)
 +      return; 
 +  
 +  sprintf(buf,"step%sb",gmx_step_str(step,buf2));
 +  write_constr_pdb(buf,"initial coordinates",
 +                 mtop,start,homenr,cr,x,box);
 +  sprintf(buf,"step%sc",gmx_step_str(step,buf2));
 +  write_constr_pdb(buf,"coordinates after constraining",
 +                 mtop,start,homenr,cr,xprime,box);
 +  if (fplog)
 +  {
 +      fprintf(fplog,"Wrote pdb files with previous and current coordinates\n");
 +  }
 +  fprintf(stderr,"Wrote pdb files with previous and current coordinates\n");
 +}
 +
 +static void pr_sortblock(FILE *fp,const char *title,int nsb,t_sortblock sb[])
 +{
 +  int i;
 +  
 +  fprintf(fp,"%s\n",title);
 +  for(i=0; (i<nsb); i++)
 +    fprintf(fp,"i: %5d, iatom: (%5d %5d %5d), blocknr: %5d\n",
 +          i,sb[i].iatom[0],sb[i].iatom[1],sb[i].iatom[2],
 +          sb[i].blocknr);
 +}
 +
 +/* Apply the constraint algorithms that are set up in constr.
 + *
 + * In order: LINCS when constr->lincsd is set, SHAKE when
 + * constr->nblocks > 0 (only for econqCoord and econqVeloc), SETTLE when
 + * idef->il[F_SETTLE] is non-empty. Each algorithm that runs overwrites
 + * bOK, so the return value reflects the last one executed.
 + * When vir is non-NULL the accumulated rmdr tensor is scaled by a factor
 + * that depends on econq (doubled for EI_VV integrators) and stored in *vir.
 + * On a reported failure with maxwarn >= 0 the coordinates are dumped
 + * through dump_confs.
 + * For econqCoord, pull constraints (ir->ePull == epullCONSTRAINT) and
 + * essential dynamics (constr->ed set and delta_step > 0) are applied last.
 + *
 + * With free-energy perturbation and a dynamics integrator, lambda is
 + * advanced by delta_step*ir->fepvals->delta_lambda before use.
 + *
 + * Returns TRUE when the last constraint algorithm run reported success
 + * (or when none ran).
 + */
 +gmx_bool constrain(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
 +               struct gmx_constr *constr,
 +               t_idef *idef,t_inputrec *ir,gmx_ekindata_t *ekind,
 +               t_commrec *cr,
 +               gmx_large_int_t step,int delta_step,
 +               t_mdatoms *md,
 +               rvec *x,rvec *xprime,rvec *min_proj,matrix box,
 +               real lambda,real *dvdlambda,
 +               rvec *v,tensor *vir,
 +               t_nrnb *nrnb,int econq,gmx_bool bPscal,real veta, real vetanew)
 +{
 +    gmx_bool    bOK,bDump;
 +    int     start,homenr,nrend;
 +    int     i,j,d;
 +    int     ncons,error;
 +    tensor  rmdr;
 +    rvec    *vstor;
 +    real    invdt,vir_fac,t;
 +    t_ilist *settle;
 +    int     nsettle;
 +    t_pbc   pbc;
 +    char    buf[22];
 +    t_vetavars vetavar;
 +
 +    if (econq == econqForceDispl && !EI_ENERGY_MINIMIZATION(ir->eI))
 +    {
 +        gmx_incons("constrain called for forces displacements while not doing energy minimization, can not do this while the LINCS and SETTLE constraint connection matrices are mass weighted");
 +    }
 +    
 +    bOK   = TRUE;
 +    bDump = FALSE;
 +    
 +    start  = md->start;
 +    homenr = md->homenr;
 +    nrend = start+homenr;
 +
 +    /* set constants for pressure control integration */ 
 +    init_vetavars(&vetavar,econq!=econqCoord,
 +                  veta,vetanew,ir,ekind,bPscal);
 +
 +    /* A zero time step gives a zero inverse time step instead of a division by zero */
 +    if (ir->delta_t == 0)
 +    {
 +        invdt = 0;
 +    }
 +    else
 +    {
 +        invdt  = 1/ir->delta_t;
 +    }
 +
 +    if (ir->efep != efepNO && EI_DYNAMICS(ir->eI))
 +    {
 +        /* Set the constraint lengths for the step at which this configuration
 +         * is meant to be. The invmasses should not be changed.
 +         */
-     lam0 = ir->init_lambda;
++        lambda += delta_step*ir->fepvals->delta_lambda;
 +    }
 +    
 +    if (vir != NULL)
 +    {
 +        clear_mat(rmdr);
 +    }
 +    
 +    where();
 +    if (constr->lincsd)
 +    {
 +        bOK = constrain_lincs(fplog,bLog,bEner,ir,step,constr->lincsd,md,cr,
 +                              x,xprime,min_proj,box,lambda,dvdlambda,
 +                              invdt,v,vir!=NULL,rmdr,
 +                              econq,nrnb,
 +                              constr->maxwarn,&constr->warncount_lincs);
 +        if (!bOK && constr->maxwarn >= 0)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
 +                        econstr_names[econtLINCS],gmx_step_str(step,buf));
 +            }
 +            bDump = TRUE;
 +        }
 +    } 
 +    
 +    if (constr->nblocks > 0)
 +    {
 +        switch (econq) {
 +        case (econqCoord):
 +            bOK = bshakef(fplog,constr->shaked,
 +                          homenr,md->invmass,constr->nblocks,constr->sblock,
 +                          idef,ir,box,x,xprime,nrnb,
 +                          constr->lagr,lambda,dvdlambda,
 +                          invdt,v,vir!=NULL,rmdr,constr->maxwarn>=0,econq,
 +                          &vetavar);
 +            break;
 +        case (econqVeloc):
 +            bOK = bshakef(fplog,constr->shaked,
 +                          homenr,md->invmass,constr->nblocks,constr->sblock,
 +                          idef,ir,box,x,min_proj,nrnb,
 +                          constr->lagr,lambda,dvdlambda,
 +                          invdt,NULL,vir!=NULL,rmdr,constr->maxwarn>=0,econq,
 +                          &vetavar);
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Internal error, SHAKE called for constraining something else than coordinates");
 +            break;
 +        }
 +
 +        if (!bOK && constr->maxwarn >= 0)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
 +                        econstr_names[econtSHAKE],gmx_step_str(step,buf));
 +            }
 +            bDump = TRUE;
 +        }
 +    }
 +        
 +    settle  = &idef->il[F_SETTLE];
 +    if (settle->nr > 0)
 +    {
 +        /* Each settle entry occupies four ilist elements */
 +        nsettle = settle->nr/4;
 +        
 +        switch (econq)
 +        {
 +        case econqCoord:
 +            /* NOTE(review): v[0] is formed before the v != NULL test below;
 +             * confirm that callers never pass v == NULL with econqCoord.
 +             */
 +            csettle(constr->settled,
 +                    nsettle,settle->iatoms,x[0],xprime[0],
 +                    invdt,v[0],vir!=NULL,rmdr,&error,&vetavar);
 +            inc_nrnb(nrnb,eNR_SETTLE,nsettle);
 +            if (v != NULL)
 +            {
 +                inc_nrnb(nrnb,eNR_CONSTR_V,nsettle*3);
 +            }
 +            if (vir != NULL)
 +            {
 +                inc_nrnb(nrnb,eNR_CONSTR_VIR,nsettle*3);
 +            }
 +            
 +            /* A negative error value means success; otherwise it is used
 +             * below as the index of the failing settle entry.
 +             */
 +            bOK = (error < 0);
 +            /* NOTE(review): the brace opened by this if is only closed after
 +             * the default label, so the remaining case labels sit inside the
 +             * if-block. The switch still jumps to them directly, and a
 +             * successful econqCoord call skips the whole block and so leaves
 +             * the switch.
 +             */
 +            if (!bOK && constr->maxwarn >= 0)
 +            {
 +                char buf[256];
 +                sprintf(buf,
 +                        "\nstep " gmx_large_int_pfmt ": Water molecule starting at atom %d can not be "
 +                        "settled.\nCheck for bad contacts and/or reduce the timestep if appropriate.\n",
 +                        step,ddglatnr(cr->dd,settle->iatoms[error*4+1]));
 +                if (fplog)
 +                {
 +                    fprintf(fplog,"%s",buf);
 +                }
 +                fprintf(stderr,"%s",buf);
 +                constr->warncount_settle++;
 +                if (constr->warncount_settle > constr->maxwarn)
 +                {
 +                    too_many_constraint_warnings(-1,constr->warncount_settle);
 +                }
 +                bDump = TRUE;
 +                break;
 +            case econqVeloc:
 +            case econqDeriv:
 +            case econqForce:
 +            case econqForceDispl:
 +                settle_proj(fplog,constr->settled,econq,
 +                            nsettle,settle->iatoms,x,
 +                            xprime,min_proj,vir!=NULL,rmdr,&vetavar);
 +                /* This is an overestimate */
 +                inc_nrnb(nrnb,eNR_SETTLE,nsettle);
 +                break;
 +            case econqDeriv_FlexCon:
 +                /* Nothing to do, since there are no flexible constraints in settles */
 +                break;
 +            default:
 +                gmx_incons("Unknown constraint quantity for settle");
 +            }
 +        }
 +    }
 +
 +    free_vetavars(&vetavar);
 +    
 +    if (vir != NULL)
 +    {
 +        switch (econq)
 +        {
 +        case econqCoord:
 +            vir_fac = 0.5/(ir->delta_t*ir->delta_t);
 +            break;
 +        case econqVeloc:
 +            vir_fac = 0.5/ir->delta_t;
 +            break;
 +        case econqForce:
 +        case econqForceDispl:
 +            vir_fac = 0.5;
 +            break;
 +        default:
 +            vir_fac = 0;
 +            gmx_incons("Unsupported constraint quantity for virial");
 +        }
 +        
 +        if (EI_VV(ir->eI))
 +        {
 +            vir_fac *= 2;  /* only constraining over half the distance here */
 +        }
 +        for(i=0; i<DIM; i++)
 +        {
 +            for(j=0; j<DIM; j++)
 +            {
 +                (*vir)[i][j] = vir_fac*rmdr[i][j];
 +            }
 +        }
 +    }
 +    
 +    if (bDump)
 +    {
 +        dump_confs(fplog,step,constr->warn_mtop,start,homenr,cr,x,xprime,box);
 +    }
 +    
 +    if (econq == econqCoord)
 +    {
 +        if (ir->ePull == epullCONSTRAINT)
 +        {
 +            if (EI_DYNAMICS(ir->eI))
 +            {
 +                t = ir->init_t + (step + delta_step)*ir->delta_t;
 +            }
 +            else
 +            {
 +                t = ir->init_t;
 +            }
 +            set_pbc(&pbc,ir->ePBC,box);
 +            /* NOTE(review): *vir is passed without a NULL test on vir - confirm
 +             * that callers always supply vir when pull constraints are active.
 +             */
 +            pull_constraint(ir->pull,md,&pbc,cr,ir->delta_t,t,x,xprime,v,*vir);
 +        }
 +        if (constr->ed && delta_step > 0)
 +        {
 +            /* apply the essential dynamics constraints here */
 +            do_edsam(ir,step,md,cr,xprime,v,box,constr->ed);
 +        }
 +    }
 +    
 +    return bOK;
 +}
 +
 +real *constr_rmsd_data(struct gmx_constr *constr)
 +{
 +  if (constr->lincsd)
 +    return lincs_rmsd_data(constr->lincsd);
 +  else
 +    return NULL;
 +}
 +
 +real constr_rmsd(struct gmx_constr *constr,gmx_bool bSD2)
 +{
 +  if (constr->lincsd)
 +    return lincs_rmsd(constr->lincsd,bSD2);
 +  else
 +    return 0;
 +}
 +
 +/* Build the SHAKE block boundaries in constr->sblock.
 + *
 + * Steps:
 + *  1. gen_sblocks produces the blocks for atoms md->start ..
 + *     md->start+md->homenr; their number becomes constr->nblocks.
 + *  2. Each F_CONSTR entry (three ilist elements) is tagged with the block
 + *     number of its iatom[1] and the entries are sorted with pcomp.
 + *  3. The sorted entries are written back into idef->il[F_CONSTR].iatoms,
 + *     i.e. the constraint list is reordered in place.
 + *  4. constr->sblock receives the ilist offset (3*i) at which each block
 + *     starts, plus a final entry 3*ncons.
 + * A mismatch between the number of boundaries found and nblocks+1 is
 + * reported on stderr and ends in gmx_fatal.
 + * constr->sblock is allocated here with nblocks+1 entries.
 + */
 +static void make_shake_sblock_pd(struct gmx_constr *constr,
 +                               t_idef *idef,t_mdatoms *md)
 +{
 +  int  i,j,m,ncons;
 +  int  bstart,bnr;
 +  t_blocka    sblocks;
 +  t_sortblock *sb;
 +  t_iatom     *iatom;
 +  atom_id     *inv_sblock;
 +
 +  /* Since we are processing the local topology,
 +   * the F_CONSTRNC ilist has been concatenated to the F_CONSTR ilist.
 +   */
 +  ncons = idef->il[F_CONSTR].nr/3;
 +
 +  init_blocka(&sblocks);
 +  gen_sblocks(NULL,md->start,md->start+md->homenr,idef,&sblocks,FALSE);
 +  
 +  /*
 +    bstart=(idef->nodeid > 0) ? blocks->multinr[idef->nodeid-1] : 0;
 +    nblocks=blocks->multinr[idef->nodeid] - bstart;
 +  */
 +  bstart  = 0;
 +  constr->nblocks = sblocks.nr;
 +  if (debug) 
 +    fprintf(debug,"ncons: %d, bstart: %d, nblocks: %d\n",
 +          ncons,bstart,constr->nblocks);
 +  
 +  /* Calculate block number for each atom */
 +  inv_sblock = make_invblocka(&sblocks,md->nr);
 +  
 +  done_blocka(&sblocks);
 +  
 +  /* Store the block number in temp array and
 +   * sort the constraints in order of the sblock number 
 +   * and the atom numbers, really sorting a segment of the array!
 +   */
 +#ifdef DEBUGIDEF 
 +  pr_idef(fplog,0,"Before Sort",idef);
 +#endif
 +  iatom=idef->il[F_CONSTR].iatoms;
 +  snew(sb,ncons);
 +  for(i=0; (i<ncons); i++,iatom+=3) {
 +    for(m=0; (m<3); m++)
 +      sb[i].iatom[m] = iatom[m];
 +    sb[i].blocknr = inv_sblock[iatom[1]];
 +  }
 +  
 +  /* Now sort the blocks */
 +  if (debug) {
 +    pr_sortblock(debug,"Before sorting",ncons,sb);
 +    fprintf(debug,"Going to sort constraints\n");
 +  }
 +  
 +  qsort(sb,ncons,(size_t)sizeof(*sb),pcomp);
 +  
 +  if (debug) {
 +    pr_sortblock(debug,"After sorting",ncons,sb);
 +  }
 +  
 +  /* Write the sorted constraints back into the ilist */
 +  iatom=idef->il[F_CONSTR].iatoms;
 +  for(i=0; (i<ncons); i++,iatom+=3) 
 +    for(m=0; (m<3); m++)
 +      iatom[m]=sb[i].iatom[m];
 +#ifdef DEBUGIDEF
 +  pr_idef(fplog,0,"After Sort",idef);
 +#endif
 +  
 +  /* Record the ilist offset at every change of block number;
 +   * bnr starts at -2 so that the first entry always opens a block.
 +   */
 +  j=0;
 +  snew(constr->sblock,constr->nblocks+1);
 +  bnr=-2;
 +  for(i=0; (i<ncons); i++) {
 +    if (sb[i].blocknr != bnr) {
 +      bnr=sb[i].blocknr;
 +      constr->sblock[j++]=3*i;
 +    }
 +  }
 +  /* Last block... */
 +  constr->sblock[j++] = 3*ncons;
 +  
 +  if (j != (constr->nblocks+1)) {
 +    fprintf(stderr,"bstart: %d\n",bstart);
 +    fprintf(stderr,"j: %d, nblocks: %d, ncons: %d\n",
 +          j,constr->nblocks,ncons);
 +    for(i=0; (i<ncons); i++)
 +      fprintf(stderr,"i: %5d  sb[i].blocknr: %5u\n",i,sb[i].blocknr);
 +    for(j=0; (j<=constr->nblocks); j++)
 +      fprintf(stderr,"sblock[%3d]=%5d\n",j,(int)constr->sblock[j]);
 +    gmx_fatal(FARGS,"DEATH HORROR: "
 +            "sblocks does not match idef->il[F_CONSTR]");
 +  }
 +  sfree(sb);
 +  sfree(inv_sblock);
 +}
 +
 +static void make_shake_sblock_dd(struct gmx_constr *constr,
 +                               t_ilist *ilcon,t_block *cgs,
 +                               gmx_domdec_t *dd)
 +{ /* Build constr->sblock for the constraint list ilcon by starting a new
 +   * block at the first constraint and whenever the first atom of a
 +   * constraint lies at or beyond the end of the current charge group
 +   * (cgs->index[cg+1]). The list is terminated with 3*ncons.
 +   * constr->sblock is grown to hold at least dd->ncg_home+1 entries.
 +   * NOTE(review): presumably relies on ilcon being ordered by charge
 +   * group - confirm against the caller.
 +   */
 +  int ncons,c,cg;
 +  t_iatom *iatom;
 +
 +  if (dd->ncg_home+1 > constr->sblock_nalloc) {
 +    constr->sblock_nalloc = over_alloc_dd(dd->ncg_home+1);
 +    srenew(constr->sblock,constr->sblock_nalloc);
 +  }
 +  
 +  ncons = ilcon->nr/3;
 +  iatom = ilcon->iatoms;
 +  constr->nblocks = 0;
 +  cg = 0;
 +  for(c=0; c<ncons; c++) {
 +    if (c == 0 || iatom[1] >= cgs->index[cg+1]) {
 +      constr->sblock[constr->nblocks++] = 3*c;
 +      while (iatom[1] >= cgs->index[cg+1]) /* advance to the charge group holding this atom */
 +      cg++;
 +    }
 +    iatom += 3;
 +  }
 +  constr->sblock[constr->nblocks] = 3*ncons;
 +}
 +
 +t_blocka make_at2con(int start,int natoms,
 +                   t_ilist *ilist,t_iparams *iparams,
 +                   gmx_bool bDynamics,int *nflexiblecons)
 +{ /* Build an atom -> constraint lookup for the F_CONSTR and F_CONSTRNC
 +   * lists in ilist.
 +   *
 +   * start         offset subtracted from each atom index in the lists
 +   * natoms        number of atoms covered by the returned block
 +   * iparams       parameters; a constraint with dA == 0 and dB == 0 is
 +   *               counted as flexible
 +   * bDynamics     when FALSE, flexible constraints are left out of the map
 +   * nflexiblecons output: number of flexible constraints found
 +   *
 +   * Returns a t_blocka by value in which the constraints of atom a are
 +   * a[index[a]] .. a[index[a+1]-1]; index and a are allocated here with
 +   * snew().
 +   */
 +  int *count,ncon,con,con_tot,nflexcon,ftype,i,a;
 +  t_iatom  *ia;
 +  t_blocka at2con;
 +  gmx_bool bFlexCon;
 +  
 +  /* First pass: count the constraints per atom */
 +  snew(count,natoms);
 +  nflexcon = 0;
 +  for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +    ncon = ilist[ftype].nr/3;
 +    ia   = ilist[ftype].iatoms;
 +    for(con=0; con<ncon; con++) {
 +      bFlexCon = (iparams[ia[0]].constr.dA == 0 &&
 +                iparams[ia[0]].constr.dB == 0);
 +      if (bFlexCon)
 +      nflexcon++;
 +      if (bDynamics || !bFlexCon) {
 +      for(i=1; i<3; i++) {
 +        a = ia[i] - start;
 +        count[a]++;
 +      }
 +      }
 +      ia += 3;
 +    }
 +  }
 +  *nflexiblecons = nflexcon;
 +
 +  /* Turn the counts into start offsets; count is reset for reuse below */
 +  at2con.nr = natoms;
 +  at2con.nalloc_index = at2con.nr+1;
 +  snew(at2con.index,at2con.nalloc_index);
 +  at2con.index[0] = 0;
 +  for(a=0; a<natoms; a++) {
 +    at2con.index[a+1] = at2con.index[a] + count[a];
 +    count[a] = 0;
 +  }
 +  at2con.nra = at2con.index[natoms];
 +  at2con.nalloc_a = at2con.nra;
 +  snew(at2con.a,at2con.nalloc_a);
 +
 +  /* The F_CONSTRNC constraints have constraint numbers
 +   * that continue after the last F_CONSTR constraint.
 +   */
 +  con_tot = 0;
 +  for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +    ncon = ilist[ftype].nr/3;
 +    ia   = ilist[ftype].iatoms;
 +    for(con=0; con<ncon; con++) {
 +      bFlexCon = (iparams[ia[0]].constr.dA == 0 &&
 +                iparams[ia[0]].constr.dB == 0);
 +      if (bDynamics || !bFlexCon) {
 +      for(i=1; i<3; i++) {
 +        a = ia[i] - start;
 +        at2con.a[at2con.index[a]+count[a]++] = con_tot;
 +      }
 +      }
 +      con_tot++; /* skipped flexible constraints still consume a number */
 +      ia += 3;
 +    }
 +  }
 +  
 +  sfree(count);
 +
 +  return at2con;
 +}
 +
 +void set_constraints(struct gmx_constr *constr,
 +                     gmx_localtop_t *top,t_inputrec *ir,
 +                     t_mdatoms *md,t_commrec *cr)
 +{ /* Set up the constraint data in constr for the local topology top.
 +   *
 +   * When constr->ncon_tot > 0: calls set_lincs() for LINCS, or builds the
 +   * SHAKE blocks (domain-decomposition variant when cr->dd is set) and
 +   * grows constr->lagr to hold at least one entry per local constraint.
 +   * When the topology has SETTLE entries and constr->settled is still
 +   * NULL, initialises it with settle_init(), taking masses and lengths
 +   * from the first SETTLE entry.
 +   * With essential dynamics and domain decomposition, refreshes the
 +   * local ED indices.
 +   */
 +    t_idef *idef;
 +    int    ncons;
 +    t_ilist *settle;
 +    int    iO,iH;
 +    
 +    idef = &top->idef;
 +       
 +    if (constr->ncon_tot > 0)
 +    {
 +        /* We are using the local topology,
 +         * so there are only F_CONSTR constraints.
 +         */
 +        ncons = idef->il[F_CONSTR].nr/3;
 +        
 +        /* With DD we might also need to call LINCS with ncons=0 for
 +         * communicating coordinates to other nodes that do have constraints.
 +         */
 +        if (ir->eConstrAlg == econtLINCS)
 +        {
 +            set_lincs(idef,md,EI_DYNAMICS(ir->eI),cr,constr->lincsd);
 +        }
 +        if (ir->eConstrAlg == econtSHAKE)
 +        {
 +            if (cr->dd)
 +            {
 +                make_shake_sblock_dd(constr,&idef->il[F_CONSTR],&top->cgs,cr->dd);
 +            }
 +            else
 +            {
 +                make_shake_sblock_pd(constr,idef,md);
 +            }
 +            if (ncons > constr->lagr_nalloc)
 +            {
 +                constr->lagr_nalloc = over_alloc_dd(ncons);
 +                srenew(constr->lagr,constr->lagr_nalloc);
 +            }
 +        }
 +    }
 +
 +    if (idef->il[F_SETTLE].nr > 0 && constr->settled == NULL)
 +    {
 +        settle = &idef->il[F_SETTLE];
 +        iO = settle->iatoms[1]; /* atoms of the first SETTLE entry only */
 +        iH = settle->iatoms[2];
 +        constr->settled =
 +            settle_init(md->massT[iO],md->massT[iH],
 +                        md->invmass[iO],md->invmass[iH],
 +                        idef->iparams[settle->iatoms[0]].settle.doh,
 +                        idef->iparams[settle->iatoms[0]].settle.dhh);
 +    }
 +    
 +    /* Make a selection of the local atoms for essential dynamics */
 +    if (constr->ed && cr->dd)
 +    {
 +        dd_make_local_ed_indices(cr->dd,constr->ed);
 +    }
 +}
 +
 +static void constr_recur(t_blocka *at2con,
 +                       t_ilist *ilist,t_iparams *iparams,gmx_bool bTopB,
 +                       int at,int depth,int nc,int *path,
 +                       real r0,real r1,real *r2max,
 +                       int *count)
 +{
 +  int  ncon1;
 +  t_iatom *ia1,*ia2;
 +  int  c,con,a1;
 +  gmx_bool bUse;
 +  t_iatom *ia;
 +  real len,rn0,rn1;
 +
 +  (*count)++;
 +
 +  ncon1 = ilist[F_CONSTR].nr/3;
 +  ia1   = ilist[F_CONSTR].iatoms;
 +  ia2   = ilist[F_CONSTRNC].iatoms;
 +
 +  /* Loop over all constraints connected to this atom */
 +  for(c=at2con->index[at]; c<at2con->index[at+1]; c++) {
 +    con = at2con->a[c];
 +    /* Do not walk over already used constraints */
 +    bUse = TRUE;
 +    for(a1=0; a1<depth; a1++) {
 +      if (con == path[a1])
 +      bUse = FALSE;
 +    }
 +    if (bUse) {
 +      ia = constr_iatomptr(ncon1,ia1,ia2,con);
 +      /* Flexible constraints currently have length 0, which is incorrect */
 +      if (!bTopB)
 +      len = iparams[ia[0]].constr.dA;
 +      else
 +      len = iparams[ia[0]].constr.dB;
 +      /* In the worst case the bond directions alternate */
 +      if (nc % 2 == 0) {
 +      rn0 = r0 + len;
 +      rn1 = r1;
 +      } else {
 +      rn0 = r0;
 +      rn1 = r1 + len;
 +      }
 +      /* Assume angles of 120 degrees between all bonds */
 +      if (rn0*rn0 + rn1*rn1 + rn0*rn1 > *r2max) {
 +      *r2max = rn0*rn0 + rn1*rn1 + r0*rn1;
 +      if (debug) {
 +        fprintf(debug,"Found longer constraint distance: r0 %5.3f r1 %5.3f rmax %5.3f\n", rn0,rn1,sqrt(*r2max));
 +        for(a1=0; a1<depth; a1++)
 +          fprintf(debug," %d %5.3f",
 +                  path[a1],
 +                  iparams[constr_iatomptr(ncon1,ia1,ia2,con)[0]].constr.dA);
 +        fprintf(debug," %d %5.3f\n",con,len);
 +      }
 +      }
 +      /* Limit the number of recursions to 1000*nc,
 +       * so a call does not take more than a second,
 +       * even for highly connected systems.
 +       */
 +      if (depth + 1 < nc && *count < 1000*nc) {
 +      if (ia[1] == at)
 +        a1 = ia[2];
 +      else
 +        a1 = ia[1];
 +      /* Recursion */
 +      path[depth] = con;
 +      constr_recur(at2con,ilist,iparams,
 +                   bTopB,a1,depth+1,nc,path,rn0,rn1,r2max,count);
 +      path[depth] = -1;
 +      }
 +    }
 +  }
 +}
 +
 +static real constr_r_max_moltype(FILE *fplog,
 +                               gmx_moltype_t *molt,t_iparams *iparams,
 +                               t_inputrec *ir)
 +{ /* Return the maximum constraint distance estimate for molecule type
 +   * molt over paths of at most 1+ir->nProjOrder constraints, or 0 when
 +   * the molecule type has no F_CONSTR/F_CONSTRNC entries.
 +   * With free-energy perturbation (ir->efep != efepNO) the A- and
 +   * B-state estimates are interpolated with lambda.
 +   * fplog is not used in this function.
 +   */
 +  int natoms,nflexcon,*path,at,count;
 +
 +  t_blocka at2con;
 +  real r0,r1,r2maxA,r2maxB,rmax,lam0,lam1;
 +
 +  if (molt->ilist[F_CONSTR].nr   == 0 &&
 +      molt->ilist[F_CONSTRNC].nr == 0) {
 +    return 0;
 +  }
 +  
 +  natoms = molt->atoms.nr;
 +
 +  at2con = make_at2con(0,natoms,molt->ilist,iparams,
 +                     EI_DYNAMICS(ir->eI),&nflexcon);
 +  snew(path,1+ir->nProjOrder);
 +  for(at=0; at<1+ir->nProjOrder; at++)
 +    path[at] = -1;
 +
 +  /* A-state: start a walk from every atom */
 +  r2maxA = 0;
 +  for(at=0; at<natoms; at++) {
 +    r0 = 0;
 +    r1 = 0;
 +
 +    count = 0;
 +    constr_recur(&at2con,molt->ilist,iparams,
 +               FALSE,at,0,1+ir->nProjOrder,path,r0,r1,&r2maxA,&count);
 +  }
 +  if (ir->efep == efepNO) {
 +    rmax = sqrt(r2maxA);
 +  } else {
 +    /* B-state: same walk using the B-state lengths */
 +    r2maxB = 0;
 +    for(at=0; at<natoms; at++) {
 +      r0 = 0;
 +      r1 = 0;
 +      count = 0;
 +      constr_recur(&at2con,molt->ilist,iparams,
 +                 TRUE,at,0,1+ir->nProjOrder,path,r0,r1,&r2maxB,&count);
 +    }
-       lam0 += ir->init_step*ir->delta_lambda;
++    lam0 = ir->fepvals->init_lambda;
 +    if (EI_DYNAMICS(ir->eI))
-       lam1 = ir->init_lambda + (ir->init_step + ir->nsteps)*ir->delta_lambda;
++      lam0 += ir->init_step*ir->fepvals->delta_lambda;
 +    rmax = (1 - lam0)*sqrt(r2maxA) + lam0*sqrt(r2maxB);
 +    if (EI_DYNAMICS(ir->eI)) {
++      lam1 = ir->fepvals->init_lambda + (ir->init_step + ir->nsteps)*ir->fepvals->delta_lambda;
 +      rmax = max(rmax,(1 - lam1)*sqrt(r2maxA) + lam1*sqrt(r2maxB));
 +    }
 +  }
 +
 +  done_blocka(&at2con);
 +  sfree(path);
 +
 +  return rmax;
 +}
 +
 +real constr_r_max(FILE *fplog,gmx_mtop_t *mtop,t_inputrec *ir)
 +{ /* Return the largest constr_r_max_moltype() value over all molecule
 +   * types in mtop (0 when there are none) and, when fplog is non-NULL,
 +   * write that value to the log.
 +   */
 +  int mt;
 +  real rmax;
 +
 +  rmax = 0;
 +  for(mt=0; mt<mtop->nmoltype; mt++) {
 +    rmax = max(rmax,
 +             constr_r_max_moltype(fplog,&mtop->moltype[mt],
 +                                  mtop->ffparams.iparams,ir));
 +  }
 +  
 +  if (fplog)
 +    fprintf(fplog,"Maximum distance for %d constraints, at 120 deg. angles, all-trans: %.3f nm\n",1+ir->nProjOrder,rmax);
 +
 +  return rmax;
 +}
 +
 +gmx_constr_t init_constraints(FILE *fplog,
 +                              gmx_mtop_t *mtop,t_inputrec *ir,
 +                              gmx_edsam_t ed,t_state *state,
 +                              t_commrec *cr)
 +{ /* Allocate and initialise the constraint data structure.
 +   *
 +   * Returns NULL when the topology has no constraints or SETTLEs, pull
 +   * constraints are not in use and ed is NULL. Otherwise returns a newly
 +   * allocated struct gmx_constr holding:
 +   *  - the per-molecule-type atom -> constraint tables and the total
 +   *    number of flexible constraints,
 +   *  - LINCS or SHAKE data, depending on ir->eConstrAlg,
 +   *  - the warning limit (999, or the value of GMX_MAXCONSTRWARN),
 +   *  - the essential dynamics handle ed, initialised with init_edsam().
 +   * Calls gmx_fatal() for SHAKE combined with inter-charge-group
 +   * constraints under domain decomposition, for SHAKE with flexible
 +   * constraints, and when more than one SETTLE type is present.
 +   */
 +    int  ncon,nset,nmol,settle_type,i,natoms,mt,nflexcon;
 +    struct gmx_constr *constr;
 +    char *env;
 +    t_ilist *ilist;
 +    gmx_mtop_ilistloop_t iloop;
 +    
 +    ncon =
 +        gmx_mtop_ftype_count(mtop,F_CONSTR) +
 +        gmx_mtop_ftype_count(mtop,F_CONSTRNC);
 +    nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
 +    
 +    if (ncon+nset == 0 && ir->ePull != epullCONSTRAINT && ed == NULL) 
 +    {
 +        return NULL;
 +    }
 +    
 +    snew(constr,1);
 +    
 +    constr->ncon_tot = ncon;
 +    constr->nflexcon = 0;
 +    if (ncon > 0) 
 +    {
 +        constr->n_at2con_mt = mtop->nmoltype;
 +        snew(constr->at2con_mt,constr->n_at2con_mt);
 +        for(mt=0; mt<mtop->nmoltype; mt++) 
 +        {
 +            constr->at2con_mt[mt] = make_at2con(0,mtop->moltype[mt].atoms.nr,
 +                                                mtop->moltype[mt].ilist,
 +                                                mtop->ffparams.iparams,
 +                                                EI_DYNAMICS(ir->eI),&nflexcon);
 +            /* Scale the per-type flexible count by the number of molecules */
 +            for(i=0; i<mtop->nmolblock; i++) 
 +            {
 +                if (mtop->molblock[i].type == mt) 
 +                {
 +                    constr->nflexcon += mtop->molblock[i].nmol*nflexcon;
 +                }
 +            }
 +        }
 +        
 +        if (constr->nflexcon > 0) 
 +        {
 +            if (fplog) 
 +            {
 +                fprintf(fplog,"There are %d flexible constraints\n",
 +                        constr->nflexcon);
 +                if (ir->fc_stepsize == 0) 
 +                {
 +                    fprintf(fplog,"\n"
 +                            "WARNING: step size for flexible constraining = 0\n"
 +                            "         All flexible constraints will be rigid.\n"
 +                            "         Will try to keep all flexible constraints at their original length,\n"
 +                            "         but the lengths may exhibit some drift.\n\n");
 +                    constr->nflexcon = 0; /* NOTE(review): only reached when fplog != NULL */
 +                }
 +            }
 +            if (constr->nflexcon > 0) 
 +            {
 +                please_cite(fplog,"Hess2002");
 +            }
 +        }
 +        
 +        if (ir->eConstrAlg == econtLINCS) 
 +        {
 +            constr->lincsd = init_lincs(fplog,mtop,
 +                                        constr->nflexcon,constr->at2con_mt,
 +                                        DOMAINDECOMP(cr) && cr->dd->bInterCGcons,
 +                                        ir->nLincsIter,ir->nProjOrder);
 +        }
 +        
 +        if (ir->eConstrAlg == econtSHAKE) {
 +            if (DOMAINDECOMP(cr) && cr->dd->bInterCGcons)
 +            {
 +                gmx_fatal(FARGS,"SHAKE is not supported with domain decomposition and constraint that cross charge group boundaries, use LINCS");
 +            }
 +            if (constr->nflexcon) 
 +            {
 +                gmx_fatal(FARGS,"For this system also velocities and/or forces need to be constrained, this can not be done with SHAKE, you should select LINCS");
 +            }
 +            please_cite(fplog,"Ryckaert77a");
 +            if (ir->bShakeSOR) 
 +            {
 +                please_cite(fplog,"Barth95a");
 +            }
 +
 +            constr->shaked = shake_init();
 +        }
 +    }
 +  
 +    if (nset > 0) {
 +        please_cite(fplog,"Miyamoto92a");
 +        
 +        /* Check that we have only one settle type */
 +        settle_type = -1;
 +        iloop = gmx_mtop_ilistloop_init(mtop);
 +        while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol)) 
 +        {
 +            for (i=0; i<ilist[F_SETTLE].nr; i+=4) 
 +            {
 +                if (settle_type == -1) 
 +                {
 +                    settle_type = ilist[F_SETTLE].iatoms[i];
 +                } 
 +                else if (ilist[F_SETTLE].iatoms[i] != settle_type) 
 +                {
 +                    gmx_fatal(FARGS,
 +                              "The [molecules] section of your topology specifies more than one block of\n"
 +                              "a [moleculetype] with a [settles] block. Only one such is allowed. If you\n"
 +                              "are trying to partition your solvent into different *groups* (e.g. for\n"
 +                              "freezing, T-coupling, etc.) then you are using the wrong approach. Index\n"
 +                              "files specify groups. Otherwise, you may wish to change the least-used\n"
 +                              "block of molecules with SETTLE constraints into 3 normal constraints.");
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* Default warning limit; GMX_MAXCONSTRWARN overrides it, and a value
 +     * that does not parse as an integer leaves the limit at 0.
 +     */
 +    constr->maxwarn = 999;
 +    env = getenv("GMX_MAXCONSTRWARN");
 +    if (env) 
 +    {
 +        constr->maxwarn = 0;
 +        sscanf(env,"%d",&constr->maxwarn);
 +        if (fplog) 
 +        {
 +            fprintf(fplog,
 +                    "Setting the maximum number of constraint warnings to %d\n",
 +                    constr->maxwarn);
 +        }
 +        if (MASTER(cr)) 
 +        {
 +            fprintf(stderr,
 +                    "Setting the maximum number of constraint warnings to %d\n",
 +                    constr->maxwarn);
 +        }
 +    }
 +    if (constr->maxwarn < 0 && fplog) 
 +    {
 +        fprintf(fplog,"maxwarn < 0, will not stop on constraint errors\n");
 +    }
 +    constr->warncount_lincs  = 0;
 +    constr->warncount_settle = 0;
 +    
 +    /* Initialize the essential dynamics sampling.
 +     * Put the pointer to the ED struct in constr */
 +    constr->ed = ed;
 +    if (ed != NULL) 
 +    {
 +        init_edsam(mtop,ir,cr,ed,state->x,state->box);
 +    }
 +    
 +    constr->warn_mtop = mtop;
 +    
 +    return constr;
 +}
 +
 +t_blocka *atom2constraints_moltype(gmx_constr_t constr)
 +{ /* Return the per-molecule-type atom -> constraint tables stored in
 +   * constr (constr->at2con_mt); the pointer is not copied.
 +   */
 +  return constr->at2con_mt;
 +}
 +
 +
 +gmx_bool inter_charge_group_constraints(gmx_mtop_t *mtop)
 +{ /* Return TRUE when any F_CONSTR or F_CONSTRNC constraint in a molecule
 +   * block of mtop connects two atoms that belong to different charge
 +   * groups, FALSE otherwise. The search stops at the first such
 +   * constraint.
 +   */
 +  const gmx_moltype_t *molt;
 +  const t_block *cgs;
 +  const t_ilist *il;
 +  int  mb;
 +  int  nat,*at2cg,cg,a,ftype,i;
 +  gmx_bool bInterCG;
 +
 +  bInterCG = FALSE;
 +  for(mb=0; mb<mtop->nmolblock && !bInterCG; mb++) {
 +    molt = &mtop->moltype[mtop->molblock[mb].type];
 +
 +    if (molt->ilist[F_CONSTR].nr   > 0 ||
 +      molt->ilist[F_CONSTRNC].nr > 0) {
 +      /* Build a temporary atom -> charge group table for this molecule type */
 +      cgs  = &molt->cgs;
 +      snew(at2cg,molt->atoms.nr);
 +      for(cg=0; cg<cgs->nr; cg++) {
 +      for(a=cgs->index[cg]; a<cgs->index[cg+1]; a++)
 +        at2cg[a] = cg;
 +      }
 +      
 +      for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +      il = &molt->ilist[ftype];
 +      for(i=0; i<il->nr && !bInterCG; i+=3) {
 +        if (at2cg[il->iatoms[i+1]] != at2cg[il->iatoms[i+2]])
 +          bInterCG = TRUE;
 +      }
 +      }
 +      sfree(at2cg);
 +    }
 +  }
 +
 +  return bInterCG;
 +}
++
++/* Helper functions for Andersen temperature control, needed because the
++ * gmx_constr struct is only defined in constr.c. They return the list
++ * of blocks (get_sblock) and the number of blocks (get_nblocks).  */
++
++extern int *get_sblock(struct gmx_constr *constr)
++{ /* Return constr->sblock, the array of block start offsets; the
++   * pointer is returned as-is, not copied.
++   */
++    return constr->sblock;
++}
++
++extern int get_nblocks(struct gmx_constr *constr)
++{ /* Return constr->nblocks, the number of blocks in constr->sblock. */
++    return constr->nblocks;
++}
index 09fa4fba054e8f71627833a86d0087e553a72687,0000000000000000000000000000000000000000..19a7e61364c9b4ea4dc5e6e63e940168592f220d
mode 100644,000000..100644
--- /dev/null
@@@ -1,1403 -1,0 +1,1670 @@@
-     
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include <assert.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "update.h"
 +#include "vec.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "nrnb.h"
 +#include "gmx_random.h"
 +#include "update.h"
 +#include "mdrun.h"
 +
 +#define NTROTTERPARTS 3
 +
 +/* Suzuki-Yoshida Constants, for n=3 and n=5, for symplectic integration  */
 +/* for n=1, w0 = 1 */
 +/* for n=3, w0 = w2 = 1/(2-2^-(1/3)), w1 = 1-2*w0 */
 +/* for n=5, w0 = w1 = w3 = w4 = 1/(4-4^-(1/3)), w2 = 1-4*w0 */
 +
 +#define MAX_SUZUKI_YOSHIDA_NUM 5
 +#define SUZUKI_YOSHIDA_NUM  5
 +
 +static const double sy_const_1[] = { 1. }; /* weights for n=1 */
 +static const double sy_const_3[] = { 0.828981543588751,-0.657963087177502,0.828981543588751 }; /* weights for n=3 */
 +static const double sy_const_5[] = { 0.2967324292201065,0.2967324292201065,-0.186929716880426,0.2967324292201065,0.2967324292201065 }; /* weights for n=5 */
 +
 +static const double* sy_const[] = { /* indexed by n; NULL for the unsupported n = 0, 2, 4 */
 +    NULL,
 +    sy_const_1,
 +    NULL,
 +    sy_const_3,
 +    NULL,
 +    sy_const_5
 +};
 +
 +/*
 +static const double sy_const[MAX_SUZUKI_YOSHIDA_NUM+1][MAX_SUZUKI_YOSHIDA_NUM+1] = {
 +    {},
 +    {1},
 +    {},
 +    {0.828981543588751,-0.657963087177502,0.828981543588751},
 +    {},
 +    {0.2967324292201065,0.2967324292201065,-0.186929716880426,0.2967324292201065,0.2967324292201065}
 +};*/
 +
 +/* these integration routines are only referenced inside this file */
 +static void NHC_trotter(t_grpopts *opts,int nvar, gmx_ekindata_t *ekind,real dtfull,
 +                        double xi[],double vxi[], double scalefac[], real *veta, t_extmass *MassQ, gmx_bool bEkinAveVel)
 +
 +{
 +    /* general routine for both barostat and thermostat nose hoover chains
 +     *
 +     * opts        group options; nhchainlength, nrdf and ref_t are read
 +     * nvar        number of chains to integrate
 +     * ekind       kinetic energy data, read in the thermostat case
 +     * dtfull      full time step; each sub-step uses a weighted fraction
 +     * xi, vxi     chain positions and velocities, nhchainlength entries
 +     *             per chain; both are updated in place
 +     * scalefac    per-chain scaling factor, multiplied in place; NULL
 +     *             selects the barostat case, where *veta is scaled instead
 +     * veta        barostat velocity, read and updated in the barostat case
 +     * MassQ       inverse masses: QPinv/Winv (barostat) or Qinv (thermostat)
 +     * bEkinAveVel use the full-step (ekinf) rather than the half-step
 +     *             (ekinh) kinetic energy in the thermostat case
 +     */
 +
 +    int   i,j,mi,mj,jmax;
 +    double Ekin,Efac,reft,kT,nd;
 +    double dt;
 +    t_grp_tcstat *tcstat;
 +    double *ivxi,*ixi;
 +    double *iQinv;
 +    double *GQ;
 +    gmx_bool bBarostat;
 +    int mstepsi, mstepsj;
 +    int ns = SUZUKI_YOSHIDA_NUM;  /* set the degree of integration in the types/state.h file */
 +    int nh = opts->nhchainlength;
 +    
 +    snew(GQ,nh); /* scratch array, freed before returning */
 +    mstepsi = mstepsj = ns;
 +
 +/* if scalefac is NULL, we are doing the NHC of the barostat */
 +    
 +    bBarostat = FALSE;
 +    if (scalefac == NULL) {
 +        bBarostat = TRUE;
 +    }
 +
 +    for (i=0; i<nvar; i++) 
 +    {
-             nd = 1; /* THIS WILL CHANGE IF NOT ISOTROPIC */
++
 +        /* make it easier to iterate by selecting 
 +           out the sub-array that corresponds to this T group */
 +        
 +        ivxi = &vxi[i*nh];
 +        ixi = &xi[i*nh];
 +        if (bBarostat) {
 +            iQinv = &(MassQ->QPinv[i*nh]); 
-     
-     pscal   = calc_pres(ir->ePBC,nwall,box,ekinmod,vir,localpres) + pcorr;
-     
++            nd = 1.0; /* THIS WILL CHANGE IF NOT ISOTROPIC */
 +            reft = max(0.0,opts->ref_t[0]);
 +            Ekin = sqr(*veta)/MassQ->Winv;
 +        } else {
 +            iQinv = &(MassQ->Qinv[i*nh]);  
 +            tcstat = &ekind->tcstat[i];
 +            nd = opts->nrdf[i];
 +            reft = max(0.0,opts->ref_t[i]);
 +            if (bEkinAveVel) 
 +            {
 +                Ekin = 2*trace(tcstat->ekinf)*tcstat->ekinscalef_nhc;
 +            } else {
 +                Ekin = 2*trace(tcstat->ekinh)*tcstat->ekinscaleh_nhc;
 +            }
 +        }
 +        kT = BOLTZ*reft;
 +
 +        for(mi=0;mi<mstepsi;mi++) 
 +        {
 +            for(mj=0;mj<mstepsj;mj++)
 +            { 
 +                /* weighting for this step using Suzuki-Yoshida integration - fixed at 5 */
 +                dt = sy_const[ns][mj] * dtfull / mstepsi;
 +                
 +                /* compute the thermal forces */
 +                GQ[0] = iQinv[0]*(Ekin - nd*kT);
 +                
 +                for (j=0;j<nh-1;j++) 
 +                {     
 +                    if (iQinv[j+1] > 0) {
 +                        /* we actually don't need to update here if we save the 
 +                           state of the GQ, but it's easier to just recompute*/
 +                        GQ[j+1] = iQinv[j+1]*((sqr(ivxi[j])/iQinv[j])-kT);      
 +                    } else {
 +                        GQ[j+1] = 0;
 +                    }
 +                }
 +                
 +                /* first half: update the chain velocities from the last link down */
 +                ivxi[nh-1] += 0.25*dt*GQ[nh-1];
 +                for (j=nh-1;j>0;j--) 
 +                { 
 +                    Efac = exp(-0.125*dt*ivxi[j]);
 +                    ivxi[j-1] = Efac*(ivxi[j-1]*Efac + 0.25*dt*GQ[j-1]);
 +                }
 +                
 +                Efac = exp(-0.5*dt*ivxi[0]);
 +                if (bBarostat) {
 +                    *veta *= Efac;                
 +                } else {
 +                    scalefac[i] *= Efac;
 +                }
 +                Ekin *= (Efac*Efac);
 +                
 +                /* Issue - if the KE is an average of the last and the current temperatures, then we might not be
 +                   able to scale the kinetic energy directly with this factor.  Might take more bookkeeping -- have to
 +                   think about this a bit more . . . */
 +
 +                GQ[0] = iQinv[0]*(Ekin - nd*kT);
 +                
 +                /* update thermostat positions */
 +                for (j=0;j<nh;j++) 
 +                { 
 +                    ixi[j] += 0.5*dt*ivxi[j];
 +                }
 +                
 +                /* second half: update the chain velocities from the first link up */
 +                for (j=0;j<nh-1;j++) 
 +                { 
 +                    Efac = exp(-0.125*dt*ivxi[j+1]);
 +                    ivxi[j] = Efac*(ivxi[j]*Efac + 0.25*dt*GQ[j]);
 +                    if (iQinv[j+1] > 0) {
 +                        GQ[j+1] = iQinv[j+1]*((sqr(ivxi[j])/iQinv[j])-kT);  
 +                    } else {
 +                        GQ[j+1] = 0;
 +                    }
 +                }
 +                ivxi[nh-1] += 0.25*dt*GQ[nh-1];
 +            }
 +        }
 +    }
 +    sfree(GQ);
 +}
 +
 +static void boxv_trotter(t_inputrec *ir, real *veta, real dt, tensor box, 
 +                         gmx_ekindata_t *ekind, tensor vir, real pcorr, real ecorr, t_extmass *MassQ)
 +{ /* Advance the barostat velocity *veta by half a step:
 +   * *veta += 0.5*dt*GW, where GW is computed from the box volume,
 +   * MassQ->Winv and the difference between the scaled pressure and the
 +   * reference pressure ir->ref_p.
 +   * Calls gmx_fatal() when the first T-group has no degrees of freedom.
 +   */
 +
 +    real  pscal;
 +    double alpha;
 +    int   i,j,d,n,nwall;
 +    real  T,GW,vol;
 +    tensor Winvm,ekinmod,localpres;
 +    
 +    /* The heat bath is coupled to a separate barostat, the last temperature group.  In the 
 +       2006 Tuckerman et al paper., the order is iL_{T_baro} iL {T_part}
 +    */
 +    
 +    if (ir->epct==epctSEMIISOTROPIC) 
 +    {
 +        nwall = 2;
 +    } 
 +    else 
 +    {
 +        nwall = 3;
 +    }
 +
 +    /* eta is in pure units.  veta is in units of ps^-1. GW is in 
 +       units of ps^-2.  However, eta has a reference of 1 nm^3, so care must be 
 +       taken to use only RATIOS of eta in updating the volume. */
 +    
 +    /* we take the partial pressure tensors, modify the 
 +       kinetic energy tensor, and reconvert to pressure */
 +    
 +    if (ir->opts.nrdf[0]==0) 
 +    { 
 +        gmx_fatal(FARGS,"Barostat is coupled to a T-group with no degrees of freedom\n");    
 +    } 
 +    /* alpha factor for phase space volume, then multiply by the ekin scaling factor.  */
 +    alpha = 1.0 + DIM/((double)ir->opts.nrdf[0]);
 +    alpha *= ekind->tcstat[0].ekinscalef_nhc;
 +    msmul(ekind->ekin,alpha,ekinmod);  
-     
++    /* for now, we use Elr = 0, because if you want to get it right, you
++       really should be using PME. Maybe print a warning? */
++
++    pscal   = calc_pres(ir->ePBC,nwall,box,ekinmod,vir,localpres);
++
 +    vol = det(box);
 +    GW = (vol*(MassQ->Winv/PRESFAC))*(DIM*pscal - trace(ir->ref_p));   /* W is in ps^2 * bar * nm^3 */
-     
-     if ((opts->tau_t[i] > 0) && (T > 0.0)) {
-  
-       reft = max(0.0,opts->ref_t[i]);
-       lll  = sqrt(1.0 + (dt/opts->tau_t[i])*(reft/T-1.0));
-       ekind->tcstat[i].lambda = max(min(lll,1.25),0.8);
++
 +    *veta += 0.5*dt*GW;   
 +}
 +
 +/* 
 + * This file implements temperature and pressure coupling algorithms:
 + * For now only the Weak coupling and the modified weak coupling.
 + *
 + * Furthermore computation of pressure and temperature is done here
 + *
 + */
 +
 +real calc_pres(int ePBC,int nwall,matrix box,tensor ekin,tensor vir,
 +               tensor pres)
 +{ /* Fill the pressure tensor pres from the kinetic energy tensor ekin
 +   * and the virial vir, and return the scalar pressure trace(pres)/DIM.
 +   * Without periodic boundaries (epbcNONE), or with epbcXY and
 +   * nwall != 2, pres is cleared and 0 is returned.
 +   */
 +    int  n,m;
 +    real fac;
 +    
 +    if (ePBC==epbcNONE || (ePBC==epbcXY && nwall!=2))
 +        clear_mat(pres);
 +    else {
 +        /* To be worked out: which ekin applies here, see Evans & Morris - E. 
 +         * The pressure tensor probably has to be corrected for the net flow  
 +         * in the system...       
 +         */
 +        
 +        fac=PRESFAC*2.0/det(box);
 +        for(n=0; (n<DIM); n++)
 +            for(m=0; (m<DIM); m++)
 +                pres[n][m] = (ekin[n][m] - vir[n][m])*fac;
 +        
 +        if (debug) {
 +            pr_rvecs(debug,0,"PC: pres",pres,DIM);
 +            pr_rvecs(debug,0,"PC: ekin",ekin,DIM);
 +            pr_rvecs(debug,0,"PC: vir ",vir, DIM);
 +            pr_rvecs(debug,0,"PC: box ",box, DIM);
 +        }
 +    }
 +    return trace(pres)/DIM;
 +}
 +
 +real calc_temp(real ekin,real nrdf)
 +{ /* Return the temperature 2*ekin/(nrdf*BOLTZ) for kinetic energy ekin
 +   * and nrdf degrees of freedom, or 0 when nrdf is not positive.
 +   */
 +    if (nrdf > 0)
 +        return (2.0*ekin)/(nrdf*BOLTZ);
 +    else
 +        return 0;
 +}
 +
 +void parrinellorahman_pcoupl(FILE *fplog,gmx_large_int_t step,
 +                           t_inputrec *ir,real dt,tensor pres,
 +                           tensor box,tensor box_rel,tensor boxv,
 +                           tensor M,matrix mu,gmx_bool bFirstStep)
 +{
 +  /* This doesn't do any coordinate updating. It just
 +   * integrates the box vector equations from the calculated
 +   * acceleration due to pressure difference. We also compute
 +   * the tensor M which is used in update to couple the particle
 +   * coordinates to the box vectors.
 +   *
 +   * In Nose and Klein (Mol.Phys 50 (1983) no 5., p 1055) this is
 +   * given as
 +   *            -1    .           .     -1
 +   * M_nk = (h')   * (h' * h + h' h) * h
 +   *
 +   * with the dots denoting time derivatives and h is the transformation from
 +   * the scaled frame to the real frame, i.e. the TRANSPOSE of the box. 
 +   * This also goes for the pressure and M tensors - they are transposed relative
 +   * to ours. Our equation thus becomes:
 +   *
 +   *                  -1       .    .           -1
 +   * M_gmx = M_nk' = b  * (b * b' + b * b') * b'
 +   * 
 +   * where b is the gromacs box matrix.                       
 +   * Our box accelerations are given by
 +   *   ..                                    ..
 +   *   b = vol/W inv(box') * (P-ref_P)     (=h')
 +   */
 +  
 +  int    d,n;
 +  tensor winv;
 +  real   vol=box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +  real   atot,arel,change,maxchange,xy_pressure;
 +  tensor invbox,pdiff,t1,t2;
 +
 +  real maxl;
 +
 +  m_inv_ur0(box,invbox);
 +
 +  if (!bFirstStep) {
 +    /* Note that PRESFAC does not occur here.
 +     * The pressure and compressibility always occur as a product,
 +     * therefore the pressure unit drops out.
 +     */
 +    maxl=max(box[XX][XX],box[YY][YY]);
 +    maxl=max(maxl,box[ZZ][ZZ]);
 +    for(d=0;d<DIM;d++)
 +      for(n=0;n<DIM;n++)
 +      winv[d][n]=
 +        (4*M_PI*M_PI*ir->compress[d][n])/(3*ir->tau_p*ir->tau_p*maxl);
 +    
 +    m_sub(pres,ir->ref_p,pdiff);
 +    
 +    if(ir->epct==epctSURFACETENSION) {
 +      /* Unlike Berendsen coupling it might not be trivial to include a z
 +       * pressure correction here? On the other hand we don't scale the
 +       * box momentarily, but change accelerations, so it might not be crucial.
 +       */
 +      xy_pressure=0.5*(pres[XX][XX]+pres[YY][YY]);
 +      for(d=0;d<ZZ;d++)
 +      pdiff[d][d]=(xy_pressure-(pres[ZZ][ZZ]-ir->ref_p[d][d]/box[d][d]));
 +    }
 +    
 +    tmmul(invbox,pdiff,t1);
 +    /* Move the off-diagonal elements of the 'force' to one side to ensure
 +     * that we obey the box constraints.
 +     */
 +    for(d=0;d<DIM;d++) {
 +      for(n=0;n<d;n++) {
 +      t1[d][n] += t1[n][d];
 +      t1[n][d] = 0;
 +      }
 +    }
 +    
 +    switch (ir->epct) {
 +    case epctANISOTROPIC:
 +      for(d=0;d<DIM;d++) 
 +      for(n=0;n<=d;n++)
 +        t1[d][n] *= winv[d][n]*vol;
 +      break;
 +    case epctISOTROPIC:
 +      /* calculate total volume acceleration */
 +      atot=box[XX][XX]*box[YY][YY]*t1[ZZ][ZZ]+
 +      box[XX][XX]*t1[YY][YY]*box[ZZ][ZZ]+
 +      t1[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +      arel=atot/(3*vol);
 +      /* set all RELATIVE box accelerations equal, and maintain total V
 +       * change speed */
 +      for(d=0;d<DIM;d++)
 +      for(n=0;n<=d;n++)
 +        t1[d][n] = winv[0][0]*vol*arel*box[d][n];    
 +      break;
 +    case epctSEMIISOTROPIC:
 +    case epctSURFACETENSION:
 +      /* Note the correction to pdiff above for surftens. coupling  */
 +      
 +      /* calculate total XY volume acceleration */
 +      atot=box[XX][XX]*t1[YY][YY]+t1[XX][XX]*box[YY][YY];
 +      arel=atot/(2*box[XX][XX]*box[YY][YY]);
 +      /* set RELATIVE XY box accelerations equal, and maintain total V
 +       * change speed. Don't change the third box vector accelerations */
 +      for(d=0;d<ZZ;d++)
 +      for(n=0;n<=d;n++)
 +        t1[d][n] = winv[d][n]*vol*arel*box[d][n];
 +      for(n=0;n<DIM;n++)
 +      t1[ZZ][n] *= winv[d][n]*vol;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Parrinello-Rahman pressure coupling type %s "
 +                "not supported yet\n",EPCOUPLTYPETYPE(ir->epct));
 +      break;
 +    }
 +    
 +    maxchange=0;
 +    for(d=0;d<DIM;d++)
 +      for(n=0;n<=d;n++) {
 +      boxv[d][n] += dt*t1[d][n];
 +      
 +      /* We do NOT update the box vectors themselves here, since
 +       * we need them for shifting later. It is instead done last
 +       * in the update() routine.
 +       */
 +      
 +      /* Calculate the change relative to diagonal elements-
 +         since it's perfectly ok for the off-diagonal ones to
 +         be zero it doesn't make sense to check the change relative
 +         to its current size.
 +      */
 +      
 +      change=fabs(dt*boxv[d][n]/box[d][d]);
 +      
 +      if (change>maxchange)
 +        maxchange=change;
 +      }
 +    
 +    if (maxchange > 0.01 && fplog) {
 +      char buf[22];
 +      fprintf(fplog,
 +              "\nStep %s  Warning: Pressure scaling more than 1%%. "
 +              "This may mean your system\n is not yet equilibrated. "
 +              "Use of Parrinello-Rahman pressure coupling during\n"
 +              "equilibration can lead to simulation instability, "
 +              "and is discouraged.\n",
 +            gmx_step_str(step,buf));
 +    }
 +  }
 +  
 +  preserve_box_shape(ir,box_rel,boxv);
 +
 +  mtmul(boxv,box,t1);       /* t1=boxv * b' */
 +  mmul(invbox,t1,t2);
 +  mtmul(t2,invbox,M);
 +
 +  /* Determine the scaling matrix mu for the coordinates */
 +  for(d=0;d<DIM;d++)
 +    for(n=0;n<=d;n++)
 +      t1[d][n] = box[d][n] + dt*boxv[d][n];
 +  preserve_box_shape(ir,box_rel,t1);
 +  /* t1 is the box at t+dt, determine mu as the relative change */
 +  mmul_ur0(invbox,t1,mu);
 +}
 +
 +void berendsen_pcoupl(FILE *fplog,gmx_large_int_t step, 
 +                    t_inputrec *ir,real dt, tensor pres,matrix box,
 +                    matrix mu)
 +{
 +  int    d,n;
 +  real   scalar_pressure, xy_pressure, p_corr_z;
 +  char   *ptr,buf[STRLEN];
 +
 +  /*
 +   *  Calculate the scaling matrix mu
 +   */
 +  scalar_pressure=0;
 +  xy_pressure=0;
 +  for(d=0; d<DIM; d++) {
 +    scalar_pressure += pres[d][d]/DIM;
 +    if (d != ZZ)
 +      xy_pressure += pres[d][d]/(DIM-1);
 +  }
 +  /* Pressure is now in bar, everywhere. */
 +#define factor(d,m) (ir->compress[d][m]*dt/ir->tau_p)
 +  
 +  /* mu has been changed from pow(1+...,1/3) to 1+.../3, since this is
 +   * necessary for triclinic scaling
 +   */
 +  clear_mat(mu);
 +  switch (ir->epct) {
 +  case epctISOTROPIC:
 +    for(d=0; d<DIM; d++) 
 +      {
 +      mu[d][d] = 1.0 - factor(d,d)*(ir->ref_p[d][d] - scalar_pressure) /DIM;
 +      }
 +    break;
 +  case epctSEMIISOTROPIC:
 +    for(d=0; d<ZZ; d++)
 +      mu[d][d] = 1.0 - factor(d,d)*(ir->ref_p[d][d]-xy_pressure)/DIM;
 +    mu[ZZ][ZZ] = 
 +      1.0 - factor(ZZ,ZZ)*(ir->ref_p[ZZ][ZZ] - pres[ZZ][ZZ])/DIM;
 +    break;
 +  case epctANISOTROPIC:
 +    for(d=0; d<DIM; d++)
 +      for(n=0; n<DIM; n++)
 +      mu[d][n] = (d==n ? 1.0 : 0.0) 
 +        -factor(d,n)*(ir->ref_p[d][n] - pres[d][n])/DIM;
 +    break;
 +  case epctSURFACETENSION:
 +    /* ir->ref_p[0/1] is the reference surface-tension times *
 +     * the number of surfaces                                */
 +    if (ir->compress[ZZ][ZZ])
 +      p_corr_z = dt/ir->tau_p*(ir->ref_p[ZZ][ZZ] - pres[ZZ][ZZ]);
 +    else
 +      /* when the compressibility is zero, set the pressure correction *
 +       * in the z-direction to zero to get the correct surface tension */
 +      p_corr_z = 0;
 +    mu[ZZ][ZZ] = 1.0 - ir->compress[ZZ][ZZ]*p_corr_z;
 +    for(d=0; d<DIM-1; d++)
 +      mu[d][d] = 1.0 + factor(d,d)*(ir->ref_p[d][d]/(mu[ZZ][ZZ]*box[ZZ][ZZ])
 +                                  - (pres[ZZ][ZZ]+p_corr_z - xy_pressure))/(DIM-1);
 +    break;
 +  default:
 +    gmx_fatal(FARGS,"Berendsen pressure coupling type %s not supported yet\n",
 +              EPCOUPLTYPETYPE(ir->epct));
 +    break;
 +  }
 +  /* To fulfill the orientation restrictions on triclinic boxes
 +   * we will set mu_yx, mu_zx and mu_zy to 0 and correct
 +   * the other elements of mu to first order.
 +   */
 +  mu[YY][XX] += mu[XX][YY];
 +  mu[ZZ][XX] += mu[XX][ZZ];
 +  mu[ZZ][YY] += mu[YY][ZZ];
 +  mu[XX][YY] = 0;
 +  mu[XX][ZZ] = 0;
 +  mu[YY][ZZ] = 0;
 +
 +  if (debug) {
 +    pr_rvecs(debug,0,"PC: pres ",pres,3);
 +    pr_rvecs(debug,0,"PC: mu   ",mu,3);
 +  }
 +  
 +  if (mu[XX][XX]<0.99 || mu[XX][XX]>1.01 ||
 +      mu[YY][YY]<0.99 || mu[YY][YY]>1.01 ||
 +      mu[ZZ][ZZ]<0.99 || mu[ZZ][ZZ]>1.01) {
 +    char buf2[22];
 +    sprintf(buf,"\nStep %s  Warning: pressure scaling more than 1%%, "
 +          "mu: %g %g %g\n",
 +          gmx_step_str(step,buf2),mu[XX][XX],mu[YY][YY],mu[ZZ][ZZ]);
 +    if (fplog)
 +      fprintf(fplog,"%s",buf);
 +    fprintf(stderr,"%s",buf);
 +  }
 +}
 +
 +void berendsen_pscale(t_inputrec *ir,matrix mu,
 +                    matrix box,matrix box_rel,
 +                    int start,int nr_atoms,
 +                    rvec x[],unsigned short cFREEZE[],
 +                    t_nrnb *nrnb)
 +{
 +  ivec   *nFreeze=ir->opts.nFreeze;
 +  int    n,d,g=0;
 +      
 +  /* Scale the positions */
 +  for (n=start; n<start+nr_atoms; n++) {
 +    if (cFREEZE)
 +      g = cFREEZE[n];
 +    
 +    if (!nFreeze[g][XX])
 +      x[n][XX] = mu[XX][XX]*x[n][XX]+mu[YY][XX]*x[n][YY]+mu[ZZ][XX]*x[n][ZZ];
 +    if (!nFreeze[g][YY])
 +      x[n][YY] = mu[YY][YY]*x[n][YY]+mu[ZZ][YY]*x[n][ZZ];
 +    if (!nFreeze[g][ZZ])
 +      x[n][ZZ] = mu[ZZ][ZZ]*x[n][ZZ];
 +  }
 +  /* compute final boxlengths */
 +  for (d=0; d<DIM; d++) {
 +    box[d][XX] = mu[XX][XX]*box[d][XX]+mu[YY][XX]*box[d][YY]+mu[ZZ][XX]*box[d][ZZ];
 +    box[d][YY] = mu[YY][YY]*box[d][YY]+mu[ZZ][YY]*box[d][ZZ];
 +    box[d][ZZ] = mu[ZZ][ZZ]*box[d][ZZ];
 +  }      
 +
 +  preserve_box_shape(ir,box_rel,box);
 +  
 +  /* (un)shifting should NOT be done after this,
 +   * since the box vectors might have changed
 +   */
 +  inc_nrnb(nrnb,eNR_PCOUPL,nr_atoms);
 +}
 +
 +void berendsen_tcoupl(t_inputrec *ir,gmx_ekindata_t *ekind,real dt)
 +{
 +    t_grpopts *opts;
 +    int    i;
 +    real   T,reft=0,lll;
 +
 +    opts = &ir->opts;
 +
 +    for(i=0; (i<opts->ngtc); i++)
 +    {
 +        if (ir->eI == eiVV)
 +        {
 +            T = ekind->tcstat[i].T;
 +        }
 +        else
 +        {
 +            T = ekind->tcstat[i].Th;
 +        }
-     else {
-        ekind->tcstat[i].lambda = 1.0;
++
++        if ((opts->tau_t[i] > 0) && (T > 0.0)) {  
++            reft = max(0.0,opts->ref_t[i]);
++            lll  = sqrt(1.0 + (dt/opts->tau_t[i])*(reft/T-1.0));
++            ekind->tcstat[i].lambda = max(min(lll,1.25),0.8);
++        }
++        else {
++            ekind->tcstat[i].lambda = 1.0;
++        }
++
++        if (debug)
++        {
++            fprintf(debug,"TC: group %d: T: %g, Lambda: %g\n",
++                    i,T,ekind->tcstat[i].lambda);
++        }
 +    }
-     if (debug)
-       fprintf(debug,"TC: group %d: T: %g, Lambda: %g\n",
-             i,T,ekind->tcstat[i].lambda);
-   }
++}
++
++static int poisson_variate(real lambda,gmx_rng_t rng) {
++
++    real L;
++    int k=0;
++    real p=1.0;
++
++    L = exp(-lambda);
++
++    do
++    {
++        k = k+1;
++        p *= gmx_rng_uniform_real(rng);
++    } while (p>L);
++
++    return k-1;
++}
++
++void andersen_tcoupl(t_inputrec *ir,t_mdatoms *md,t_state *state, gmx_rng_t rng, real rate, t_idef *idef, int nblocks, int *sblock,gmx_bool *randatom, int *randatom_list, gmx_bool *randomize, real *boltzfac)
++{
++    t_grpopts *opts;
++    int    i,j,k,d,len,n,ngtc,gc=0;
++    int    nshake, nsettle, nrandom, nrand_group;
++    real   boltz,scal,reft,prand;
++    t_iatom *iatoms;
++
++    /* convenience variables */
++    opts = &ir->opts;
++    ngtc = opts->ngtc;
++
++    /* idef is only passed in if it's chance-per-particle andersen, so
++       it essentially serves as a boolean to determine which type of
++       andersen is being used */
++    if (idef) {
++
++        /* randomly select atoms to randomize.  However, all constraint
++           groups have to have either all of the atoms or none of the
++           atoms randomized.
++
++           Algorithm:
++           1. Select whether or not to randomize each atom to get the correct probability.
++           2. Cycle through the constraint groups.
++              2a. for each constraint group, determine the fraction f of that constraint group that are
++                  chosen to be randomized.
++              2b. all atoms in the constraint group are randomized with probability f.
++        */
++
++        nrandom = 0;
++        if ((rate < 0.05) && (md->homenr > 50))
++        {
++            /* if the rate is relatively high, use a standard method, if low rate,
++             * use poisson */
++            /* Poisson distribution approximation, more efficient for
++             * low rates, fewer random numbers required */
++            nrandom = poisson_variate(md->homenr*rate,rng);  /* how many do we randomize? Use Poisson. */
++            /* now we know how many, choose them randomly. No worries about repeats, at this rate, it's negligible.
++               worst thing that happens, it lowers the true rate by a negligible amount */
++            for (i=0;i<nrandom;i++)
++            {
++                randatom[(int)(gmx_rng_uniform_real(rng)*md->homenr)] = TRUE;
++            }
++        }
++        else
++        {
++            for (i=0;i<md->homenr;i++)
++            {
++                if (gmx_rng_uniform_real(rng)<rate)
++                {
++                    randatom[i] = TRUE;
++                    nrandom++;
++                }
++            }
++        }
++
++        /* instead of looping over the constraint groups, if we had a
++           list of which atoms were in which constraint groups, we
++           could then loop over only the groups that are randomized
++           now.  But that is not available now.  Create later after
++           determining whether there actually is any slowing. */
++
++        /* first, loop through the settles to make sure all groups are either entirely randomized, or not randomized. */
++
++        nsettle  = idef->il[F_SETTLE].nr/2;
++        for (i=0;i<nsettle;i++)
++        {
++            iatoms = idef->il[F_SETTLE].iatoms;
++            nrand_group = 0;
++            for (k=0;k<3;k++)  /* settles are always 3 atoms, hardcoded */
++            {
++                if (randatom[iatoms[2*i+1]+k])
++                {
++                    nrand_group++;     /* count the number of atoms to be shaken in the settles group */
++                    randatom[iatoms[2*i+1]+k] = FALSE;
++                    nrandom--;
++                }
++            }
++            if (nrand_group > 0)
++            {
++                prand = (nrand_group)/3.0;  /* use this fraction to compute the probability the
++                                               whole group is randomized */
++                if (gmx_rng_uniform_real(rng)<prand)
++                {
++                    for (k=0;k<3;k++)
++                    {
++                        randatom[iatoms[2*i+1]+k] = TRUE;   /* mark them all to be randomized */
++                    }
++                    nrandom+=3;
++                }
++            }
++        }
++
++        /* now loop through the shake groups */
++        nshake = nblocks;
++        for (i=0;i<nshake;i++)
++        {
++            iatoms = &(idef->il[F_CONSTR].iatoms[sblock[i]]);
++            len = sblock[i+1]-sblock[i];
++            nrand_group = 0;
++            for (k=0;k<len;k++)
++            {
++                if (k%3 != 0)
++                {  /* only 2/3 of the sblock items are atoms, the others are labels */
++                    if (randatom[iatoms[k]])
++                    {
++                        nrand_group++;
++                        randatom[iatoms[k]] = FALSE;  /* need to mark it false here in case the atom is in more than
++                                                         one group in the shake block */
++                        nrandom--;
++                    }
++                }
++            }
++            if (nrand_group > 0)
++            {
++                prand = (nrand_group)/(1.0*(2*len/3));
++                if (gmx_rng_uniform_real(rng)<prand)
++                {
++                    for (k=0;k<len;k++)
++                    {
++                        if (k%3 != 0)
++                        {  /* only 2/3 of the sblock items are atoms, the others are labels */
++                            randatom[iatoms[k]] = TRUE; /* randomize all of them */
++                            nrandom++;
++                        }
++                    }
++                }
++            }
++        }
++        if (nrandom > 0)
++        {
++            n = 0;
++            for (i=0;i<md->homenr;i++)  /* now loop over the list of atoms */
++            {
++                if (randatom[i])
++                {
++                    randatom_list[n] = i;
++                    n++;
++                }
++            }
++            nrandom = n;  /* there are some values of nrandom for which
++                             this algorithm won't work; for example all
++                             water molecules and nrandom =/= 3.  Better to
++                             recount and use this number (which we
++                             calculate anyway); it will not affect
++                             the average number of atoms accepted.
++                          */
++        }
++    }
++    else
++    {
++        /* if it's andersen-massive, then randomize all the atoms */
++        nrandom = md->homenr;
++        for (i=0;i<nrandom;i++)
++        {
++            randatom_list[i] = i;
++        }
 +    }
 +
-     for(i=0; (i<opts->ngtc); i++) {
++    /* randomize the velocities of the selected particles */
++
++    for (i=0;i<nrandom;i++)  /* now loop over the list of atoms */
++    {
++        n = randatom_list[i];
++        if (md->cTC)
++        {
++            gc   = md->cTC[n];  /* assign the atom to a temperature group if there are more than one */
++        }
++        if (randomize[gc])
++        {
++            scal = sqrt(boltzfac[gc]*md->invmass[n]);
++            for (d=0;d<DIM;d++)
++            {
++                state->v[n][d] = scal*gmx_rng_gaussian_table(rng);
++            }
++        }
++        randatom[n] = FALSE; /* unmark this atom for randomization */
++    }
 +}
 +
++
 +void nosehoover_tcoupl(t_grpopts *opts,gmx_ekindata_t *ekind,real dt,
 +                       double xi[],double vxi[], t_extmass *MassQ)
 +{
 +    int   i;
 +    real  reft,oldvxi;
 +    
 +    /* note that this routine does not include Nose-hoover chains yet. Should be easy to add. */
 +    
- t_state *init_bufstate(const t_state *template_state) 
++    for(i=0; (i<opts->ngtc); i++)
++    {
 +        reft = max(0.0,opts->ref_t[i]);
 +        oldvxi = vxi[i];
 +        vxi[i]  += dt*MassQ->Qinv[i]*(ekind->tcstat[i].Th - reft);
 +        xi[i] += dt*(oldvxi + vxi[i])*0.5;
 +    }
 +}
 +
-                 if (md->cTC) 
++t_state *init_bufstate(const t_state *template_state)
 +{
 +    t_state *state;
 +    int nc = template_state->nhchainlength;
 +    snew(state,1);
 +    snew(state->nosehoover_xi,nc*template_state->ngtc);
 +    snew(state->nosehoover_vxi,nc*template_state->ngtc);
 +    snew(state->therm_integral,template_state->ngtc);
 +    snew(state->nhpres_xi,nc*template_state->nnhpres);
 +    snew(state->nhpres_vxi,nc*template_state->nnhpres);
 +
 +    return state;
 +}  
 +
 +void destroy_bufstate(t_state *state) 
 +{
 +    sfree(state->x);
 +    sfree(state->v);
 +    sfree(state->nosehoover_xi);
 +    sfree(state->nosehoover_vxi);
 +    sfree(state->therm_integral);
 +    sfree(state->nhpres_xi);
 +    sfree(state->nhpres_vxi);
 +    sfree(state);
 +}  
 +
 +void trotter_update(t_inputrec *ir,gmx_large_int_t step, gmx_ekindata_t *ekind, 
 +                    gmx_enerdata_t *enerd, t_state *state, 
 +                    tensor vir, t_mdatoms *md, 
 +                    t_extmass *MassQ, int **trotter_seqlist, int trotter_seqno) 
 +{
 +    
 +    int n,i,j,d,ntgrp,ngtc,gc=0;
 +    t_grp_tcstat *tcstat;
 +    t_grpopts *opts;
 +    gmx_large_int_t step_eff;
 +    real ecorr,pcorr,dvdlcorr;
 +    real bmass,qmass,reft,kT,dt,nd;
 +    tensor dumpres,dumvir;
 +    double *scalefac,dtc;
 +    int *trotter_seq;
 +    rvec sumv={0,0,0},consk;
 +    gmx_bool bCouple;
 +
 +    if (trotter_seqno <= ettTSEQ2)
 +    {
 +        step_eff = step-1;  /* the velocity verlet calls are actually out of order -- the first half step
 +                               is actually the last half step from the previous step.  Thus the first half step
 +                               actually corresponds to the n-1 step*/
 +                               
 +    } else {
 +        step_eff = step;
 +    }
 +
 +    bCouple = (ir->nsttcouple == 1 ||
 +               do_per_step(step_eff+ir->nsttcouple,ir->nsttcouple));
 +
 +    trotter_seq = trotter_seqlist[trotter_seqno];
 +
 +    /* signal we are returning if nothing is going to be done in this routine */
 +    if ((trotter_seq[0] == etrtSKIPALL)  || !(bCouple))
 +    {
 +        return;
 +    }
 +
 +    dtc = ir->nsttcouple*ir->delta_t;
 +    opts = &(ir->opts); /* just for ease of referencing */
 +    ngtc = opts->ngtc;
 +    assert(ngtc>0);
 +    snew(scalefac,opts->ngtc);
 +    for (i=0;i<ngtc;i++) 
 +    {
 +        scalefac[i] = 1;
 +    }
 +    /* execute the series of trotter updates specified in the trotterpart array */
 +    
 +    for (i=0;i<NTROTTERPARTS;i++){
 +        /* allow for doubled integrators by doubling dt instead of making 2 calls */
 +        if ((trotter_seq[i] == etrtBAROV2) || (trotter_seq[i] == etrtBARONHC2) || (trotter_seq[i] == etrtNHC2))
 +        {
 +            dt = 2 * dtc;
 +        }
 +        else 
 +        {
 +            dt = dtc;
 +        }
 +
 +        switch (trotter_seq[i])
 +        {
 +        case etrtBAROV:
 +        case etrtBAROV2:
 +            boxv_trotter(ir,&(state->veta),dt,state->box,ekind,vir,
 +                         enerd->term[F_PDISPCORR],enerd->term[F_DISPCORR],MassQ);
 +            break;
 +        case etrtBARONHC:
 +        case etrtBARONHC2:
 +            NHC_trotter(opts,state->nnhpres,ekind,dt,state->nhpres_xi,
 +                        state->nhpres_vxi,NULL,&(state->veta),MassQ,FALSE);      
 +            break;
 +        case etrtNHC:
 +        case etrtNHC2:
 +            NHC_trotter(opts,opts->ngtc,ekind,dt,state->nosehoover_xi,
 +                        state->nosehoover_vxi,scalefac,NULL,MassQ,(ir->eI==eiVV));
 +            /* need to rescale the kinetic energies and velocities here.  Could 
 +               scale the velocities later, but we need them scaled in order to 
 +               produce the correct outputs, so we'll scale them here. */
 +            
 +            for (i=0; i<ngtc;i++) 
 +            {
 +                tcstat = &ekind->tcstat[i];
 +                tcstat->vscale_nhc = scalefac[i]; 
 +                tcstat->ekinscaleh_nhc *= (scalefac[i]*scalefac[i]); 
 +                tcstat->ekinscalef_nhc *= (scalefac[i]*scalefac[i]); 
 +            }
 +            /* now that we've scaled the groupwise velocities, we can add them up to get the total */
 +            /* but do we actually need the total? */
 +            
 +            /* modify the velocities as well */
 +            for (n=md->start;n<md->start+md->homenr;n++) 
 +            {
- int **init_npt_vars(t_inputrec *ir, t_state *state, t_extmass *MassQ, gmx_bool bTrotter) 
++                if (md->cTC)   /* does this conditional need to be here? is this always true?*/
 +                { 
 +                    gc = md->cTC[n];
 +                }
 +                for (d=0;d<DIM;d++) 
 +                {
 +                    state->v[n][d] *= scalefac[gc];
 +                }
 +                
 +                if (debug) 
 +                {
 +                    for (d=0;d<DIM;d++) 
 +                    {
 +                        sumv[d] += (state->v[n][d])/md->invmass[n];
 +                    }
 +                }
 +            }          
 +            break;
 +        default:
 +            break;
 +        }
 +    }
 +    /* check for conserved momentum -- worth looking at this again eventually, but not working right now.*/  
 +#if 0
 +    if (debug) 
 +    {
 +        if (bFirstHalf) 
 +        {
 +            for (d=0;d<DIM;d++) 
 +            {
 +                consk[d] = sumv[d]*exp((1 + 1.0/opts->nrdf[0])*((1.0/DIM)*log(det(state->box)/state->vol0)) + state->nosehoover_xi[0]); 
 +            }
 +            fprintf(debug,"Conserved kappa: %15.8f %15.8f %15.8f\n",consk[0],consk[1],consk[2]);    
 +        }
 +    }
 +#endif
 +    sfree(scalefac);
 +}
 +
-     int **trotter_seq;
++
++extern void init_npt_masses(t_inputrec *ir, t_state *state, t_extmass *MassQ, gmx_bool bInit)
 +{
 +    int n,i,j,d,ntgrp,ngtc,nnhpres,nh,gc=0;
 +    t_grp_tcstat *tcstat;
 +    t_grpopts *opts;
 +    real ecorr,pcorr,dvdlcorr;
 +    real bmass,qmass,reft,kT,dt,ndj,nd;
 +    tensor dumpres,dumvir;
-     ngtc = state->ngtc;
 +
 +    opts = &(ir->opts); /* just for ease of referencing */
-         snew(MassQ->Qinv,ngtc);
++    ngtc = ir->opts.ngtc;
 +    nnhpres = state->nnhpres;
 +    nh = state->nhchainlength; 
 +
 +    if (ir->eI == eiMD) {
-         
-         if (state->vol0 == 0) 
++        if (bInit)
++        {
++            snew(MassQ->Qinv,ngtc);
++        }
 +        for(i=0; (i<ngtc); i++) 
 +        { 
 +            if ((opts->tau_t[i] > 0) && (opts->ref_t[i] > 0)) 
 +            {
 +                MassQ->Qinv[i]=1.0/(sqr(opts->tau_t[i]/M_2PI)*opts->ref_t[i]);
 +            } 
 +            else 
 +            {
 +                MassQ->Qinv[i]=0.0;     
 +            }
 +        }
 +    }
 +    else if (EI_VV(ir->eI))
 +    {
 +    /* Set pressure variables */
-             state->vol0 = det(state->box); /* because we start by defining a fixed compressibility, 
-                                               we need the volume at this compressibility to solve the problem */ 
++
++        if (bInit)
 +        {
-         /* Investigate this more -- is this the right mass to make it? */
++            if (state->vol0 == 0)
++            {
++                state->vol0 = det(state->box); 
++                /* because we start by defining a fixed
++                   compressibility, we need the volume at this
++                   compressibility to solve the problem. */
++            }
 +        }
 +
 +        /* units are nm^3 * ns^2 / (nm^3 * bar / kJ/mol) = kJ/mol  */
-                 /* not clear this is correct yet for the anisotropic case*/
-             } 
-         }           
++        /* Consider evaluating eventually if this is the right mass to use.  All are correct, some might be more stable  */
 +        MassQ->Winv = (PRESFAC*trace(ir->compress)*BOLTZ*opts->ref_t[0])/(DIM*state->vol0*sqr(ir->tau_p/M_2PI));
 +        /* An alternate mass definition, from Tuckerman et al. */ 
 +        /* MassQ->Winv = 1.0/(sqr(ir->tau_p/M_2PI)*(opts->nrdf[0]+DIM)*BOLTZ*opts->ref_t[0]); */
 +        for (d=0;d<DIM;d++) 
 +        {
 +            for (n=0;n<DIM;n++) 
 +            {
 +                MassQ->Winvm[d][n]= PRESFAC*ir->compress[d][n]/(state->vol0*sqr(ir->tau_p/M_2PI)); 
-         snew(MassQ->Qinv,ngtc*nh);
-         
++                /* not clear this is correct yet for the anisotropic case. Will need to reevaluate
++                 before using MTTK for anisotropic states.*/
++            }
++        }
 +        /* Allocate space for thermostat variables */
-         for(i=0; i<ngtc; i++) 
++        if (bInit)
++        {
++            snew(MassQ->Qinv,ngtc*nh);
++        }
++
 +        /* now, set temperature variables */
-             if ((opts->tau_t[i] > 0) && (opts->ref_t[i] > 0)) 
++        for (i=0; i<ngtc; i++)
 +        {
-                 for (j=0;j<nh;j++) 
++            if ((opts->tau_t[i] > 0) && (opts->ref_t[i] > 0))
 +            {
 +                reft = max(0.0,opts->ref_t[i]);
 +                nd = opts->nrdf[i];
 +                kT = BOLTZ*reft;
-                     if (j==0) 
++                for (j=0;j<nh;j++)
 +                {
-                     } 
-                     else 
++                    if (j==0)
 +                    {
 +                        ndj = nd;
-             else 
++                    }
++                    else
 +                    {
 +                        ndj = 1;
 +                    }
 +                    MassQ->Qinv[i*nh+j]   = 1.0/(sqr(opts->tau_t[i]/M_2PI)*ndj*kT);
 +                }
 +            }
-                 for (j=0;j<nh;j++) 
++            else
 +            {
 +                reft=0.0;
-             trotter_seq[0][0] = etrtBAROV; 
-             
++                for (j=0;j<nh;j++)
 +                {
 +                    MassQ->Qinv[i*nh+j] = 0.0;
 +                }
 +            }
 +        }
 +    }
++}
++
++int **init_npt_vars(t_inputrec *ir, t_state *state, t_extmass *MassQ, gmx_bool bTrotter)
++{
++    int n,i,j,d,ntgrp,ngtc,nnhpres,nh,gc=0;
++    t_grp_tcstat *tcstat;
++    t_grpopts *opts;
++    real ecorr,pcorr,dvdlcorr;
++    real bmass,qmass,reft,kT,dt,ndj,nd;
++    tensor dumpres,dumvir;
++    int **trotter_seq;
++
++    opts = &(ir->opts); /* just for ease of referencing */
++    ngtc = state->ngtc;
++    nnhpres = state->nnhpres;
++    nh = state->nhchainlength;
++
++    init_npt_masses(ir, state, MassQ, TRUE);
 +    
 +    /* first, initialize (clear) all the trotter calls */
 +    snew(trotter_seq,ettTSEQMAX);
 +    for (i=0;i<ettTSEQMAX;i++) 
 +    {
 +        snew(trotter_seq[i],NTROTTERPARTS);
 +        for (j=0;j<NTROTTERPARTS;j++) {
 +            trotter_seq[i][j] = etrtNONE;
 +        }
 +        trotter_seq[i][0] = etrtSKIPALL;
 +    }
 +    
 +    if (!bTrotter) 
 +    {
 +        /* no trotter calls, so we never use the values in the array.
 +         * We access them (so we need to define them), but ignore
 +         * them. */
 +
 +        return trotter_seq;
 +    }
 +
 +    /* compute the kinetic energy by using the half step velocities or
 +     * the kinetic energies, depending on the order of the trotter calls */
 +
 +    if (ir->eI==eiVV)
 +    {
 +        if (IR_NPT_TROTTER(ir)) 
 +        {
 +            /* This is the complicated version - there are 4 possible calls, depending on ordering.
 +               We start with the initial one. */
 +            /* first, a round that estimates veta. */
-             
++            trotter_seq[0][0] = etrtBAROV;
++
 +            /* trotter_seq[1] is etrtNHC for 1/2 step velocities - leave zero */
-             
++
 +            /* The first half trotter update */
 +            trotter_seq[2][0] = etrtBAROV;
 +            trotter_seq[2][1] = etrtNHC;
 +            trotter_seq[2][2] = etrtBARONHC;
++
 +            /* The second half trotter update */
 +            trotter_seq[3][0] = etrtBARONHC;
 +            trotter_seq[3][1] = etrtNHC;
 +            trotter_seq[3][2] = etrtBAROV;
 +
 +            /* trotter_seq[4] is etrtNHC for second 1/2 step velocities - leave zero */
-         else 
++            
 +        } 
-             if (IR_NVT_TROTTER(ir)) 
-             {
-                 /* This is the easy version - there are only two calls, both the same. 
-                    Otherwise, even easier -- no calls  */
-                 trotter_seq[2][0] = etrtNHC;
-                 trotter_seq[3][0] = etrtNHC;
-             }
++        else if (IR_NVT_TROTTER(ir))
 +        {
-     } else if (ir->eI==eiVVAK) {
-         if (IR_NPT_TROTTER(ir)) 
++            /* This is the easy version - there are only two calls, both the same.
++               Otherwise, even easier -- no calls  */
++            trotter_seq[2][0] = etrtNHC;
++            trotter_seq[3][0] = etrtNHC;
 +        }
-             trotter_seq[0][0] = etrtBAROV; 
-             
++        else if (IR_NPH_TROTTER(ir))
 +        {
 +            /* This is the complicated version - there are 4 possible calls, depending on ordering.
 +               We start with the initial one. */
 +            /* first, a round that estimates veta. */
-             /* The second half trotter update -- blank for now */
++            trotter_seq[0][0] = etrtBAROV;
++
++            /* trotter_seq[1] is etrtNHC for 1/2 step velocities - leave zero */
++
++            /* The first half trotter update */
++            trotter_seq[2][0] = etrtBAROV;
++            trotter_seq[2][1] = etrtBARONHC;
++
++            /* The second half trotter update */
++            trotter_seq[3][0] = etrtBARONHC;
++            trotter_seq[3][1] = etrtBAROV;
++
++            /* trotter_seq[4] is etrtNHC for second 1/2 step velocities - leave zero */
++        }
++    }
++    else if (ir->eI==eiVVAK)
++    {
++        if (IR_NPT_TROTTER(ir))
++        {
++            /* This is the complicated version - there are 4 possible calls, depending on ordering.
++               We start with the initial one. */
++            /* first, a round that estimates veta. */
++            trotter_seq[0][0] = etrtBAROV;
++
 +            /* The first half trotter update, part 1 -- double update, because it commutes */
 +            trotter_seq[1][0] = etrtNHC;
 +
 +            /* The first half trotter update, part 2 */
 +            trotter_seq[2][0] = etrtBAROV;
 +            trotter_seq[2][1] = etrtBARONHC;
 +            
 +            /* The second half trotter update, part 1 */
 +            trotter_seq[3][0] = etrtBARONHC;
 +            trotter_seq[3][1] = etrtBAROV;
 +
-         else 
++            /* The second half trotter update */
 +            trotter_seq[4][0] = etrtNHC;
 +        } 
-             if (IR_NVT_TROTTER(ir)) 
-             {
-                 /* This is the easy version - there is only one call, both the same. 
-                    Otherwise, even easier -- no calls  */
-                 trotter_seq[1][0] = etrtNHC;
-                 trotter_seq[4][0] = etrtNHC;
-             }
++        else if (IR_NVT_TROTTER(ir))
 +        {
-     switch (ir->epct) 
++            /* This is the easy version - there is only one call, both the same.
++               Otherwise, even easier -- no calls  */
++            trotter_seq[1][0] = etrtNHC;
++            trotter_seq[4][0] = etrtNHC;
++        }
++        else if (IR_NPH_TROTTER(ir))
++        {
++            /* This is the complicated version - there are 4 possible calls, depending on ordering.
++               We start with the initial one. */
++            /* first, a round that estimates veta. */
++            trotter_seq[0][0] = etrtBAROV; 
++
++            /* The first half trotter update, part 1 -- leave zero */
++            trotter_seq[1][0] = etrtNHC;
++
++            /* The first half trotter update, part 2 */
++            trotter_seq[2][0] = etrtBAROV;
++            trotter_seq[2][1] = etrtBARONHC;
++
++            /* The second half trotter update, part 1 */
++            trotter_seq[3][0] = etrtBARONHC;
++            trotter_seq[3][1] = etrtBAROV;
++
++            /* The second half trotter update -- blank for now */
 +        }
 +    }
 +
-     if ((ir->tau_p > 0) && (opts->ref_t[0] > 0)) 
++    switch (ir->epct)
 +    {
 +    case epctISOTROPIC:  
 +    default:
 +        bmass = DIM*DIM;  /* recommended mass parameters for isotropic barostat */
 +    }    
 +
 +    snew(MassQ->QPinv,nnhpres*opts->nhchainlength);
 +
 +    /* barostat temperature */
-     if (IR_NPT_TROTTER(ir)) 
++    if ((ir->tau_p > 0) && (opts->ref_t[0] > 0))
 +    {
 +        reft = max(0.0,opts->ref_t[0]);
 +        kT = BOLTZ*reft;
 +        for (i=0;i<nnhpres;i++) {
 +            for (j=0;j<nh;j++) 
 +            {
 +                if (j==0) {
 +                    qmass = bmass;
 +                } 
 +                else 
 +                {
 +                    qmass = 1;
 +                }
 +                MassQ->QPinv[i*opts->nhchainlength+j]   = 1.0/(sqr(opts->tau_t[0]/M_2PI)*qmass*kT);
 +            }
 +        }
 +    }
 +    else 
 +    {
 +        for (i=0;i<nnhpres;i++) {
 +            for (j=0;j<nh;j++) 
 +            {
 +                MassQ->QPinv[i*nh+j]=0.0;
 +            }
 +        }
 +    }    
 +    return trotter_seq;
 +}
 +
 +real NPT_energy(t_inputrec *ir, t_state *state, t_extmass *MassQ)
 +{
 +    int  i,j,nd,ndj,bmass,qmass,ngtcall;
 +    real ener_npt,reft,eta,kT,tau;
 +    double *ivxi, *ixi;
 +    double *iQinv;
 +    real vol,dbaro,W,Q;
 +    int nh = state->nhchainlength;
 +    /* Sums the barostat, barostat-chain and thermostat terms below into ener_npt, which is returned. */
 +    ener_npt = 0;
 +    /* NOTE(review): bmass, qmass, ngtcall, eta, tau, dbaro, W and Q are not referenced in this function. */
 +    /* now we compute the contribution of the pressure to the conserved quantity*/
 +    
 +    if (ir->epc==epcMTTK) 
 +    {
 +        /* find the volume, and the kinetic energy of the volume */
 +        
 +        switch (ir->epct) {
 +            
 +        case epctISOTROPIC:
 +            /* contribution from the pressure momenta */
 +            ener_npt += 0.5*sqr(state->veta)/MassQ->Winv;
 +            
 +            /* contribution from the PV term */
 +            vol = det(state->box);
 +            ener_npt += vol*trace(ir->ref_p)/(DIM*PRESFAC);
 +
 +            break;
 +        case epctANISOTROPIC:
 +            /* no term is added for this or the following coupling types */
 +            break;
 +            
 +        case epctSURFACETENSION:
 +            
 +            break;
 +        case epctSEMIISOTROPIC:
 +            
 +            break;
 +        default:
 +            break;
 +        }
 +    }
 +    
-         if (opts->tau_t[i] >= 0 && opts->nrdf[i] > 0 && Ek > 0)
++    if (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir))
 +    {
 +        /* add the energy from the barostat thermostat chain */
 +        for (i=0;i<state->nnhpres;i++) {
 +
 +            /* note -- assumes only one degree of freedom that is thermostatted in barostat */
 +            ivxi = &state->nhpres_vxi[i*nh];
 +            ixi = &state->nhpres_xi[i*nh];
 +            iQinv = &(MassQ->QPinv[i*nh]);
 +            reft = max(ir->opts.ref_t[0],0); /* using 'System' temperature */
 +            kT = BOLTZ * reft;
 +        
 +            for (j=0;j<nh;j++) 
 +            {
 +                if (iQinv[j] > 0)
 +                {
 +                    ener_npt += 0.5*sqr(ivxi[j])/iQinv[j];
 +                    /* contribution from the thermal variable of the NH chain */
 +                    ener_npt += ixi[j]*kT;
 +                }
 +                if (debug) /* NOTE(review): the format string below has no trailing newline */
 +                {
 +                    fprintf(debug,"P-T-group: %10d Chain %4d ThermV: %15.8f ThermX: %15.8f",i,j,ivxi[j],ixi[j]);
 +                }
 +            }
 +        }
 +    }
 +    /* thermostat terms, added only when temperature coupling is enabled (ir->etc != 0) */
 +    if (ir->etc) 
 +    {
 +        for(i=0; i<ir->opts.ngtc; i++) 
 +        {
 +            ixi = &state->nosehoover_xi[i*nh];
 +            ivxi = &state->nosehoover_vxi[i*nh];
 +            iQinv = &(MassQ->Qinv[i*nh]);
 +            
 +            nd = ir->opts.nrdf[i];
 +            reft = max(ir->opts.ref_t[i],0);
 +            kT = BOLTZ * reft;
 +            
 +            if (nd > 0) 
 +            {
 +                if (IR_NVT_TROTTER(ir))
 +                {
 +                    /* contribution from the thermal momenta of the NH chain */
 +                    for (j=0;j<nh;j++) 
 +                    {
 +                        if (iQinv[j] > 0) {
 +                            ener_npt += 0.5*sqr(ivxi[j])/iQinv[j];
 +                            /* contribution from the thermal variable of the NH chain */
 +                            if (j==0) {
 +                                ndj = nd;
 +                            } 
 +                            else 
 +                            {
 +                                ndj = 1;
 +                            } 
 +                            ener_npt += ndj*ixi[j]*kT;
 +                        }
 +                    }
 +                }
 +                else  /* Other non Trotter temperature NH control  -- no chains yet. */
 +                { /* NOTE(review): divides by iQinv[0] without the iQinv > 0 guard used in the branches above */
 +                    ener_npt += 0.5*BOLTZ*nd*sqr(ivxi[0])/iQinv[0];
 +                    ener_npt += nd*ixi[0]*kT;
 +                }
 +            }
 +        }
 +    }
 +    return ener_npt;
 +}
 +
 +static real vrescale_gamdev(int ia, gmx_rng_t rng)
 +/* Returns a gamma-distributed deviate for integer parameter ia, drawn from rng; adapted from numerical recipes */
 +{
 +    int j;
 +    real am,e,s,v1,v2,x,y;
 +
 +    if (ia < 6) /* small ia: -log of the product of ia uniform samples */
 +    {
 +        do
 +        {
 +            x = 1.0;
 +            for(j=1; j<=ia; j++)
 +            {
 +                x *= gmx_rng_uniform_real(rng);
 +            }
 +        }
 +        while (x == 0); /* redraw while the product is exactly zero, before taking log(x) */
 +        x = -log(x);
 +    }
 +    else /* larger ia: accept/reject loop, a candidate x is accepted when a uniform sample is <= e */
 +    {
 +        do
 +        {
 +            do
 +            {
 +                do
 +                {
 +                    v1 = gmx_rng_uniform_real(rng);
 +                    v2 = 2.0*gmx_rng_uniform_real(rng)-1.0;
 +                }
 +                while (v1*v1 + v2*v2 > 1.0 ||
 +                       v1*v1*GMX_REAL_MAX < 3.0*ia);
 +                /* The last check above ensures that both x (3.0 > 2.0 in s)
 +                 * and the pre-factor for e do not go out of range.
 +                 */
 +                y = v2/v1;
 +                am = ia - 1;
 +                s = sqrt(2.0*am + 1.0);
 +                x = s*y + am;
 +            }
 +            while (x <= 0.0); /* only positive candidates are kept */
 +            e = (1.0 + y*y)*exp(am*log(x/am) - s*y);
 +        }
 +        while (gmx_rng_uniform_real(rng) > e);
 +    }
 +
 +    return x;
 +}
 +
 +static real vrescale_sumnoises(int nn,gmx_rng_t rng)
 +{
 +/*
 + * Returns the sum of nn independent gaussian noises squared
 + * (i.e. equivalent to summing the square of the return values
 + * of nn calls to gmx_rng_gaussian_real).
 + */
 +  real rr;
 +
 +  if (nn == 0) {
 +    return 0.0;
 +  } else if (nn == 1) {
 +    rr = gmx_rng_gaussian_real(rng);
 +    return rr*rr;
 +  } else if (nn % 2 == 0) { /* even nn: twice a gamma deviate with parameter nn/2 */
 +    return 2.0*vrescale_gamdev(nn/2,rng);
 +  } else { /* odd nn: as the even case for nn-1, plus one explicit squared gaussian */
 +    rr = gmx_rng_gaussian_real(rng);
 +    return 2.0*vrescale_gamdev((nn-1)/2,rng) + rr*rr;
 +  }
 +}
 +
 +static real vrescale_resamplekin(real kk,real sigma, int ndeg, real taut,
 +                               gmx_rng_t rng)
 +{
 +/*
 + * Generates a new value for the kinetic energy,
 + * according to Bussi et al JCP (2007), Eq. (A7)
 + * kk:    present value of the kinetic energy of the atoms to be thermalized (in arbitrary units)
 + * sigma: target average value of the kinetic energy (ndeg k_b T/2)  (in the same units as kk)
 + * ndeg:  number of degrees of freedom of the atoms to be thermalized
 + * taut:  relaxation time of the thermostat, in units of 'how often this routine is called'
 + */
 +  real factor,rr;
 +
 +  if (taut > 0.1) {
 +    factor = exp(-1.0/taut);
 +  } else { /* for taut <= 0.1 the factor is set to exactly zero instead of evaluating exp() */
 +    factor = 0.0;
 +  }
 +  rr = gmx_rng_gaussian_real(rng);
 +  return
 +    kk +
 +    (1.0 - factor)*(sigma*(vrescale_sumnoises(ndeg-1,rng) + rr*rr)/ndeg - kk) +
 +    2.0*rr*sqrt(kk*sigma/ndeg*(1.0 - factor)*factor);
 +}
 +
 +void vrescale_tcoupl(t_inputrec *ir,gmx_ekindata_t *ekind,real dt,
 +                     double therm_integral[],gmx_rng_t rng)
 +{
 +    t_grpopts *opts;
 +    int    i;
 +    real   Ek,Ek_ref1,Ek_ref,Ek_new; 
 +    /* For each T-coupling group: resample the kinetic energy and store the scaling factor in tcstat[i].lambda. */
 +    opts = &ir->opts;
 +
 +    for(i=0; (i<opts->ngtc); i++)
 +    {
 +        if (ir->eI == eiVV) /* eiVV reads ekinf, every other integrator reads ekinh */
 +        {
 +            Ek = trace(ekind->tcstat[i].ekinf);
 +        }
 +        else
 +        {
 +            Ek = trace(ekind->tcstat[i].ekinh);
 +        }
 +        
++        if (opts->tau_t[i] > 0 && opts->nrdf[i] > 0 && Ek > 0)
 +        {
 +            Ek_ref1 = 0.5*opts->ref_t[i]*BOLTZ;
 +            Ek_ref  = Ek_ref1*opts->nrdf[i];
 +
 +            Ek_new  = vrescale_resamplekin(Ek,Ek_ref,opts->nrdf[i],
 +                                           opts->tau_t[i]/dt,rng);
 +
 +            /* Analytically Ek_new>=0, but we check for rounding errors */
 +            if (Ek_new <= 0)
 +            {
 +                ekind->tcstat[i].lambda = 0.0;
 +            }
 +            else
 +            {
 +                ekind->tcstat[i].lambda = sqrt(Ek_new/Ek);
 +            }
 +
 +            therm_integral[i] -= Ek_new - Ek; /* subtract the kinetic-energy change made for this group */
 +
 +            if (debug)
 +            {
 +                fprintf(debug,"TC: group %d: Ekr %g, Ek %g, Ek_new %g, Lambda: %g\n",
 +                        i,Ek_ref,Ek,Ek_new,ekind->tcstat[i].lambda);
 +            }
 +        }
 +        else
 +        {
 +            ekind->tcstat[i].lambda = 1.0; /* group is not coupled: scaling factor of one */
 +        }
 +    }
 +}
 +
 +real vrescale_energy(t_grpopts *opts,double therm_integral[])
 +{
 +  int i;
 +  real ener;
 +  /* Returns the sum of therm_integral[] over all opts->ngtc coupling groups. */
 +  ener = 0;
 +  for(i=0; i<opts->ngtc; i++) {
 +    ener += therm_integral[i];
 +  }
 +  
 +  return ener;
 +}
 +
 +void rescale_velocities(gmx_ekindata_t *ekind,t_mdatoms *mdatoms,
 +                        int start,int end,rvec v[])
 +{
 +    t_grp_acc      *gstat;
 +    t_grp_tcstat   *tcstat;
 +    unsigned short *cACC,*cTC;
 +    int  ga,gt,n,d;
 +    real lg;
 +    rvec vrel;
 +    /* Scales v[start..end-1] with the lambda of each atom's T-coupling group (ekind->tcstat[].lambda). */
 +    tcstat = ekind->tcstat;
 +    cTC    = mdatoms->cTC;
 +
 +    if (ekind->bNEMD)
 +    {
 +        gstat  = ekind->grpstat;
 +        cACC   = mdatoms->cACC;
 +
 +        ga = 0; /* group 0 is used for every atom when the per-atom index array is NULL */
 +        gt = 0;
 +        for(n=start; n<end; n++) 
 +        {
 +            if (cACC) 
 +            {
 +                ga   = cACC[n];
 +            }
 +            if (cTC)
 +            {
 +                gt   = cTC[n];
 +            }
 +            /* Only scale the velocity component relative to the COM velocity */
 +            rvec_sub(v[n],gstat[ga].u,vrel);
 +            lg = tcstat[gt].lambda;
 +            for(d=0; d<DIM; d++)
 +            {
 +                v[n][d] = gstat[ga].u[d] + lg*vrel[d];
 +            }
 +        }
 +    }
 +    else
 +    {
 +        gt = 0; /* group 0 is used for every atom when cTC is NULL */
 +        for(n=start; n<end; n++) 
 +        {
 +            if (cTC)
 +            {
 +                gt   = cTC[n];
 +            }
 +            lg = tcstat[gt].lambda;
 +            for(d=0; d<DIM; d++)
 +            {
 +                v[n][d] *= lg;
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* set target temperatures (opts->ref_t[i]) for time t if we are annealing */
 +void update_annealing_target_temp(t_grpopts *opts,real t)
 +{
 +  int i,j,n,npoints;
 +  real pert,thist=0,x;
 +
 +  for(i=0;i<opts->ngtc;i++) {
 +    npoints = opts->anneal_npoints[i];
 +    switch (opts->annealing[i]) {
 +    case eannNO:
 +      continue; /* group is not annealed: ref_t[i] is left unchanged */
 +    case  eannPERIODIC:
 +      /* calculate time modulo the period */
 +      pert  = opts->anneal_time[i][npoints-1];
 +      n     = t / pert;
 +      thist = t - n*pert; /* modulo time */
 +      /* Make sure rounding didn't get us outside the interval */
 +      if (fabs(thist-pert) < GMX_REAL_EPS*100)
 +      thist=0; /* body of the if above (not indented in the original) */
 +      break;
 +    case eannSINGLE:
 +      thist = t;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Death horror in update_annealing_target_temp (i=%d/%d npoints=%d)",i,opts->ngtc,npoints);
 +    }
 +    /* We are doing annealing for this group if we got here, 
 +     * and we have the (relative) time as thist.
 +     * calculate target temp */
 +    j=0;
 +    while ((j < npoints-1) && (thist>(opts->anneal_time[i][j+1])))
 +      j++;
 +    if (j < npoints-1) {
 +      /* Found our position between points j and j+1. 
 +       * Interpolate: x is the amount from j+1, (1-x) from point j 
 +       * First treat possible jumps in temperature as a special case.
 +       */
 +      if ((opts->anneal_time[i][j+1]-opts->anneal_time[i][j]) < GMX_REAL_EPS*100)
 +      opts->ref_t[i]=opts->anneal_temp[i][j+1]; /* body of the if above: take the temperature of point j+1 */
 +      else {
 +      x = ((thist-opts->anneal_time[i][j])/
 +           (opts->anneal_time[i][j+1]-opts->anneal_time[i][j]));
 +      opts->ref_t[i] = x*opts->anneal_temp[i][j+1]+(1-x)*opts->anneal_temp[i][j];
 +      }
 +    }
 +    else { /* no enclosing interval was found: use the last annealing temperature */
 +      opts->ref_t[i] = opts->anneal_temp[i][npoints-1];
 +    }
 +  }
 +}
index f88f40a3532dd660ab107eaf1a2071d77fd89244,0000000000000000000000000000000000000000..5ed15d190d711e31900b2edff53a850774fefc36
mode 100644,000000..100644
--- /dev/null
@@@ -1,8657 -1,0 +1,8665 @@@
-         state->lambda = state_local->lambda;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include <math.h>
 +#include <string.h>
 +#include <stdlib.h>
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "vec.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "nrnb.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "constr.h"
 +#include "mdatoms.h"
 +#include "names.h"
 +#include "pdbio.h"
 +#include "futil.h"
 +#include "force.h"
 +#include "pme.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "gmx_wallcycle.h"
 +#include "mdrun.h"
 +#include "nsgrid.h"
 +#include "shellfc.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "gmx_ga2la.h"
 +#include "gmx_sort.h"
 +#include "macros.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#define DDRANK(dd,rank)    (rank)
 +#define DDMASTERRANK(dd)   (dd->masterrank)
 +
 +typedef struct gmx_domdec_master
 +{
 +    /* The cell boundaries */
 +    real **cell_x;
 +    /* The global charge group division */
 +    int  *ncg;     /* Number of home charge groups for each node */
 +    int  *index;   /* Index of nnodes+1 into cg */
 +    int  *cg;      /* Global charge group index */
 +    int  *nat;     /* Number of home atoms for each node. */
 +    int  *ibuf;    /* Buffer for communication */
 +    rvec *vbuf;    /* Buffer for state scattering and gathering */
 +} gmx_domdec_master_t;
 +
 +typedef struct
 +{
 +    /* The numbers of charge groups to send and receive for each cell
 +     * that requires communication, the last entry contains the total
 +     * number of atoms that needs to be communicated.
 +     */
 +    int nsend[DD_MAXIZONE+2];
 +    int nrecv[DD_MAXIZONE+2];
 +    /* The charge groups to send */
 +    int *index;
 +    int nalloc;
 +    /* The atom range for non-in-place communication */
 +    int cell2at0[DD_MAXIZONE];
 +    int cell2at1[DD_MAXIZONE];
 +} gmx_domdec_ind_t;
 +
 +typedef struct
 +{
 +    int  np;                   /* Number of grid pulses in this dimension */
 +    int  np_dlb;               /* For dlb, for use with edlbAUTO          */
 +    gmx_domdec_ind_t *ind;     /* The indices to communicate, size np     */
 +    int  np_nalloc;
 +    gmx_bool bInPlace;             /* Can we communicate in place?            */
 +} gmx_domdec_comm_dim_t;
 +
 +typedef struct
 +{
 +    gmx_bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
 +    real *cell_f;      /* State var.: cell boundaries, box relative      */
 +    real *old_cell_f;  /* Temp. var.: old cell size                      */
 +    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
 +    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
 +    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
 +    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
 +    gmx_bool bLimited;     /* State var.: is DLB limited in this dim and row */
 +    real *buf_ncd;     /* Temp. var.                                     */
 +} gmx_domdec_root_t;
 +
 +#define DD_NLOAD_MAX 9
 +
 +/* Here floats are accurate enough, since these variables
 + * only influence the load balancing, not the actual MD results.
 + */
 +typedef struct
 +{
 +    int  nload;
 +    float *load;
 +    float sum;
 +    float max;
 +    float sum_m;
 +    float cvol_min;
 +    float mdf;
 +    float pme;
 +    int   flags;
 +} gmx_domdec_load_t;
 +
 +typedef struct
 +{
 +    int  nsc;
 +    int  ind_gl;
 +    int  ind;
 +} gmx_cgsort_t;
 +
 +typedef struct
 +{
 +    gmx_cgsort_t *sort1,*sort2;
 +    int  sort_nalloc;
 +    gmx_cgsort_t *sort_new;
 +    int  sort_new_nalloc;
 +    int  *ibuf;
 +    int  ibuf_nalloc;
 +} gmx_domdec_sort_t;
 +
 +typedef struct
 +{
 +    rvec *v;      /* The buffer, grown on demand by vec_rvec_check_alloc */
 +    int  nalloc;  /* The number of elements allocated for v             */
 +} vec_rvec_t;
 +
 +/* This enum determines the order of the coordinates.
 + * ddnatHOME and ddnatZONE should be first and second,
 + * the others can be ordered as wanted.
 + */
 +enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
 +
 +enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
 +const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
 +
 +typedef struct
 +{
 +    int  dim;      /* The dimension                                          */
 +    gmx_bool dim_match;/* Tells if DD and PME dims match                         */
 +    int  nslab;    /* The number of PME slabs in this dimension              */
 +    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB    */
 +    int  *pp_min;  /* The minimum pp node location, size nslab               */
 +    int  *pp_max;  /* The maximum pp node location, size nslab               */
 +    int  maxshift; /* The maximum shift for coordinate redistribution in PME */
 +} gmx_ddpme_t;
 +
 +typedef struct
 +{
 +    real min0;    /* The minimum bottom of this zone                        */
 +    real max1;    /* The maximum top of this zone                           */
 +    real mch0;    /* The maximum bottom communication height for this zone  */
 +    real mch1;    /* The maximum top communication height for this zone     */
 +    real p1_0;    /* The bottom value of the first cell in this zone        */
 +    real p1_1;    /* The top value of the first cell in this zone           */
 +} gmx_ddzone_t;
 +
 +typedef struct gmx_domdec_comm
 +{
 +    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
 +     * unless stated otherwise.
 +     */
 +
 +    /* The number of decomposition dimensions for PME, 0: no PME */
 +    int  npmedecompdim;
 +    /* The number of nodes doing PME (PP/PME or only PME) */
 +    int  npmenodes;
 +    int  npmenodes_x;
 +    int  npmenodes_y;
 +    /* The communication setup including the PME only nodes */
 +    gmx_bool bCartesianPP_PME;
 +    ivec ntot;
 +    int  cartpmedim;
 +    int  *pmenodes;          /* size npmenodes                         */
 +    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
 +                              * but with bCartesianPP_PME              */
 +    gmx_ddpme_t ddpme[2];
 +    
 +    /* The DD particle-particle nodes only */
 +    gmx_bool bCartesianPP;
 +    int  *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
 +    
 +    /* The global charge groups */
 +    t_block cgs_gl;
 +
 +    /* Should we sort the cgs */
 +    int  nstSortCG;
 +    gmx_domdec_sort_t *sort;
 +    
 +    /* Are there bonded and multi-body interactions between charge groups? */
 +    gmx_bool bInterCGBondeds;
 +    gmx_bool bInterCGMultiBody;
 +
 +    /* Data for the optional bonded interaction atom communication range */
 +    gmx_bool bBondComm;
 +    t_blocka *cglink;
 +    char *bLocalCG;
 +
 +    /* The DLB option */
 +    int  eDLB;
 +    /* Are we actually using DLB? */
 +    gmx_bool bDynLoadBal;
 +
 +    /* Cell sizes for static load balancing, first index cartesian */
 +    real **slb_frac;
 +    
 +    /* The width of the communicated boundaries */
 +    real cutoff_mbody;
 +    real cutoff;
 +    /* The minimum cell size (including triclinic correction) */
 +    rvec cellsize_min;
 +    /* For dlb, for use with edlbAUTO */
 +    rvec cellsize_min_dlb;
 +    /* The lower limit for the DD cell size with DLB */
 +    real cellsize_limit;
 +    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
 +    gmx_bool bVacDLBNoLimit;
 +
 +    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
 +    ivec tric_dir;
 +    /* box0 and box_size are required with dim's without pbc and -gcom */
 +    rvec box0;
 +    rvec box_size;
 +    
 +    /* The cell boundaries */
 +    rvec cell_x0;
 +    rvec cell_x1;
 +
 +    /* The old location of the cell boundaries, to check cg displacements */
 +    rvec old_cell_x0;
 +    rvec old_cell_x1;
 +
 +    /* The communication setup and charge group boundaries for the zones */
 +    gmx_domdec_zones_t zones;
 +    
 +    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
 +     * cell boundaries of neighboring cells for dynamic load balancing.
 +     */
 +    gmx_ddzone_t zone_d1[2];
 +    gmx_ddzone_t zone_d2[2][2];
 +    
 +    /* The coordinate/force communication setup and indices */
 +    gmx_domdec_comm_dim_t cd[DIM];
 +    /* The maximum number of cells to communicate with in one dimension */
 +    int  maxpulse;
 +    
 +    /* Which cg distribution is stored on the master node */
 +    int master_cg_ddp_count;
 +    
 +    /* The number of cg's received from the direct neighbors */
 +    int  zone_ncg1[DD_MAXZONE];
 +    
 +    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
 +    int  nat[ddnatNR];
 +    
 +    /* Communication buffer for general use */
 +    int  *buf_int;
 +    int  nalloc_int;
 +
 +     /* Communication buffer for general use */
 +    vec_rvec_t vbuf;
 +    
 +    /* Communication buffers only used with multiple grid pulses */
 +    int  *buf_int2;
 +    int  nalloc_int2;
 +    vec_rvec_t vbuf2;
 +    
 +    /* Communication buffers for local redistribution */
 +    int  **cggl_flag;
 +    int  cggl_flag_nalloc[DIM*2];
 +    rvec **cgcm_state;
 +    int  cgcm_state_nalloc[DIM*2];
 +    
 +    /* Cell sizes for dynamic load balancing */
 +    gmx_domdec_root_t **root;
 +    real *cell_f_row;
 +    real cell_f0[DIM];
 +    real cell_f1[DIM];
 +    real cell_f_max0[DIM];
 +    real cell_f_min1[DIM];
 +    
 +    /* Stuff for load communication */
 +    gmx_bool bRecordLoad;
 +    gmx_domdec_load_t *load;
 +#ifdef GMX_MPI
 +    MPI_Comm *mpi_comm_load;
 +#endif
 +
 +    /* Maximum DLB scaling per load balancing step in percent */
 +    int dlb_scale_lim;
 +
 +    /* Cycle counters */
 +    float cycl[ddCyclNr];
 +    int   cycl_n[ddCyclNr];
 +    float cycl_max[ddCyclNr];
 +    /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise) */
 +    int eFlop;
 +    double flop;
 +    int    flop_n;
 +    /* How often did we have load measurements */
 +    int    n_load_have;
 +    /* How often have we collected the load measurements */
 +    int    n_load_collect;
 +    
 +    /* Statistics */
 +    double sum_nat[ddnatNR-ddnatZONE];
 +    int    ndecomp;
 +    int    nload;
 +    double load_step;
 +    double load_sum;
 +    double load_max;
 +    ivec   load_lim;
 +    double load_mdf;
 +    double load_pme;
 +
 +    /* The last partition step */
 +    gmx_large_int_t globalcomm_step;
 +
 +    /* Debugging */
 +    int  nstDDDump;
 +    int  nstDDDumpGrid;
 +    int  DD_debug;
 +} gmx_domdec_comm_t;
 +
 +/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_CGIBS 2
 +
 +/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_FLAG_NRCG  65535
 +#define DD_FLAG_FW(d) (1<<(16+(d)*2))
 +#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 +
 +/* Zone permutation required to obtain consecutive charge groups
 + * for neighbor searching.
 + */
 +static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
 +
 +/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 + * components see only j zones with that component 0.
 + */
 +
 +/* The DD zone order */
 +static const ivec dd_zo[DD_MAXZONE] =
 +  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
 +
 +/* The 3D setup */
 +#define dd_z3n  8
 +#define dd_zp3n 4
 +static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
 +
 +/* The 2D setup */
 +#define dd_z2n  4
 +#define dd_zp2n 2
 +static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
 +
 +/* The 1D setup */
 +#define dd_z1n  2
 +#define dd_zp1n 1
 +static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
 +
 +/* Factors used to avoid problems due to rounding issues */
 +#define DD_CELL_MARGIN       1.0001
 +#define DD_CELL_MARGIN2      1.00005
 +/* Factor to account for pressure scaling during nstlist steps */
 +#define DD_PRES_SCALE_MARGIN 1.02
 +
 +/* Allowed performance loss before we DLB or warn */
 +#define DD_PERF_LOSS 0.05
 +
 +#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
 +
 +/* Use separate MPI send and receive commands
 + * when nnodes <= GMX_DD_NNODES_SENDRECV.
 + * This saves memory (and some copying for small nnodes).
 + * For high parallelization scatter and gather calls are used.
 + */
 +#define GMX_DD_NNODES_SENDRECV 4
 +
 +
 +/*
 +#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 +
 +static void index2xyz(ivec nc,int ind,ivec xyz)
 +{
 +  xyz[XX] = ind % nc[XX];
 +  xyz[YY] = (ind / nc[XX]) % nc[YY];
 +  xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 +}
 +*/
 +
 +/* This order is required to minimize the coordinate communication in PME
 + * which uses decomposition in the x direction.
 + */
 +#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
 +
 +static void ddindex2xyz(ivec nc,int ind,ivec xyz)
 +{ /* Inverse of dd_index(nc,xyz): splits the linear index ind (XX slowest, ZZ fastest) into cell coordinates */
 +    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 +    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 +    xyz[ZZ] = ind % nc[ZZ];
 +}
 +
 +static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
 +{
 +    int ddindex;
 +    int ddnodeid=-1;
 +    /* Maps cell coordinates c to a node id; the result stays -1 in the bCartesianPP branch when GMX_MPI is not defined. */
 +    ddindex = dd_index(dd->nc,c);
 +    if (dd->comm->bCartesianPP_PME)
 +    {
 +        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex]; /* table lookup */
 +    }
 +    else if (dd->comm->bCartesianPP)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddnodeid = ddindex; /* the linear cell index is used directly as the node id */
 +    }
 +    
 +    return ddnodeid;
 +}
 +
 +static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
 +{ /* TRUE when fewer than DIM dimensions are bounded or when DYNAMIC_BOX(*ir) holds */
 +    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
 +}
 +
 +int ddglatnr(gmx_domdec_t *dd,int i)
 +{
 +    int atnr;
 +    /* Returns dd->gatindex[i]+1 for local atom index i, or simply i+1 when dd is NULL. */
 +    if (dd == NULL)
 +    {
 +        atnr = i + 1;
 +    }
 +    else
 +    {
 +        if (i >= dd->comm->nat[ddnatNR-1]) /* i lies beyond the last local atom range: fatal error */
 +        {
 +            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
 +        }
 +        atnr = dd->gatindex[i] + 1;
 +    }
 +    
 +    return atnr;
 +}
 +
 +t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 +{ /* Returns a pointer to the cgs_gl member stored inside dd->comm (no copy is made) */
 +    return &dd->comm->cgs_gl;
 +}
 +
 +static void vec_rvec_init(vec_rvec_t *v)
 +{ /* Resets v to the empty state: no buffer and zero allocated elements */
 +    v->nalloc = 0;
 +    v->v      = NULL;
 +}
 +
 +static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
 +{ /* Makes sure v->v has room for at least n elements; never shrinks the buffer */
 +    if (n > v->nalloc)
 +    {
 +        v->nalloc = over_alloc_dd(n); /* new size is chosen by over_alloc_dd, not n itself */
 +        srenew(v->v,v->nalloc);
 +    }
 +}
 +
 +void dd_store_state(gmx_domdec_t *dd,t_state *state)
 +{
 +    int i;
 +    /* Copies the first dd->ncg_home entries of dd->index_gl into state->cg_gl and records dd->ddp_count. */
 +    if (state->ddp_count != dd->ddp_count)
 +    { /* NOTE(review): the message below appears to be missing a verb such as "match" */
 +        gmx_incons("The state does not the domain decomposition state");
 +    }
 +    
 +    state->ncg_gl = dd->ncg_home;
 +    if (state->ncg_gl > state->cg_gl_nalloc) /* grow state->cg_gl when it is too small */
 +    {
 +        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
 +        srenew(state->cg_gl,state->cg_gl_nalloc);
 +    }
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        state->cg_gl[i] = dd->index_gl[i];
 +    }
 +    
 +    state->ddp_count_cg_gl = dd->ddp_count;
 +}
 +
 +gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 +{ /* Returns a pointer to the zones member stored inside dd->comm (no copy is made) */
 +    return &dd->comm->zones;
 +}
 +
 +void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
 +                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int izone,d,dim;
 +    /* Finds the i-zone holding charge group icg; returns its j range in *jcg0,*jcg1 and the shift range per DD dimension. */
 +    zones = &dd->comm->zones;
 +
 +    izone = 0;
 +    while (icg >= zones->izone[izone].cg1) /* NOTE(review): izone is only checked against nizone after this loop */
 +    {
 +        izone++;
 +    }
 +    
 +    if (izone == 0)
 +    {
 +        *jcg0 = icg; /* in the first i-zone the j range starts at icg itself */
 +    }
 +    else if (izone < zones->nizone)
 +    {
 +        *jcg0 = zones->izone[izone].jcg0;
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
 +                  icg,izone,zones->nizone);
 +    }
 +        
 +    *jcg1 = zones->izone[izone].jcg1;
 +    
 +    for(d=0; d<dd->ndim; d++) /* only the decomposed dimensions of shift0/shift1 are written */
 +    {
 +        dim = dd->dim[d];
 +        shift0[dim] = zones->izone[izone].shift0[dim];
 +        shift1[dim] = zones->izone[izone].shift1[dim];
 +        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
 +        {
 +            /* A conservative approach, this can be optimized */
 +            shift0[dim] -= 1;
 +            shift1[dim] += 1;
 +        }
 +    }
 +}
 +
 +int dd_natoms_vsite(gmx_domdec_t *dd)
 +{ /* Returns the atom count stored for type ddnatVSITE, i.e. dd->comm->nat[ddnatVSITE] */
 +    return dd->comm->nat[ddnatVSITE];
 +}
 +
 +void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
 +{ /* Returns the half-open range [nat[ddnatCON-1], nat[ddnatCON]) in *at_start and *at_end */
 +    *at_start = dd->comm->nat[ddnatCON-1];
 +    *at_end   = dd->comm->nat[ddnatCON];
 +}
 +
 +void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec shift={0,0,0},*buf,*rbuf;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = comm->vbuf.v;
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (bPBC)
 +        {
 +            copy_rvec(box[dd->dim[d]],shift);
 +        }
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        copy_rvec(x[j],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* We need to shift the coordinates */
 +                        rvec_add(x[j],shift,buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Shift x */
 +                        buf[n][XX] = x[j][XX] + shift[XX];
 +                        /* Rotate y and z.
 +                         * This operation requires a special shift force
 +                         * treatment, which is performed in calc_vir.
 +                         */
 +                        buf[n][YY] = box[YY][YY] - x[j][YY];
 +                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 +                        n++;
 +                    }
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                rbuf = x + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = comm->vbuf2.v;
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_rvec(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(rbuf[j],x[i]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
 +
 +void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec *buf,*sbuf;
 +    ivec vis;
 +    int  is;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = comm->vbuf.v;
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (fshift == NULL && !bScrew)
 +        {
 +            bPBC = FALSE;
 +        }
 +        /* Determine which shift vector we need */
 +        clear_ivec(vis);
 +        vis[dd->dim[d]] = 1;
 +        is = IVEC2IS(vis);
 +        
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                sbuf = f + nat_tot;
 +            }
 +            else
 +            {
 +                sbuf = comm->vbuf2.v;
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(f[i],sbuf[j]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        n++;
 +                    }
 +                } 
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        /* Add this force to the shift force */
 +                        rvec_inc(fshift[is],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Rotate the force */
 +                        f[j][XX] += buf[n][XX];
 +                        f[j][YY] -= buf[n][YY];
 +                        f[j][ZZ] -= buf[n][ZZ];
 +                        if (fshift)
 +                        {
 +                            /* Add this force to the shift force */
 +                            rvec_inc(fshift[is],buf[n]);
 +                        }
 +                        n++;
 +                    }
 +                }
 +            }
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*rbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = &comm->vbuf.v[0][0];
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    buf[n] = v[j];
 +                    n++;
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                rbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = &comm->vbuf2.v[0][0];
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_real(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        v[i] = rbuf[j];
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
 +
 +void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*sbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = &comm->vbuf.v[0][0];
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                sbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                sbuf = &comm->vbuf2.v[0][0];
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        sbuf[j] = v[i];
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_real(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    v[j] += buf[n];
 +                    n++;
 +                }
 +            } 
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
 +{
 +    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
 +            d,i,j,
 +            zone->min0,zone->max1,
 +            zone->mch0,zone->mch0,
 +            zone->p1_0,zone->p1_1);
 +}
 +
 +static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
 +                               int ddimind,int direction,
 +                               gmx_ddzone_t *buf_s,int n_s,
 +                               gmx_ddzone_t *buf_r,int n_r)
 +{
 +    rvec vbuf_s[5*2],vbuf_r[5*2];
 +    int i;
 +
 +    for(i=0; i<n_s; i++)
 +    {
 +        vbuf_s[i*2  ][0] = buf_s[i].min0;
 +        vbuf_s[i*2  ][1] = buf_s[i].max1;
 +        vbuf_s[i*2  ][2] = buf_s[i].mch0;
 +        vbuf_s[i*2+1][0] = buf_s[i].mch1;
 +        vbuf_s[i*2+1][1] = buf_s[i].p1_0;
 +        vbuf_s[i*2+1][2] = buf_s[i].p1_1;
 +    }
 +
 +    dd_sendrecv_rvec(dd, ddimind, direction,
 +                     vbuf_s, n_s*2,
 +                     vbuf_r, n_r*2);
 +
 +    for(i=0; i<n_r; i++)
 +    {
 +        buf_r[i].min0 = vbuf_r[i*2  ][0];
 +        buf_r[i].max1 = vbuf_r[i*2  ][1];
 +        buf_r[i].mch0 = vbuf_r[i*2  ][2];
 +        buf_r[i].mch1 = vbuf_r[i*2+1][0];
 +        buf_r[i].p1_0 = vbuf_r[i*2+1][1];
 +        buf_r[i].p1_1 = vbuf_r[i*2+1][2];
 +    }
 +}
 +
 +/* Exchange cell boundary information with neighboring domains and widen
 + * cell_ns_x0/cell_ns_x1 accordingly.
 + *
 + * First the local zone entries (zone_d1[0] and zone_d2[0][0]) are filled
 + * from cell_ns_x0/cell_ns_x1. Then, for each decomposition dimension
 + * index d from ndim-2 down to 0, the cell fraction extremes are
 + * communicated forward and the zone data backward over the pulses of d.
 + * Finally cell_ns_x0/cell_ns_x1 are extended with the min0/max1 of the
 + * collected zones and the extremes are stored in comm->cell_f_max0 and
 + * comm->cell_f_min1.
 + */
 +static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                          rvec cell_ns_x0,rvec cell_ns_x1)
 +{
 +    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
 +    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
 +    rvec extr_s[2],extr_r[2];
 +    rvec dh;
 +    real dist_d,c=0,det;
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bPBC,bUse;
 +
 +    comm = dd->comm;
 +
 +    /* Initialize the local zone entry of every dimension beyond the first
 +     * with the current neighbor-search cell bounds.
 +     */
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
 +        zp->min0 = cell_ns_x0[dim];
 +        zp->max1 = cell_ns_x1[dim];
 +        zp->mch0 = cell_ns_x0[dim];
 +        zp->mch1 = cell_ns_x1[dim];
 +        zp->p1_0 = cell_ns_x0[dim];
 +        zp->p1_1 = cell_ns_x1[dim];
 +    }
 +    
 +    for(d=dd->ndim-2; d>=0; d--)
 +    {
 +        dim  = dd->dim[d];
 +        bPBC = (dim < ddbox->npbcdim);
 +
 +        /* Use an rvec to store two reals */
 +        extr_s[d][0] = comm->cell_f0[d+1];
 +        extr_s[d][1] = comm->cell_f1[d+1];
 +        extr_s[d][2] = 0;
 +
 +        pos = 0;
 +        /* Store the extremes in the backward sending buffer,
 +         * so they get updated separately from the forward communication.
 +         */
 +        for(d1=d; d1<dd->ndim-1; d1++)
 +        {
 +            /* We invert the order to be able to use the same loop for buf_e */
 +            buf_s[pos].min0 = extr_s[d1][1];
 +            buf_s[pos].max1 = extr_s[d1][0];
 +            buf_s[pos].mch0 = 0;
 +            buf_s[pos].mch1 = 0;
 +            /* Store the cell corner of the dimension we communicate along */
 +            buf_s[pos].p1_0 = comm->cell_x0[dim];
 +            buf_s[pos].p1_1 = 0;
 +            pos++;
 +        }
 +
 +        /* Append the local zone of the last decomposition dimension */
 +        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
 +        pos++;
 +
 +        if (dd->ndim == 3 && d == 0)
 +        {
 +            /* With three dimensions the first dimension also passes on
 +             * zone_d2[0][1] and zone_d1[0]; this brings pos to the
 +             * buffer capacity of 5.
 +             */
 +            buf_s[pos] = comm->zone_d2[0][1];
 +            pos++;
 +            buf_s[pos] = comm->zone_d1[0];
 +            pos++;
 +        }
 +
 +        /* We only need to communicate the extremes
 +         * in the forward direction
 +         */
 +        npulse = comm->cd[d].np;
 +        if (bPBC)
 +        {
 +            /* Take the minimum to avoid double communication */
 +            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
 +        }
 +        else
 +        {
 +            /* Without PBC we should really not communicate over
 +             * the boundaries, but implementing that complicates
 +             * the communication setup and therefore we simply
 +             * do all communication, but ignore some data.
 +             */
 +            npulse_min = npulse;
 +        }
 +        for(p=0; p<npulse_min; p++)
 +        {
 +            /* Communicate the extremes forward */
 +            bUse = (bPBC || dd->ci[dim] > 0);
 +
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             extr_s+d, dd->ndim-d-1,
 +                             extr_r+d, dd->ndim-d-1);
 +
 +            if (bUse)
 +            {
 +                /* Component 0 accumulates a maximum, component 1 a minimum */
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
 +                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
 +                }
 +            }
 +        }
 +
 +        buf_size = pos;
 +        for(p=0; p<npulse; p++)
 +        {
 +            /* Communicate all the zone information backward */
 +            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
 +
 +            dd_sendrecv_ddzone(dd, d, dddirBackward,
 +                               buf_s, buf_size,
 +                               buf_r, buf_size);
 +
 +            clear_rvec(dh);
 +            if (p > 0)
 +            {
 +                for(d1=d+1; d1<dd->ndim; d1++)
 +                {
 +                    /* Determine the decrease of maximum required
 +                     * communication height along d1 due to the distance along d,
 +                     * this avoids a lot of useless atom communication.
 +                     */
 +                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
 +
 +                    if (ddbox->tric_dir[dim])
 +                    {
 +                        /* c is the off-diagonal coupling between the cell planes
 +                         * along directions d and d1.
 +                         */
 +                        c = ddbox->v[dim][dd->dim[d1]][dim];
 +                    }
 +                    else
 +                    {
 +                        c = 0;
 +                    }
 +                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
 +                    if (det > 0)
 +                    {
 +                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
 +                    }
 +                    else
 +                    {
 +                        /* A negative value signals out of range */
 +                        dh[d1] = -1;
 +                    }
 +                }
 +            }
 +
 +            /* Accumulate the extremes over all pulses */
 +            for(i=0; i<buf_size; i++)
 +            {
 +                if (p == 0)
 +                {
 +                    /* The first pulse initializes the accumulator */
 +                    buf_e[i] = buf_r[i];
 +                }
 +                else
 +                {
 +                    if (bUse)
 +                    {
 +                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
 +                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
 +                    }
 +
 +                    /* Select the dh component for this buffer entry: the
 +                     * last entry with three dimensions at d == 0 uses
 +                     * index 1, all other entries use d + 1.
 +                     */
 +                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
 +                    {
 +                        d1 = 1;
 +                    }
 +                    else
 +                    {
 +                        d1 = d + 1;
 +                    }
 +                    if (bUse && dh[d1] >= 0)
 +                    {
 +                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
 +                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
 +                    }
 +                }
 +                /* Copy the received buffer to the send buffer,
 +                 * to pass the data through with the next pulse.
 +                 */
 +                buf_s[i] = buf_r[i];
 +            }
 +            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
 +                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
 +            {
 +                /* Store the extremes */
 +                pos = 0;
 +
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
 +                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
 +                    pos++;
 +                }
 +
 +                /* Unpack the accumulated zones in the order they were packed */
 +                if (d == 1 || (d == 0 && dd->ndim == 3))
 +                {
 +                    for(i=d; i<2; i++)
 +                    {
 +                        comm->zone_d2[1-d][i] = buf_e[pos];
 +                        pos++;
 +                    }
 +                }
 +                if (d == 0)
 +                {
 +                    comm->zone_d1[1] = buf_e[pos];
 +                    pos++;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* Extend the neighbor-search bounds along the second dimension */
 +    if (dd->ndim >= 2)
 +    {
 +        dim = dd->dim[1];
 +        for(i=0; i<2; i++)
 +        {
 +            if (debug)
 +            {
 +                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
 +            }
 +            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
 +            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
 +        }
 +    }
 +    /* Extend the neighbor-search bounds along the third dimension */
 +    if (dd->ndim >= 3)
 +    {
 +        dim = dd->dim[2];
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0; j<2; j++)
 +            {
 +                if (debug)
 +                {
 +                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
 +                }
 +                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
 +                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
 +            }
 +        }
 +    }
 +    /* Publish the accumulated cell fraction extremes */
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        comm->cell_f_max0[d] = extr_s[d-1][0];
 +        comm->cell_f_min1[d] = extr_s[d-1][1];
 +        if (debug)
 +        {
 +            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
 +                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
 +        }
 +    }
 +}
 +
 +static void dd_collect_cg(gmx_domdec_t *dd,
 +                          t_state *state_local)
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
 +    t_block *cgs_gl;
 +
 +    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
 +    {
 +        /* The master has the correct distribution */
 +        return;
 +    }
 +    
 +    if (state_local->ddp_count == dd->ddp_count)
 +    {
 +        ncg_home = dd->ncg_home;
 +        cg       = dd->index_gl;
 +        nat_home = dd->nat_home;
 +    } 
 +    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        ncg_home = state_local->ncg_gl;
 +        cg       = state_local->cg_gl;
 +        nat_home = 0;
 +        for(i=0; i<ncg_home; i++)
 +        {
 +            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
 +        }
 +    }
 +    else
 +    {
 +        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
 +    }
 +    
 +    buf2[0] = dd->ncg_home;
 +    buf2[1] = dd->nat_home;
 +    if (DDMASTER(dd))
 +    {
 +        ma = dd->ma;
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    /* Collect the charge group and atom counts on the master */
 +    dd_gather(dd,2*sizeof(int),buf2,ibuf);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma->index[0] = 0;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ncg[i] = ma->ibuf[2*i];
 +            ma->nat[i] = ma->ibuf[2*i+1];
 +            ma->index[i+1] = ma->index[i] + ma->ncg[i];
 +            
 +        }
 +        /* Make byte counts and indices */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"Initial charge group distribution: ");
 +            for(i=0; i<dd->nnodes; i++)
 +                fprintf(debug," %d",ma->ncg[i]);
 +            fprintf(debug,"\n");
 +        }
 +    }
 +    
 +    /* Collect the charge group indices on the master */
 +    dd_gatherv(dd,
 +               dd->ncg_home*sizeof(int),dd->index_gl,
 +               DDMASTER(dd) ? ma->ibuf : NULL,
 +               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +               DDMASTER(dd) ? ma->cg : NULL);
 +    
 +    dd->comm->master_cg_ddp_count = state_local->ddp_count;
 +}
 +
 +static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
 +                                    rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +
 +    ma = dd->ma;
 +    
 +    if (!DDMASTER(dd))
 +    {
 +#ifdef GMX_MPI
 +        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 dd->rank,dd->mpi_comm_all);
 +#endif
 +    } else {
 +        /* Copy the master coordinates to the global array */
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(lv[a++],v[c]);
 +            }
 +        }
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +#ifdef GMX_MPI
 +                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
 +                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(buf[a++],v[c]);
 +                    }
 +                }
 +            }
 +        }
 +        sfree(buf);
 +    }
 +}
 +
 +static void get_commbuffer_counts(gmx_domdec_t *dd,
 +                                  int **counts,int **disps)
 +{
 +    gmx_domdec_master_t *ma;
 +    int n;
 +
 +    ma = dd->ma;
 +    
 +    /* Make the rvec count and displacment arrays */
 +    *counts  = ma->ibuf;
 +    *disps   = ma->ibuf + dd->nnodes;
 +    for(n=0; n<dd->nnodes; n++)
 +    {
 +        (*counts)[n] = ma->nat[n]*sizeof(rvec);
 +        (*disps)[n]  = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
 +    }
 +}
 +
 +static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
 +                                   rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *rcounts=NULL,*disps=NULL;
 +    int  n,i,c,a;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +    
 +    ma = dd->ma;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        get_commbuffer_counts(dd,&rcounts,&disps);
 +
 +        buf = ma->vbuf;
 +    }
 +    
 +    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
 +
 +    if (DDMASTER(dd))
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(buf[a++],v[c]);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +void dd_collect_vec(gmx_domdec_t *dd,
 +                    t_state *state_local,rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    dd_collect_cg(dd,state_local);
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_collect_vec_sendrecv(dd,lv,v);
 +    }
 +    else
 +    {
 +        dd_collect_vec_gatherv(dd,lv,v);
 +    }
 +}
 +
 +
 +/* Collect the distributed state state_local into the global state.
 + * On the master the non-distributed entries (lambda, fep_state, veta,
 + * vol0, the box and virial/pressure matrices, and the Nose-Hoover and
 + * pressure-coupling chain variables) are copied directly. After that,
 + * every distributed state entry that is flagged in state_local->flags is
 + * collected, on all ranks, through dd_collect_vec or dd_gather.
 + */
 +void dd_collect_state(gmx_domdec_t *dd,
 +                      t_state *state_local,t_state *state)
 +{
 +    int est,i,j,nh;
 +
 +    /* Chain length used to index the flattened chain arrays below */
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
-     int  i,j,ngtch,ngtcp,nh;
++        for (i=0;i<efptNR;i++) {
++            state->lambda[i] = state_local->lambda[i];
++        }
++        state->fep_state = state_local->fep_state;
 +        state->veta = state_local->veta;
 +        state->vol0 = state_local->vol0;
 +        copy_mat(state_local->box,state->box);
 +        copy_mat(state_local->boxv,state->boxv);
 +        copy_mat(state_local->svir_prev,state->svir_prev);
 +        copy_mat(state_local->fvir_prev,state->fvir_prev);
 +        copy_mat(state_local->pres_prev,state->pres_prev);
 +
 +
 +        /* Chain variables are stored flattened as [group*nh + link] */
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nosehoover_xi[i*nh+j]        = state_local->nosehoover_xi[i*nh+j];
 +                state->nosehoover_vxi[i*nh+j]       = state_local->nosehoover_vxi[i*nh+j];
 +            }
 +            state->therm_integral[i] = state_local->therm_integral[i];            
 +        }
 +        for(i=0; i<state_local->nnhpres; i++) 
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nhpres_xi[i*nh+j]        = state_local->nhpres_xi[i*nh+j];
 +                state->nhpres_vxi[i*nh+j]       = state_local->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    /* Collect every distributed entry that is present in the local state */
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state_local->flags & (1<<est)))
 +        {
 +            switch (est) {
 +            case estX:
 +                dd_collect_vec(dd,state_local,state_local->x,state->x);
 +                break;
 +            case estV:
 +                dd_collect_vec(dd,state_local,state_local->v,state->v);
 +                break;
 +            case estSDX:
 +                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
 +                break;
 +            case estCGP:
 +                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
 +                break;
 +            case estLD_RNG:
 +                /* With a single RNG entry in the global state the master
 +                 * copies its own data, otherwise all ranks contribute.
 +                 */
 +                if (state->nrngi == 1)
 +                {
 +                    if (DDMASTER(dd))
 +                    {
 +                        for(i=0; i<state_local->nrng; i++)
 +                        {
 +                            state->ld_rng[i] = state_local->ld_rng[i];
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
 +                              state_local->ld_rng,state->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                   if (DDMASTER(dd))
 +                    {
 +                        state->ld_rngi[0] = state_local->ld_rngi[0];
 +                    } 
 +                }
 +                else
 +                {
 +                    dd_gather(dd,sizeof(state->ld_rngi[0]),
 +                              state_local->ld_rngi,state->ld_rngi);
 +                }
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* These entries are not collected here */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_collect_state");
 +            }
 +        }
 +    }
 +}
 +
 +static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
 +{
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +    fr->cg_nalloc = over_alloc_dd(nalloc);
 +    srenew(fr->cg_cm,fr->cg_nalloc);
 +    srenew(fr->cginfo,fr->cg_nalloc);
 +}
 +
 +static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
 +{
 +    int est;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +
 +    state->nalloc = over_alloc_dd(nalloc);
 +    
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est)))
 +        {
 +            switch(est) {
 +            case estX:
 +                srenew(state->x,state->nalloc);
 +                break;
 +            case estV:
 +                srenew(state->v,state->nalloc);
 +                break;
 +            case estSDX:
 +                srenew(state->sd_X,state->nalloc);
 +                break;
 +            case estCGP:
 +                srenew(state->cg_p,state->nalloc);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No reallocation required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_realloc_state");            
 +            }
 +        }
 +    }
 +    
 +    if (f != NULL)
 +    {
 +        srenew(*f,state->nalloc);
 +    }
 +}
 +
 +static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{   /* Distribute the vector array v over the nodes with point-to-point
 +     * messages. The master packs, for every other node, the vectors of
 +     * the atoms in that node's charge groups and sends them; its own part
 +     * is copied directly into lv. All other nodes receive
 +     * dd->nat_home rvecs into lv. The send and receive calls are only
 +     * compiled when GMX_MPI is defined.
 +     */
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc) /* grow the send buffer on demand */
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +                /* Pack the vectors of all atoms in the charge groups of
 +                 * node n contiguously into the temporary buffer buf
 +                 */
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(v[c],buf[a++]);
 +                    }
 +                }
 +                if (a != ma->nat[n]) /* packed count must match the stored atom count */
 +                {
 +                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
 +                              a,ma->nat[n]);
 +                }
 +                
 +#ifdef GMX_MPI
 +                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
 +                         DDRANK(dd,n),n,dd->mpi_comm_all); /* sent as raw bytes, tag = n */
 +#endif
 +            }
 +        }
 +        sfree(buf);
 +        n = DDMASTERRANK(dd); /* the master's own part needs no communication */
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(v[c],lv[a++]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +#ifdef GMX_MPI
 +        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE); /* any tag accepted */
 +#endif
 +    }
 +}
 +
 +static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{   /* Distribute the vector array v over the nodes with a single
 +     * dd_scatterv call. The master obtains the counts and displacements
 +     * from get_commbuffer_counts and packs the vectors of all nodes, in
 +     * node order, into ma->vbuf. On the other nodes scounts, disps and
 +     * buf are passed as NULL. Every node receives
 +     * dd->nat_home*sizeof(rvec) bytes into lv.
 +     */
 +    gmx_domdec_master_t *ma;
 +    int  *scounts=NULL,*disps=NULL;
 +    int  n,i,c,a,nalloc=0; /* nalloc is not used in this function */
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +     
 +        get_commbuffer_counts(dd,&scounts,&disps);
 +
 +        buf = ma->vbuf; /* pack into the master's buffer, no allocation here */
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(v[c],buf[a++]);
 +                }
 +            }
 +        }
 +    }
 +
 +    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
 +}
 +
 +static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
 +{   /* Distribute v into the local array lv: point-to-point messages are
 +     * used for up to GMX_DD_NNODES_SENDRECV nodes, dd_scatterv otherwise.
 +     */
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_distribute_vec_sendrecv(dd,cgs,v,lv);
 +    }
 +    else
 +    {
 +        dd_distribute_vec_scatterv(dd,cgs,v,lv);
 +    }
 +}
 +
 +static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
 +                                t_state *state,t_state *state_local,
 +                                rvec **f)
 +{   /* Distribute the contents of state over the local states.
 +     * On the master the lambda array, fep_state, veta, vol0, the box and
 +     * virial matrices and the thermostat/barostat chain variables are
 +     * copied from state to state_local; these entries are then broadcast
 +     * with dd_bcast. The per-atom entries flagged in state_local->flags
 +     * are distributed with dd_distribute_vec, the random number state with
 +     * dd_bcastc or dd_scatter.
 +     * NOTE(review): state->nhchainlength and state->nrngi are read on all
 +     * ranks, not only on the master - presumably state holds valid values
 +     * for these everywhere; confirm against the caller.
 +     */
-         state_local->lambda = state->lambda;
++    int  i,j,nh;
 +
 +    nh = state->nhchainlength; /* number of chain entries per coupling group */
 +
 +    if (DDMASTER(dd))
 +    {
-     dd_bcast(dd,sizeof(real),&state_local->lambda);
++        for(i=0;i<efptNR;i++)
++        {
++            state_local->lambda[i] = state->lambda[i];
++        }
++        state_local->fep_state = state->fep_state;
 +        state_local->veta   = state->veta;
 +        state_local->vol0   = state->vol0;
 +        copy_mat(state->box,state_local->box);
 +        copy_mat(state->box_rel,state_local->box_rel);
 +        copy_mat(state->boxv,state_local->boxv);
 +        copy_mat(state->svir_prev,state_local->svir_prev);
 +        copy_mat(state->fvir_prev,state_local->fvir_prev);
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nosehoover_xi[i*nh+j]        = state->nosehoover_xi[i*nh+j];
 +                state_local->nosehoover_vxi[i*nh+j]       = state->nosehoover_vxi[i*nh+j];
 +            }
 +            state_local->therm_integral[i] = state->therm_integral[i];
 +        }
 +        for(i=0; i<state_local->nnhpres; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nhpres_xi[i*nh+j]        = state->nhpres_xi[i*nh+j];
 +                state_local->nhpres_vxi[i*nh+j]       = state->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
++    dd_bcast(dd,((efptNR)*sizeof(real)),state_local->lambda);
++    dd_bcast(dd,sizeof(int),&state_local->fep_state);
 +    dd_bcast(dd,sizeof(real),&state_local->veta);
 +    dd_bcast(dd,sizeof(real),&state_local->vol0);
 +    dd_bcast(dd,sizeof(state_local->box),state_local->box);
 +    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
 +    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
 +    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
 +    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
 +    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
 +
 +    if (dd->nat_home > state_local->nalloc) /* make room for the home atoms */
 +    {
 +        dd_realloc_state(state_local,f,dd->nat_home);
 +    }
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state_local->flags & (1<<i))) /* only entries present in the local state */
 +        {
 +            switch (i) {
 +            case estX:
 +                dd_distribute_vec(dd,cgs,state->x,state_local->x);
 +                break;
 +            case estV:
 +                dd_distribute_vec(dd,cgs,state->v,state_local->v);
 +                break;
 +            case estSDX:
 +                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
 +                break;
 +            case estCGP:
 +                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1) /* one generator state: same data for every node */
 +                {
 +                    dd_bcastc(dd,
 +                              state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                              state->ld_rng,state_local->ld_rng);
 +                }
 +                else
 +                {
 +                    dd_scatter(dd,
 +                               state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                               state->ld_rng,state_local->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
 +                              state->ld_rngi,state_local->ld_rngi);
 +                }
 +                else
 +                {
 +                     dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
 +                               state->ld_rngi,state_local->ld_rngi);
 +                }   
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* Not implemented yet: these entries are not distributed */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_distribute_state");
 +            }
 +        }
 +    }
 +}
 +
 +static char dim2char(int dim)
 +{   /* Return 'X', 'Y' or 'Z' for dim equal to XX, YY or ZZ;
 +     * any other value is reported through gmx_fatal.
 +     */
 +    char c='?';
 +    
 +    switch (dim)
 +    {
 +    case XX: c = 'X'; break;
 +    case YY: c = 'Y'; break;
 +    case ZZ: c = 'Z'; break;
 +    default: gmx_fatal(FARGS,"Unknown dim %d",dim);
 +    }
 +    
 +    return c;
 +}
 +
 +static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
 +{   /* Write the domain decomposition cell grid to the file
 +     * "<fn>_<step>.pdb". Each node passes its cell_x0 and cell_x1 to
 +     * dd_gather; the master then writes, per node, the 8 cell corners
 +     * as ATOM records and the 12 cell edges as CONECT records.
 +     * Only the master opens and writes the file.
 +     */
 +    rvec grid_s[2],*grid_r=NULL,cx,r;
 +    char fname[STRLEN],format[STRLEN],buf[22];
 +    FILE *out;
 +    int  a,i,d,z,y,x;
 +    matrix tric;
 +    real vol;
 +
 +    copy_rvec(dd->comm->cell_x0,grid_s[0]); /* lower cell corner */
 +    copy_rvec(dd->comm->cell_x1,grid_s[1]); /* upper cell corner */
 +    
 +    if (DDMASTER(dd))
 +    {
 +        snew(grid_r,2*dd->nnodes); /* two rvecs per node */
 +    }
 +    
 +    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        /* Build the matrix tric that is applied to the corner coordinates:
 +         * unit diagonal, and off-diagonal box ratios for decomposed
 +         * dimensions below ddbox->npbcdim.
 +         */
 +        for(d=0; d<DIM; d++)
 +        {
 +            for(i=0; i<DIM; i++)
 +            {
 +                if (d == i)
 +                {
 +                    tric[d][i] = 1;
 +                }
 +                else
 +                {
 +                    if (d < ddbox->npbcdim && dd->nc[d] > 1)
 +                    {
 +                        tric[d][i] = box[i][d]/box[i][i];
 +                    }
 +                    else
 +                    {
 +                        tric[d][i] = 0;
 +                    }
 +                }
 +            }
 +        }
 +        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
 +        sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f"); /* two extra columns: 1.0 and vol */
 +        out = gmx_fio_fopen(fname,"w");
 +        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +        a = 1; /* running atom number */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            /* Cell volume relative to box-diagonal volume divided by nnodes */
 +            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 +            for(d=0; d<DIM; d++)
 +            {
 +                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
 +            }
 +            /* The 8 corners, x running fastest; index 0/1 selects lower/upper */
 +            for(z=0; z<2; z++)
 +            {
 +                for(y=0; y<2; y++)
 +                {
 +                    for(x=0; x<2; x++)
 +                    {
 +                        cx[XX] = grid_r[i*2+x][XX];
 +                        cx[YY] = grid_r[i*2+y][YY];
 +                        cx[ZZ] = grid_r[i*2+z][ZZ];
 +                        mvmul(tric,cx,r);
 +                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
 +                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol); /* x10: presumably nm -> Angstrom */
 +                    }
 +                }
 +            }
 +            /* The 4 edges along each dimension d: atom y is connected to
 +             * atom y+(1<<d) within the block of 8 atoms of node i
 +             */
 +            for(d=0; d<DIM; d++)
 +            {
 +                for(x=0; x<4; x++)
 +                {
 +                    switch(d)
 +                    {
 +                    case 0: y = 1 + i*8 + 2*x; break;
 +                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
 +                    case 2: y = 1 + i*8 + x; break;
 +                    }
 +                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
 +                }
 +            }
 +        }
 +        gmx_fio_fclose(out);
 +        sfree(grid_r);
 +    }
 +}
 +
 +void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
 +                  gmx_mtop_t *mtop,t_commrec *cr,
 +                  int natoms,rvec x[],matrix box)
 +{   /* Write the first natoms local atoms with coordinates x to the file
 +     * "<fn>_<step>_n<sim_nodeid>.pdb". With natoms == -1 the count
 +     * dd->comm->nat[ddnatVSITE] is used. The last column of each ATOM
 +     * record holds b, which encodes the zone the atom belongs to.
 +     */
 +    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
 +    FILE *out;
 +    int  i,ii,resnr,c;
 +    char *atomname,*resname;
 +    real b;
 +    gmx_domdec_t *dd;
 +    
 +    dd = cr->dd;
 +    if (natoms == -1)
 +    {
 +        natoms = dd->comm->nat[ddnatVSITE];
 +    }
 +    
 +    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
 +    
 +    sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
 +    sprintf(format4,"%s%s\n",get_pdbformat4(),"%6.2f%6.2f"); /* used for atom names of 4 or more characters */
 +    
 +    out = gmx_fio_fopen(fname,"w");
 +    
 +    fprintf(out,"TITLE     %s\n",title);
 +    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +    for(i=0; i<natoms; i++)
 +    {
 +        ii = dd->gatindex[i]; /* global atom index of local atom i */
 +        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
 +        if (i < dd->comm->nat[ddnatZONE])
 +        {
 +            /* Find the zone c whose charge group range contains atom i */
 +            c = 0;
 +            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
 +            {
 +                c++;
 +            }
 +            b = c;
 +        }
 +        else if (i < dd->comm->nat[ddnatVSITE])
 +        {
 +            b = dd->comm->zones.n; /* beyond the zone atoms, below nat[ddnatVSITE] */
 +        }
 +        else
 +        {
 +            b = dd->comm->zones.n + 1; /* all remaining atoms */
 +        }
 +        fprintf(out,strlen(atomname)<4 ? format : format4,
 +                "ATOM",(ii+1)%100000,
 +                atomname,resname,' ',resnr%10000,' ',
 +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b); /* numbers wrapped to fit the fixed-width fields */
 +    }
 +    fprintf(out,"TER\n");
 +    
 +    gmx_fio_fclose(out);
 +}
 +
 +real dd_cutoff_mbody(gmx_domdec_t *dd)
 +{   /* Return the cut-off distance for multi-body interactions,
 +     * or -1 when comm->bInterCGBondeds is not set.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  di;
 +    real r;
 +
 +    comm = dd->comm;
 +
 +    r = -1;
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm->cutoff_mbody > 0)
 +        {
 +            r = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            /* cutoff_mbody=0 means we do not have DLB */
 +            /* Start from the smallest minimum cell size over the DD dimensions */
 +            r = comm->cellsize_min[dd->dim[0]];
 +            for(di=1; di<dd->ndim; di++)
 +            {
 +                r = min(r,comm->cellsize_min[dd->dim[di]]);
 +            }
 +            if (comm->bBondComm)
 +            {
 +                r = max(r,comm->cutoff_mbody);
 +            }
 +            else
 +            {
 +                r = min(r,comm->cutoff); /* limited by the general cut-off */
 +            }
 +        }
 +    }
 +
 +    return r;
 +}
 +
 +real dd_cutoff_twobody(gmx_domdec_t *dd)
 +{   /* Return the larger of dd->comm->cutoff and dd_cutoff_mbody(dd) */
 +    real r_mb;
 +
 +    r_mb = dd_cutoff_mbody(dd);
 +
 +    return max(dd->comm->cutoff,r_mb);
 +}
 +
 +
 +static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
 +{   /* Copy coord to coord_pme and replace the component along
 +     * comm->cartpmedim: the nc cell indices are mapped, with rounding to
 +     * the nearest, onto the ntot - nc indices that follow them.
 +     */
 +    int nc,ntot;
 +    
 +    nc   = dd->nc[dd->comm->cartpmedim];
 +    ntot = dd->comm->ntot[dd->comm->cartpmedim];
 +    copy_ivec(coord,coord_pme);
 +    coord_pme[dd->comm->cartpmedim] =
 +        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 +}
 +
 +static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
 +{
 +    /* Here we assign a PME node to communicate with this DD node
 +     * by assuming that the major index of both is x.
 +     * We add npme/2 to obtain an even distribution.
 +     * The result is an integer (truncating) division by ndd.
 +     */
 +    return (ddindex*npme + npme/2)/ndd;
 +}
 +
 +static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
 +{   /* PME index for DD index ddindex, using the counts stored in dd */
 +    return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
 +}
 +
 +static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
 +{   /* As ddindex2pmeindex, but takes the PME node count from cr->npmenodes */
 +    return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
 +}
 +
 +static int *dd_pmenodes(t_commrec *cr)
 +{   /* Allocate and return an array of cr->npmenodes entries.
 +     * An entry is written after each DD index i at which the PME index
 +     * increases, and after the last DD index; it is set to i + 1 + n,
 +     * with n the number of entries written before.
 +     * The array is allocated with snew and returned to the caller.
 +     */
 +    int *pmenodes;
 +    int n,i,p0,p1;
 +    
 +    snew(pmenodes,cr->npmenodes);
 +    n = 0;
 +    for(i=0; i<cr->dd->nnodes; i++) {
 +        p0 = cr_ddindex2pmeindex(cr,i);
 +        p1 = cr_ddindex2pmeindex(cr,i+1);
 +        if (i+1 == cr->dd->nnodes || p1 > p0) {
 +            if (debug)
 +                fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
 +            pmenodes[n] = i + 1 + n; /* i+1 DD indices and n earlier entries come before */
 +            n++;
 +        }
 +    }
 +
 +    return pmenodes;
 +}
 +
 +static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
 +{   /* Return the PME index for the DD cell with coordinates x,y,z:
 +     * the coordinates are converted with dd_index and the result is
 +     * passed to ddindex2pmeindex.
 +     */
 +    gmx_domdec_t *dd;
 +    ivec coords,coords_pme,nc; /* coords_pme and nc are only used in the disabled code below */
 +    int  slab;
 +    
 +    dd = cr->dd;
 +    /* Disabled alternative, kept for reference:
 +      if (dd->comm->bCartesian) {
 +      gmx_ddindex2xyz(dd->nc,ddindex,coords);
 +      dd_coords2pmecoords(dd,coords,coords_pme);
 +      copy_ivec(dd->ntot,nc);
 +      nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 +      coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 +      
 +      slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 +      } else {
 +      slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 +      }
 +    */
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
 +    
 +    return slab;
 +}
 +
 +static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
 +{   /* Return the simulation node id of the DD cell with coordinates
 +     * x,y,z. The lookup depends on the communicator layout flags
 +     * bCartesianPP_PME and bCartesianPP and on whether comm->pmenodes
 +     * is set. The initial value -1 is returned when bCartesianPP_PME
 +     * is set and GMX_MPI is not defined.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    ivec coords;
 +    int  ddindex,nodeid=-1;
 +    
 +    comm = cr->dd->comm;
 +    
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddindex = dd_index(cr->dd->nc,coords);
 +        if (comm->bCartesianPP)
 +        {
 +            nodeid = comm->ddindex2simnodeid[ddindex]; /* table lookup */
 +        }
 +        else
 +        {
 +            if (comm->pmenodes)
 +            {
 +                /* Shift the DD index by the PME index of this cell */
 +                nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
 +            }
 +            else
 +            {
 +                nodeid = ddindex;
 +            }
 +        }
 +    }
 +  
 +    return nodeid;
 +}
 +
 +static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
 +{   /* Return the PME node id that belongs to simulation node sim_nodeid.
 +     * The initial value -1 is returned when none of the branches below
 +     * assigns a PME node; gmx_pmeonlynode interprets -1 as "sim_nodeid
 +     * is a PME-only node".
 +     */
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    ivec coord,coord_pme;
 +    int  i;
 +    int  pmenode=-1;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    /* This assumes a uniform x domain decomposition grid cell size */
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
 +        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            /* This is a PP node */
 +            dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
 +        }
 +#endif
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (sim_nodeid < dd->nnodes)
 +        {
 +            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +        }
 +    }
 +    else
 +    {
 +        /* This assumes DD cells with identical x coordinates
 +         * are numbered sequentially.
 +         */
 +        if (dd->comm->pmenodes == NULL)
 +        {
 +            if (sim_nodeid < dd->nnodes)
 +            {
 +                /* The DD index equals the nodeid */
 +                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +            }
 +        }
 +        else
 +        {
 +            /* Find the first entry of pmenodes that is not smaller than
 +             * sim_nodeid. NOTE(review): the scan has no upper bound; it
 +             * relies on sim_nodeid never exceeding the last entry.
 +             */
 +            i = 0;
 +            while (sim_nodeid > dd->comm->pmenodes[i])
 +            {
 +                i++;
 +            }
 +            if (sim_nodeid < dd->comm->pmenodes[i]) /* equal means sim_nodeid is itself listed in pmenodes */
 +            {
 +                pmenode = dd->comm->pmenodes[i];
 +            }
 +        }
 +    }
 +    
 +    return pmenode;
 +}
 +
 +gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
 +{   /* Return TRUE when domain decomposition is in use and
 +     * dd_simnode2pmenode returns -1 for sim_nodeid; FALSE otherwise.
 +     */
 +    gmx_bool bPMEOnlyNode;
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
 +    }
 +    else
 +    {
 +        bPMEOnlyNode = FALSE;
 +    }
 +    
 +    return bPMEOnlyNode;
 +}
 +
 +void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
 +                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
 +{   /* Collect the simulation node ids of the DD cells that map to PME
 +     * node pmenodeid. *my_ddnodes is allocated here with snew,
 +     * *nmy_ddnodes receives the number of entries and *node_peer the
 +     * last entry of the list.
 +     */
 +    gmx_domdec_t *dd;
 +    int x,y,z;
 +    ivec coord,coord_pme;
 +    
 +    dd = cr->dd;
 +    
 +    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes); /* nnodes/npmenodes, rounded up */
 +    
 +    *nmy_ddnodes = 0;
 +    for(x=0; x<dd->nc[XX]; x++)
 +    {
 +        for(y=0; y<dd->nc[YY]; y++)
 +        {
 +            for(z=0; z<dd->nc[ZZ]; z++)
 +            {
 +                if (dd->comm->bCartesianPP_PME)
 +                {
 +                    /* Select the cells whose PME coordinate equals dd->ci */
 +                    coord[XX] = x;
 +                    coord[YY] = y;
 +                    coord[ZZ] = z;
 +                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +                    if (dd->ci[XX] == coord_pme[XX] &&
 +                        dd->ci[YY] == coord_pme[YY] &&
 +                        dd->ci[ZZ] == coord_pme[ZZ])
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                }
 +                else
 +                {
 +                    /* The slab corresponds to the nodeid in the PME group */
 +                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
 +                    {
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* The last PP-only node is the peer node.
 +     * NOTE(review): when no cell matched, *nmy_ddnodes is 0 and this
 +     * reads index -1 - confirm that at least one match is guaranteed.
 +     */
 +    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Receive coordinates from PP nodes:");
 +        for(x=0; x<*nmy_ddnodes; x++)
 +        {
 +            fprintf(debug," %d",(*my_ddnodes)[x]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static gmx_bool receive_vir_ener(t_commrec *cr)
 +{   /* Return TRUE unless there are fewer PME nodes than DD nodes and
 +     * the next PP node (next Cartesian coordinate along cartpmedim, or
 +     * next sim_nodeid) maps to the same PME node as this node.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  pmenode,coords[DIM],rank;
 +    gmx_bool bReceive;
 +    
 +    bReceive = TRUE;
 +    if (cr->npmenodes < cr->dd->nnodes)
 +    {
 +        comm = cr->dd->comm;
 +        if (comm->bCartesianPP_PME)
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +#ifdef GMX_MPI
 +            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
 +            coords[comm->cartpmedim]++; /* look at the next node along cartpmedim */
 +            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
 +            {
 +                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
 +                if (dd_simnode2pmenode(cr,rank) == pmenode)
 +                {
 +                    /* This is not the last PP node for pmenode */
 +                    bReceive = FALSE;
 +                }
 +            }
 +#endif  
 +        }
 +        else
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +            if (cr->sim_nodeid+1 < cr->nnodes &&
 +                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
 +            {
 +                /* This is not the last PP node for pmenode */
 +                bReceive = FALSE;
 +            }
 +        }
 +    }
 +    
 +    return bReceive;
 +}
 +
 +static void set_zones_ncg_home(gmx_domdec_t *dd)
 +{   /* Reset the zone charge group ranges: cg_range[0] is set to 0 and
 +     * cg_range[1..zones->n] to dd->ncg_home, so that zone 0 spans the
 +     * home charge groups and all other zones are empty.
 +     */
 +    gmx_domdec_zones_t *zones;
 +    int i;
 +
 +    zones = &dd->comm->zones;
 +
 +    zones->cg_range[0] = 0;
 +    for(i=1; i<zones->n+1; i++)
 +    {
 +        zones->cg_range[i] = dd->ncg_home;
 +    }
 +}
 +
 +static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
 +{   /* Rebuild dd->index_gl and dd->cgindex from the global charge group
 +     * indices stored in state->cg_gl, then set dd->ncg_home and
 +     * dd->nat_home and reset the zone ranges. gcgs_index gives the
 +     * atom range of each global charge group.
 +     */
 +    int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;
 +    
 +    ind = state->cg_gl;
 +    dd_cg_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    nat = 0;
 +    cgindex[0] = nat;
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        cgindex[i] = nat; /* first local atom of charge group i */
 +        cg_gl = ind[i];
 +        dd_cg_gl[i] = cg_gl;
 +        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
 +    }
 +    cgindex[i] = nat; /* closing entry: total number of atoms */
 +    
 +    dd->ncg_home = state->ncg_gl;
 +    dd->nat_home = nat;
 +
 +    set_zones_ncg_home(dd);
 +}
 +
 +static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
 +{   /* Return the cginfo value for global charge group cg: advance to
 +     * the first entry with cg < cg_end, then index its cginfo array with
 +     * the offset from cg_start taken modulo cg_mod.
 +     * The scan has no bound of its own; cg must lie below the cg_end
 +     * of some entry.
 +     */
 +    while (cg >= cginfo_mb->cg_end)
 +    {
 +        cginfo_mb++;
 +    }
 +
 +    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
 +}
 +
 +static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
 +                          t_forcerec *fr,char *bLocalCG)
 +{   /* For the local charge groups cg0 <= cg < cg1: fill fr->cginfo from
 +     * fr->cginfo_mb (when fr != NULL) and mark the corresponding global
 +     * charge groups in bLocalCG (when bLocalCG != NULL).
 +     */
 +    cginfo_mb_t *cginfo_mb;
 +    int *cginfo;
 +    int cg;
 +
 +    if (fr != NULL)
 +    {
 +        cginfo_mb = fr->cginfo_mb;
 +        cginfo    = fr->cginfo;
 +
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
 +        }
 +    }
 +
 +    if (bLocalCG != NULL)
 +    {
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            bLocalCG[index_gl[cg]] = TRUE; /* bLocalCG is indexed by global charge group */
 +        }
 +    }
 +}
 +
 +static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
 +{   /* Fill dd->gatindex (local to global atom index) and dd->ga2la
 +     * (global to local) for all charge groups from cg_start up to the
 +     * end of the last zone. dd->gatindex is grown when dd->nat_tot
 +     * exceeds its allocation.
 +     */
 +    int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
 +    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
 +    gmx_ga2la_t *ga2la; /* not used in this function */
 +    char *bLocalCG;
 +
 +    bLocalCG = dd->comm->bLocalCG; /* assigned but not read below */
 +
 +    if (dd->nat_tot > dd->gatindex_nalloc)
 +    {
 +        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
 +        srenew(dd->gatindex,dd->gatindex_nalloc);
 +    }
 +
 +    nzone      = dd->comm->zones.n;
 +    zone2cg    = dd->comm->zones.cg_range;
 +    zone_ncg1  = dd->comm->zone_ncg1;
 +    index_gl   = dd->index_gl;
 +    gatindex   = dd->gatindex;
 +
 +    if (zone2cg[1] != dd->ncg_home) /* the end of zone 0 must equal the home cg count */
 +    {
 +        gmx_incons("dd->ncg_zone is not up to date");
 +    }
 +    
 +    /* Make the local to global and global to local atom index */
 +    a = dd->cgindex[cg_start];
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        if (zone == 0)
 +        {
 +            cg0 = cg_start; /* zone 0 starts at cg_start, not at its beginning */
 +        }
 +        else
 +        {
 +            cg0 = zone2cg[zone];
 +        }
 +        for(cg=cg0; cg<zone2cg[zone+1]; cg++)
 +        {
 +            zone1 = zone;
 +            if (cg - cg0 >= zone_ncg1[zone])
 +            {
 +                /* Signal that this cg is from more than one zone away */
 +                zone1 += nzone;
 +            }
 +            cg_gl = index_gl[cg];
 +            for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
 +            {
 +                gatindex[a] = a_gl;
 +                ga2la_set(dd->ga2la,a_gl,a,zone1);
 +                a++;
 +            }
 +        }
 +    }
 +}
 +
 +static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
 +                          const char *where)
 +{   /* Check bLocalCG against dd->index_gl and return the number of
 +     * inconsistencies found; each one is reported on stderr, tagged
 +     * with where. Returns 0 immediately when bLocalCG is NULL.
 +     */
 +    int ncg,i,ngl,nerr; /* ncg is not used */
 +
 +    nerr = 0;
 +    if (bLocalCG == NULL)
 +    {
 +        return nerr;
 +    }
 +    /* Every local charge group must be marked */
 +    for(i=0; i<dd->ncg_tot; i++)
 +    {
 +        if (!bLocalCG[dd->index_gl[i]])
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
 +            nerr++;
 +        }
 +    }
 +    /* The number of marked entries must equal dd->ncg_tot */
 +    ngl = 0;
 +    for(i=0; i<ncg_sys; i++)
 +    {
 +        if (bLocalCG[i])
 +        {
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->ncg_tot)
 +    {
 +        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
 +        nerr++;
 +    }
 +
 +    return nerr;
 +}
 +
 +static void check_index_consistency(gmx_domdec_t *dd,
 +                                    int natoms_sys,int ncg_sys,
 +                                    const char *where)
 +{   /* Cross-check dd->gatindex, dd->ga2la and comm->bLocalCG and call
 +     * gmx_fatal when the error count nerr is positive. All findings are
 +     * printed to stderr, but only those followed by nerr++ below add
 +     * to the count.
 +     */
 +    int  nerr,ngl,i,a,cell;
 +    int  *have;
 +
 +    nerr = 0;
 +
 +    if (dd->comm->DD_debug > 1)
 +    {
 +        /* Look for global atoms that occur more than once locally;
 +         * have[] stores local index + 1 and relies on snew returning
 +         * zeroed memory. Duplicates are reported but not counted.
 +         */
 +        snew(have,natoms_sys);
 +        for(a=0; a<dd->nat_tot; a++)
 +        {
 +            if (have[dd->gatindex[a]] > 0)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
 +            }
 +            else
 +            {
 +                have[dd->gatindex[a]] = a + 1;
 +            }
 +        }
 +        sfree(have);
 +    }
 +
 +    snew(have,dd->nat_tot); /* have[a] becomes 1 when local atom a is found through ga2la */
 +
 +    ngl  = 0;
 +    for(i=0; i<natoms_sys; i++)
 +    {
 +        if (ga2la_get(dd->ga2la,i,&a,&cell))
 +        {
 +            if (a >= dd->nat_tot)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
 +                nerr++;
 +            }
 +            else
 +            {
 +                have[a] = 1;
 +                if (dd->gatindex[a] != i) /* the two mappings must be each other's inverse */
 +                {
 +                    fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
 +                    nerr++;
 +                }
 +            }
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->nat_tot) /* reported only, not counted in nerr */
 +    {
 +        fprintf(stderr,
 +                "DD node %d, %s: %d global atom indices, %d local atoms\n",
 +                dd->rank,where,ngl,dd->nat_tot);
 +    }
 +    for(a=0; a<dd->nat_tot; a++)
 +    {
 +        if (have[a] == 0) /* reported only, not counted in nerr */
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: local atom %d, global %d has no global index\n",
 +                    dd->rank,where,a+1,dd->gatindex[a]+1);
 +        }
 +    }
 +    sfree(have);
 +
 +    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
 +
 +    if (nerr > 0) {
 +        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
 +                  dd->rank,where,nerr);
 +    }
 +}
 +
 +static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
 +{   /* Remove the local atoms from a_start on from dd->ga2la, unmark
 +     * the charge groups from cg_start on in comm->bLocalCG (when set),
 +     * and clear the local vsite indices and, when dd->constraints is
 +     * set, the local constraint indices.
 +     */
 +    int  i;
 +    char *bLocalCG;
 +
 +    if (a_start == 0)
 +    {
 +        /* Clear the whole list without searching */
 +        ga2la_clear(dd->ga2la);
 +    }
 +    else
 +    {
 +        for(i=a_start; i<dd->nat_tot; i++)
 +        {
 +            ga2la_del(dd->ga2la,dd->gatindex[i]); /* delete entry by global atom index */
 +        }
 +    }
 +
 +    bLocalCG = dd->comm->bLocalCG;
 +    if (bLocalCG)
 +    {
 +        for(i=cg_start; i<dd->ncg_tot; i++)
 +        {
 +            bLocalCG[dd->index_gl[i]] = FALSE;
 +        }
 +    }
 +
 +    dd_clear_local_vsite_indices(dd);
 +    
 +    if (dd->constraints)
 +    {
 +        dd_clear_local_constraint_indices(dd);
 +    }
 +}
 +
 +static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
 +{   /* Return comm->cellsize_limit, raised to at least
 +     * comm->cutoff/comm->cd[dim_ind].np unless comm->bVacDLBNoLimit
 +     * is set.
 +     */
 +    real grid_jump_limit;
 +
 +    /* The distance between the boundaries of cells at distance
 +     * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
 +     * and by the fact that cells should not be shifted by more than
 +     * half their size, such that cg's only shift by one cell
 +     * at redecomposition.
 +     */
 +    grid_jump_limit = comm->cellsize_limit;
 +    if (!comm->bVacDLBNoLimit)
 +    {
 +        grid_jump_limit = max(grid_jump_limit,
 +                              comm->cutoff/comm->cd[dim_ind].np);
 +    }
 +
 +    return grid_jump_limit;
 +}
 +
 +static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{   /* For every decomposition dimension except the first, call
 +     * gmx_fatal when the cell boundaries have shifted beyond
 +     * grid_jump_limit relative to the stored cell_f_max0/cell_f_min1
 +     * values.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim;
 +    real limit,bfac;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=1; d<dd->ndim; d++) /* dimension index 0 is not checked */
 +    {
 +        dim = dd->dim[d];
 +        limit = grid_jump_limit(comm,d);
 +        /* bfac scales the cell_f differences to the units of limit */
 +        bfac = ddbox->box_size[dim];
 +        if (ddbox->tric_dir[dim])
 +        {
 +            bfac *= ddbox->skew_fac[dim];
 +        }
 +        if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac <  limit ||
 +            (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
 +                      gmx_step_str(step,buf),
 +                      dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +}
 +
 +static int dd_load_count(gmx_domdec_comm_t *comm)
 +{   /* Number of load measurements: flop_n when comm->eFlop is set, else cycl_n[ddCyclF] */
 +    return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
 +}
 +
 +static float dd_force_load(gmx_domdec_comm_t *comm)
 +{   /* Return the force load: comm->flop when comm->eFlop is set,
 +     * otherwise the cycle count comm->cycl[ddCyclF] with the largest
 +     * single count subtracted when more than one count was taken.
 +     */
 +    float load;
 +    
 +    if (comm->eFlop)
 +    {
 +        load = comm->flop;
 +        if (comm->eFlop > 1)
 +        {
 +            /* Scale by a random factor: 1 + (eFlop-1)*u with u drawn
 +             * from rand() in the range [-0.05,0.05]
 +             */
 +            load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
 +        }
 +    } 
 +    else
 +    {
 +        load = comm->cycl[ddCyclF];
 +        if (comm->cycl_n[ddCyclF] > 1)
 +        {
 +            /* Subtract the maximum of the last n cycle counts
 +             * to get rid of possible high counts due to other sources,
 +             * for instance system activity, that would otherwise
 +             * affect the dynamic load balancing.
 +             */
 +            load -= comm->cycl_max[ddCyclF];
 +        }
 +    }
 +    
 +    return load;
 +}
 +
 +static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
 +{   /* Allocate *dim_f with dd->nc[dim]+1 entries and fill it with the
 +     * cell boundaries along dim as fractions from 0 to 1: cumulative
 +     * sums of comm->slb_frac[dim] when that is set, uniform spacing
 +     * otherwise.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int i;
 +    
 +    comm = dd->comm;
 +    
 +    snew(*dim_f,dd->nc[dim]+1);
 +    (*dim_f)[0] = 0;
 +    for(i=1; i<dd->nc[dim]; i++)
 +    {
 +        if (comm->slb_frac[dim])
 +        {
 +            (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 +        }
 +        else
 +        {
 +            (*dim_f)[i] = (real)i/(real)dd->nc[dim];
 +        }
 +    }
 +    (*dim_f)[dd->nc[dim]] = 1; /* the last boundary is set to 1 explicitly */
 +}
 +
 +static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
 +{   /* Initialize ddpme for PME decomposition dimension index dimind:
 +     * set dim, dim_match and nslab and, when nslab > 1, the PP
 +     * location range pp_min/pp_max of each slab and slb_dim_f.
 +     * With nslab <= 1 the function returns before the latter are set.
 +     */
 +    int        pmeindex,slab,nso,i;
 +    ivec xyz;
 +    
 +    if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
 +    {
 +        ddpme->dim = YY;
 +    }
 +    else
 +    {
 +        ddpme->dim = dimind;
 +    }
 +    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 +    
 +    ddpme->nslab = (ddpme->dim == 0 ?
 +                    dd->comm->npmenodes_x :
 +                    dd->comm->npmenodes_y);
 +
 +    if (ddpme->nslab <= 1)
 +    {
 +        return;
 +    }
 +
 +    nso = dd->comm->npmenodes/ddpme->nslab; /* PME nodes per slab */
 +    /* Determine for each PME slab the PP location range for dimension dim */
 +    snew(ddpme->pp_min,ddpme->nslab);
 +    snew(ddpme->pp_max,ddpme->nslab);
 +    for(slab=0; slab<ddpme->nslab; slab++) {
 +        /* Start from an inverted range; narrowed by min/max below */
 +        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 +        ddpme->pp_max[slab] = 0;
 +    }
 +    for(i=0; i<dd->nnodes; i++) {
 +        ddindex2xyz(dd->nc,i,xyz);
 +        /* For y only use our y/z slab.
 +         * This assumes that the PME x grid size matches the DD grid size.
 +         */
 +        if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
 +            pmeindex = ddindex2pmeindex(dd,i);
 +            if (dimind == 0) {
 +                slab = pmeindex/nso;
 +            } else {
 +                slab = pmeindex % ddpme->nslab;
 +            }
 +            ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
 +            ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
 +        }
 +    }
 +
 +    set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
 +}
 +
 +int dd_pme_maxshift_x(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == XX)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +int dd_pme_maxshift_y(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == YY)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
 +    {
 +        return dd->comm->ddpme[1].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +/* Determines ddpme->maxshift, the PME slab communication range along
 + * ddpme->dim, and prints it to the debug file when debugging.
 + *
 + * bUniform tells whether the PP cells are uniformly sized; together with
 + * ns == nc it selects the minimal range of 1.
 + * cell_f holds relative PP cell boundaries indexed by PP cell index;
 + * it is only read in the general (last) branch, as is ddbox.
 + */
 +static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
 +                             gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  nc,ns,s;
 +    int  *xmin,*xmax;
 +    real range,pme_boundary;
 +    int  sh;
 +    
 +    comm = dd->comm;
 +    nc  = dd->nc[ddpme->dim];
 +    ns  = ddpme->nslab;
 +    
 +    if (!ddpme->dim_match)
 +    {
 +        /* PP decomposition is not along dim: the worst situation */
 +        sh = ns/2;
 +    }
 +    else if (ns <= 3 || (bUniform && ns == nc))
 +    {
 +        /* The optimal situation */
 +        sh = 1;
 +    }
 +    else
 +    {
 +        /* We need to check for all pme nodes which nodes they
 +         * could possibly need to communicate with.
 +         */
 +        xmin = ddpme->pp_min;
 +        xmax = ddpme->pp_max;
 +        /* Allow for atoms to be maximally 2/3 times the cut-off
 +         * out of their DD cell. This is a reasonable balance
 +         * between performance and support for most charge-group/cut-off
 +         * combinations.
 +         */
 +        range  = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
 +        /* Avoid extra communication when we are exactly at a boundary */
 +        range *= 0.999;
 +        
 +        /* sh only grows over the slab loop, so it ends up as the largest
 +         * shift required by any slab (capped at ns-1).
 +         */
 +        sh = 1;
 +        for(s=0; s<ns; s++)
 +        {
 +            /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
 +            pme_boundary = (real)s/ns;
 +            /* Lower side: grow sh while the upper PP boundary of slab
 +             * s-(sh+1), extended by range, still passes our lower boundary.
 +             * Negative slab indices wrap by +ns with the fraction shifted by -1.
 +             */
 +            while (sh+1 < ns &&
 +                   ((s-(sh+1) >= 0 &&
 +                     cell_f[xmax[s-(sh+1)   ]+1]     + range > pme_boundary) ||
 +                    (s-(sh+1) <  0 &&
 +                     cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
 +            {
 +                sh++;
 +            }
 +            pme_boundary = (real)(s+1)/ns;
 +            /* Upper side: the mirror image, using the lower PP boundary of
 +             * slab s+(sh+1); indices >= ns wrap by -ns with the fraction
 +             * shifted by +1.
 +             */
 +            while (sh+1 < ns &&
 +                   ((s+(sh+1) <  ns &&
 +                     cell_f[xmin[s+(sh+1)   ]  ]     - range < pme_boundary) ||
 +                    (s+(sh+1) >= ns &&
 +                     cell_f[xmin[s+(sh+1)-ns]  ] + 1 - range < pme_boundary)))
 +            {
 +                sh++;
 +            }
 +        }
 +    }
 +    
 +    ddpme->maxshift = sh;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"PME slab communication range for dim %d is %d\n",
 +                ddpme->dim,ddpme->maxshift);
 +    }
 +}
 +
 +static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d,dim;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        if (dim < ddbox->nboundeddim &&
 +            ddbox->box_size[dim]*ddbox->skew_fac[dim] <
 +            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
 +        {
 +            gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
 +                      dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                      dd->nc[dim],dd->comm->cellsize_limit);
 +        }
 +    }
 +}
 +
 +/* Sets the cell boundaries for all DIM dimensions without dynamic load
 + * balancing, either uniformly or from the fractions in comm->slb_frac.
 + *
 + * With bMaster the boundaries of all cells along each dimension are
 + * written to dd->ma->cell_x; otherwise only the boundaries of our own
 + * cell (index dd->ci) are written to comm->cell_x0/cell_x1.
 + * npulse[d] is set, for each dimension, to the number of cells (at least 1,
 + * at most nc-1) needed to cover comm->cutoff given the smallest cell.
 + * The smallest cell sizes are copied to comm->cellsize_min unless
 + * comm->bDynLoadBal is set, and the PME maxshift values are updated.
 + */
 +static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                                  gmx_bool bMaster,ivec npulse)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,j;
 +    rvec cellsize_min;
 +    real *cell_x,cell_dx,cellsize;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=0; d<DIM; d++)
 +    {
 +        /* Start from the full (skew-corrected) box size as upper bound */
 +        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
 +        npulse[d] = 1;
 +        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
 +        {
 +            /* Uniform grid */
 +            cell_dx = ddbox->box_size[d]/dd->nc[d];
 +            if (bMaster)
 +            {
 +                for(j=0; j<dd->nc[d]+1; j++)
 +                {
 +                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
 +                }
 +            }
 +            else
 +            {
 +                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
 +                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
 +            }
 +            cellsize = cell_dx*ddbox->skew_fac[d];
 +            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
 +            {
 +                npulse[d]++;
 +            }
 +            cellsize_min[d] = cellsize;
 +        }
 +        else
 +        {
 +            /* Statically load balanced grid */
 +            /* Also when we are not doing a master distribution we determine
 +             * all cell borders in a loop to obtain identical values
 +             * to the master distribution case and to determine npulse.
 +             */
 +            if (bMaster)
 +            {
 +                cell_x = dd->ma->cell_x[d];
 +            }
 +            else
 +            {
 +                /* Temporary buffer, freed below after our own bounds are copied */
 +                snew(cell_x,dd->nc[d]+1);
 +            }
 +            cell_x[0] = ddbox->box0[d];
 +            for(j=0; j<dd->nc[d]; j++)
 +            {
 +                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
 +                cell_x[j+1] = cell_x[j] + cell_dx;
 +                cellsize = cell_dx*ddbox->skew_fac[d];
 +                /* npulse[d] is not reset per cell, so it ends up set
 +                 * by the smallest cell along this dimension.
 +                 */
 +                while (cellsize*npulse[d] < comm->cutoff &&
 +                       npulse[d] < dd->nc[d]-1)
 +                {
 +                    npulse[d]++;
 +                }
 +                cellsize_min[d] = min(cellsize_min[d],cellsize);
 +            }
 +            if (!bMaster)
 +            {
 +                comm->cell_x0[d] = cell_x[dd->ci[d]];
 +                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
 +                sfree(cell_x);
 +            }
 +        }
 +        /* The following limitation is to avoid that a cell would receive
 +         * some of its own home charge groups back over the periodic boundary.
 +         * Double charge groups cause trouble with the global indices.
 +         */
 +        /* NOTE(review): npulse[d] starts at 1 and the loops above only
 +         * increment it while it is below nc-1, so with nc > 1 this
 +         * condition looks unreachable from this function - confirm.
 +         */
 +        if (d < ddbox->npbcdim &&
 +            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
 +                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
 +                                 comm->cutoff,
 +                                 dd->nc[d],dd->nc[d],
 +                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
 +        }
 +    }
 +    
 +    if (!comm->bDynLoadBal)
 +    {
 +        copy_rvec(cellsize_min,comm->cellsize_min);
 +    }
 +   
 +    /* Update the PME slab communication range, using the static
 +     * relative boundaries stored in slb_dim_f.
 +     */
 +    for(d=0; d<comm->npmedecompdim; d++)
 +    {
 +        set_pme_maxshift(dd,&comm->ddpme[d],
 +                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
 +                         comm->ddpme[d].slb_dim_f);
 +    }
 +}
 +
 +
 +/* Rescales the cell sizes in root->buf_ncd for the cells in the half-open
 + * index range [range[0],range[1]) such that together they span
 + * root->cell_f[range[1]] - root->cell_f[range[0]], enforces the minimum
 + * relative cell size cellsize_limit_f and writes the resulting relative
 + * boundaries to root->cell_f.
 + *
 + * For !bUniform, a boundary is not allowed to move past the midpoint of
 + * the old neighboring cells (root->old_cell_f). For d > 0 with !bUniform
 + * the boundaries are additionally kept within root->bound_min/bound_max:
 + * a violating boundary is pinned and the function calls itself
 + * recursively on the sub-ranges on either side of it.
 + * Issues a fatal error when, with PBC, the last cell of the range
 + * ends up below the size limit.
 + */
 +static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,
 +                                       gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,i,j,nmin,nmin_old;
 +    gmx_bool bLimLo,bLimHi;
 +    real *cell_size;
 +    real fac,halfway,cellsize_limit_f_i,region_size;
 +    gmx_bool bPBC,bLastHi=FALSE;
 +    /* Sub-range passed to the recursive calls; starts as the full range */
 +    int nrange[]={range[0],range[1]};
 +
 +    /* The end points of the range are fixed; only interior boundaries move */
 +    region_size= root->cell_f[range[1]]-root->cell_f[range[0]];  
 +
 +    comm = dd->comm;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    if (debug) 
 +    {
 +        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
 +    }
 +
 +    /* First we need to check if the scaling does not make cells
 +     * smaller than the smallest allowed size.
 +     * We need to do this iteratively, since if a cell is too small,
 +     * it needs to be enlarged, which makes all the other cells smaller,
 +     * which could in turn make another cell smaller than allowed.
 +     */
 +    for(i=range[0]; i<range[1]; i++)
 +    {
 +        root->bCellMin[i] = FALSE;
 +    }
 +    nmin = 0;
 +    do
 +    {
 +        nmin_old = nmin;
 +        /* We need the total for normalization */
 +        fac = 0;
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                fac += cell_size[i];
 +            }
 +        }
 +        fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
 +        /* Determine the cell boundaries */
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                cell_size[i] *= fac;
 +                /* Without PBC the first and last cell of the dimension
 +                 * have no lower size limit.
 +                 */
 +                if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
 +                {
 +                    cellsize_limit_f_i = 0;
 +                }
 +                else
 +                {
 +                    cellsize_limit_f_i = cellsize_limit_f;
 +                }
 +                if (cell_size[i] < cellsize_limit_f_i)
 +                {
 +                    root->bCellMin[i] = TRUE;
 +                    cell_size[i] = cellsize_limit_f_i;
 +                    nmin++;
 +                }
 +            }
 +            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
 +        }
 +    }
 +    while (nmin > nmin_old);
 +    
 +    /* Recompute the size of the last cell in the range from its boundaries */
 +    i=range[1]-1;
 +    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
 +    /* For this check we should not use DD_CELL_MARGIN,
 +     * but a slightly smaller factor,
 +     * since rounding could get us below the limit.
 +     */
 +    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
 +    {
 +        char buf[22];
 +        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
 +                  gmx_step_str(step,buf),
 +                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                  ncd,comm->cellsize_min[dim]);
 +    }
 +    
 +    /* Limited when a cell hit the minimum size or when this call
 +     * only covers part of the row (i.e. it is a recursive call).
 +     */
 +    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
 +    
 +    if (!bUniform)
 +    {
 +        /* Check if the boundary did not displace more than halfway
 +         * each of the cells it bounds, as this could cause problems,
 +         * especially when the differences between cell sizes are large.
 +         * If changes are applied, they will not make cells smaller
 +         * than the cut-off, as we check all the boundaries which
 +         * might be affected by a change and if the old state was ok,
 +         * the cells will at most be shrunk back to their old size.
 +         */
 +        for(i=range[0]+1; i<range[1]; i++)
 +        {
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
 +            if (root->cell_f[i] < halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i+1; j<range[1]; j++)
 +                {
 +                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
 +                        root->cell_f[j] =  root->cell_f[j-1] + cellsize_limit_f;
 +                }
 +            }
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
 +            if (root->cell_f[i] > halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i-1; j>=range[0]+1; j--)
 +                {
 +                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
 +                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* nrange is defined as [lower, upper) range for new call to enforce_limits */
 +    /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
 +     * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
 +     * for a and b nrange is used */
 +    if (d > 0)
 +    {
 +        /* Take care of the staggering of the cell boundaries */
 +        if (bUniform)
 +        {
 +            for(i=range[0]; i<range[1]; i++)
 +            {
 +                root->cell_f_max0[i] = root->cell_f[i];
 +                root->cell_f_min1[i] = root->cell_f[i+1];
 +            }
 +        }
 +        else
 +        {
 +            for(i=range[0]+1; i<range[1]; i++)
 +            {
 +                bLimLo = (root->cell_f[i] < root->bound_min[i]);
 +                bLimHi = (root->cell_f[i] > root->bound_max[i]);
 +                if (bLimLo && bLimHi)
 +                {
 +                    /* Both limits violated, try the best we can */
 +                    /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
 +                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
 +                    nrange[0]=range[0];
 +                    nrange[1]=i;
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    return;
 +                }
 +                else if (bLimLo)
 +                {
 +                    /* root->cell_f[i] = root->bound_min[i]; */
 +                    nrange[1]=i;  /* only store violation location. There could be a LimLo violation following with an higher index */
 +                    bLastHi=FALSE;
 +                }
 +                else if (bLimHi && !bLastHi)
 +                {
 +                    bLastHi=TRUE;
 +                    if (nrange[1] < range[1])   /* found a LimLo before */
 +                    {
 +                        /* Pin the stored LimLo boundary and redo the cells below it */
 +                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                        nrange[0]=nrange[1];
 +                    }
 +                    /* Pin this LimHi boundary and redo the cells between
 +                     * the previous pinned boundary and this one.
 +                     */
 +                    root->cell_f[i] = root->bound_max[i];
 +                    nrange[1]=i; 
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                }
 +            }
 +            if (nrange[1] < range[1])   /* found last a LimLo */
 +            {
 +                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                nrange[0]=nrange[1];
 +                nrange[1]=range[1];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            } 
 +            else if (nrange[0] > range[0]) /* found at least one LimHi */
 +            {
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* Determines new relative cell boundaries root->cell_f for the row along
 + * DD dimension index d (Cartesian dimension dim).
 + *
 + * The target cell sizes are uniform with bUniform, or else (when load
 + * counts are available) the current sizes scaled by the under-relaxed,
 + * limited load imbalance; dd_cell_sizes_dlb_root_enforce_limits() then
 + * turns them into boundaries that obey the size and staggering limits.
 + * After the ncd+1 boundaries, root->cell_f is filled with the cell
 + * fractions of the lower dimensions and the PME maxshift value(s),
 + * in the layout that distribute_dd_cell_sizes_dlb() unpacks.
 + */
 +static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                       gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,d1,i,j,pos;
 +    real *cell_size;
 +    real load_aver,load_i,imbalance,change,change_max,sc;
 +    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
 +    real change_limit;
 +    /* Under-relaxation factor applied to the measured imbalance */
 +    real relax = 0.5;
 +    gmx_bool bPBC;
 +    int range[] = { 0, 0 };
 +
 +    comm = dd->comm;
 +
 +    /* Convert the maximum change from the input percentage to a fraction */
 +    change_limit = comm->dlb_scale_lim*0.01;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    /* Store the original boundaries */
 +    for(i=0; i<ncd+1; i++)
 +    {
 +        root->old_cell_f[i] = root->cell_f[i];
 +    }
 +    /* NOTE(review): when bUniform is FALSE and dd_load_count() returns 0,
 +     * neither branch below runs and cell_size keeps whatever root->buf_ncd
 +     * already held - confirm that this is intended.
 +     */
 +    if (bUniform) {
 +        for(i=0; i<ncd; i++)
 +        {
 +            cell_size[i] = 1.0/ncd;
 +        }
 +    }
 +    else if (dd_load_count(comm))
 +    {
 +        load_aver = comm->load[d].sum_m/ncd;
 +        change_max = 0;
 +        /* First pass: find the largest absolute change any cell asks for */
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -relax*imbalance;
 +            change_max = max(change_max,max(change,-change));
 +        }
 +        /* Limit the amount of scaling.
 +         * We need to use the same rescaling for all cells in one row,
 +         * otherwise the load balancing might not converge.
 +         */
 +        sc = relax;
 +        if (change_max > change_limit)
 +        {
 +            sc *= change_limit/change_max;
 +        }
 +        /* Second pass: apply the (possibly reduced) scaling to all cells */
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -sc*imbalance;
 +            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
 +        }
 +    }
 +    
 +    /* Express the size and staggering limits as fractions of the box size */
 +    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
 +    cellsize_limit_f *= DD_CELL_MARGIN;
 +    dist_min_f_hard        = grid_jump_limit(comm,d)/ddbox->box_size[dim];
 +    dist_min_f       = dist_min_f_hard * DD_CELL_MARGIN;
 +    if (ddbox->tric_dir[dim])
 +    {
 +        cellsize_limit_f /= ddbox->skew_fac[dim];
 +        dist_min_f       /= ddbox->skew_fac[dim];
 +    }
 +    if (bDynamicBox && d > 0)
 +    {
 +        dist_min_f *= DD_PRES_SCALE_MARGIN;
 +    }
 +    if (d > 0 && !bUniform)
 +    {
 +        /* Make sure that the grid is not shifted too much */
 +        for(i=1; i<ncd; i++) {
 +            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard) 
 +            {
 +                gmx_incons("Inconsistent DD boundary staggering limits!");
 +            }
 +            /* The lower bound is the hard limit, moved halfway towards
 +             * the current boundary when there is space left.
 +             */
 +            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
 +            if (space > 0) {
 +                root->bound_min[i] += 0.5*space;
 +            }
 +            /* The upper bound is set likewise (space is negative here
 +             * when there is room).
 +             */
 +            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
 +            if (space < 0) {
 +                root->bound_max[i] += 0.5*space;
 +            }
 +            if (debug)
 +            {
 +                fprintf(debug,
 +                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
 +                        d,i,
 +                        root->cell_f_max0[i-1] + dist_min_f,
 +                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
 +                        root->cell_f_min1[i] - dist_min_f);
 +            }
 +        }
 +    }
 +    /* Enforce the limits over the whole row [0,ncd) with fixed end points */
 +    range[1]=ncd;
 +    root->cell_f[0] = 0;
 +    root->cell_f[ncd] = 1;
 +    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
 +
 +
 +    /* After the checks above, the cells should obey the cut-off
 +     * restrictions, but it does not hurt to check.
 +     */
 +    for(i=0; i<ncd; i++)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug,"Relative bounds dim %d  cell %d: %f %f\n",
 +                    dim,i,root->cell_f[i],root->cell_f[i+1]);
 +        }
 +
 +        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
 +            root->cell_f[i+1] - root->cell_f[i] <
 +            cellsize_limit_f/DD_CELL_MARGIN)
 +        {
 +            char buf[22];
 +            fprintf(stderr,
 +                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
 +                    gmx_step_str(step,buf),dim2char(dim),i,
 +                    (root->cell_f[i+1] - root->cell_f[i])
 +                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
 +        }
 +    }
 +    
 +    pos = ncd + 1;
 +    /* Store the cell boundaries of the lower dimensions at the end */
 +    for(d1=0; d1<d; d1++)
 +    {
 +        root->cell_f[pos++] = comm->cell_f0[d1];
 +        root->cell_f[pos++] = comm->cell_f1[d1];
 +    }
 +    
 +    if (d < comm->npmedecompdim)
 +    {
 +        /* The master determines the maximum shift for
 +         * the coordinate communication between separate PME nodes.
 +         */
 +        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
 +    }
 +    /* The integer maxshift values travel in the real buffer as well */
 +    root->cell_f[pos++] = comm->ddpme[0].maxshift;
 +    if (d >= 1)
 +    {
 +        root->cell_f[pos++] = comm->ddpme[1].maxshift;
 +    }
 +}    
 +
 +static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
 +                                             gmx_ddbox_t *ddbox,int dimind)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +
 +    /* Set the cell dimensions */
 +    dim = dd->dim[dimind];
 +    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
 +    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
 +    if (dim >= ddbox->nboundeddim)
 +    {
 +        comm->cell_x0[dim] += ddbox->box0[dim];
 +        comm->cell_x1[dim] += ddbox->box0[dim];
 +    }
 +}
 +
 +static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                         int d,int dim,real *cell_f_row,
 +                                         gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d1,dim1,pos;
 +
 +    comm = dd->comm;
 +
 +#ifdef GMX_MPI
 +    /* Each node would only need to know two fractions,
 +     * but it is probably cheaper to broadcast the whole array.
 +     */
 +    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
 +              0,comm->mpi_comm_load[d]);
 +#endif
 +    /* Copy the fractions for this dimension from the buffer */
 +    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
 +    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
 +    /* The whole array was communicated, so set the buffer position */
 +    pos = dd->nc[dim] + 1;
 +    for(d1=0; d1<=d; d1++)
 +    {
 +        if (d1 < d)
 +        {
 +            /* Copy the cell fractions of the lower dimensions */
 +            comm->cell_f0[d1] = cell_f_row[pos++];
 +            comm->cell_f1[d1] = cell_f_row[pos++];
 +        }
 +        relative_to_absolute_cell_bounds(dd,ddbox,d1);
 +    }
 +    /* Convert the communicated shift from float to int */
 +    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    if (d >= 1)
 +    {
 +        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    }
 +}
 +
 +static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
 +                                         gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                         gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d,dim,d1;
 +    gmx_bool bRowMember,bRowRoot;
 +    real *cell_f_row;
 +    
 +    comm = dd->comm;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        bRowMember = TRUE;
 +        bRowRoot = TRUE;
 +        for(d1=d; d1<dd->ndim; d1++)
 +        {
 +            if (dd->ci[dd->dim[d1]] > 0)
 +            {
 +                if (d1 > d)
 +                {
 +                    bRowMember = FALSE;
 +                }
 +                bRowRoot = FALSE;
 +            }
 +        }
 +        if (bRowMember)
 +        {
 +            if (bRowRoot)
 +            {
 +                set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
 +                                           ddbox,bDynamicBox,bUniform,step);
 +                cell_f_row = comm->root[d]->cell_f;
 +            }
 +            else
 +            {
 +                cell_f_row = comm->cell_f_row;
 +            }
 +            distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
 +        }
 +    }
 +}    
 +
 +static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d;
 +
 +    /* This function assumes the box is static and should therefore
 +     * not be called when the box has changed since the last
 +     * call to dd_partition_system.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        relative_to_absolute_cell_bounds(dd,ddbox,d); 
 +    }
 +}
 +
 +
 +
 +static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                  gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                                  gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +    
 +    if (bDoDLB)
 +    {
 +        wallcycle_start(wcycle,ewcDDCOMMBOUND);
 +        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
 +        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
 +    }
 +    else if (bDynamicBox)
 +    {
 +        set_dd_cell_sizes_dlb_nochange(dd,ddbox);
 +    }
 +    
 +    /* Set the dimensions for which no DD is used */
 +    for(dim=0; dim<DIM; dim++) {
 +        if (dd->nc[dim] == 1) {
 +            comm->cell_x0[dim] = 0;
 +            comm->cell_x1[dim] = ddbox->box_size[dim];
 +            if (dim >= ddbox->nboundeddim)
 +            {
 +                comm->cell_x0[dim] += ddbox->box0[dim];
 +                comm->cell_x1[dim] += ddbox->box0[dim];
 +            }
 +        }
 +    }
 +}
 +
 +static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
 +{
 +    int d,np,i;
 +    gmx_domdec_comm_dim_t *cd;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &dd->comm->cd[d];
 +        np = npulse[dd->dim[d]];
 +        if (np > cd->np_nalloc)
 +        {
 +            if (debug)
 +            {
 +                fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
 +                        dim2char(dd->dim[d]),np);
 +            }
 +            if (DDMASTER(dd) && cd->np_nalloc > 0)
 +            {
 +                fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
 +            }
 +            srenew(cd->ind,np);
 +            for(i=cd->np_nalloc; i<np; i++)
 +            {
 +                cd->ind[i].index  = NULL;
 +                cd->ind[i].nalloc = 0;
 +            }
 +            cd->np_nalloc = np;
 +        }
 +        cd->np = np;
 +    }
 +}
 +
 +
 +static void set_dd_cell_sizes(gmx_domdec_t *dd,
 +                              gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                              gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                              gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec npulse;
 +    
 +    comm = dd->comm;
 +
 +    /* Copy the old cell boundaries for the cg displacement check */
 +    copy_rvec(comm->cell_x0,comm->old_cell_x0);
 +    copy_rvec(comm->cell_x1,comm->old_cell_x1);
 +    
 +    if (comm->bDynLoadBal)
 +    {
 +        if (DDMASTER(dd))
 +        {
 +            check_box_size(dd,ddbox);
 +        }
 +        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
 +        realloc_comm_ind(dd,npulse);
 +    }
 +    
 +    if (debug)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
 +                    d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
 +        }
 +    }
 +}
 +
 +static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,
 +                                  rvec cell_ns_x0,rvec cell_ns_x1,
 +                                  gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim_ind,dim;
 +    
 +    comm = dd->comm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        
 +        /* Without PBC we don't have restrictions on the outer cells */
 +        if (!(dim >= ddbox->npbcdim && 
 +              (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
 +            comm->bDynLoadBal &&
 +            (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
 +            comm->cellsize_min[dim])
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
 +                      gmx_step_str(step,buf),dim2char(dim),
 +                      comm->cell_x1[dim] - comm->cell_x0[dim],
 +                      ddbox->skew_fac[dim],
 +                      dd->comm->cellsize_min[dim],
 +                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +    
 +    if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
 +    {
 +        /* Communicate the boundaries and update cell_ns_x0/1 */
 +        dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
 +        if (dd->bGridJump && dd->ndim > 1)
 +        {
 +            check_grid_jump(step,dd,ddbox);
 +        }
 +    }
 +}
 +
 +static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
 +{
 +    if (YY < npbcdim)
 +    {
 +        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
 +    }
 +    else
 +    {
 +        tcm[YY][XX] = 0;
 +    }
 +    if (ZZ < npbcdim)
 +    {
 +        tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
 +        tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
 +    }
 +    else
 +    {
 +        tcm[ZZ][XX] = 0;
 +        tcm[ZZ][YY] = 0;
 +    }
 +}
 +
 +static void check_screw_box(matrix box)
 +{
 +    /* Mathematical limitation */
 +    if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
 +    {
 +        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
 +    }
 +    
 +    /* Limitation due to the asymmetry of the eighth shell method */
 +    if (box[ZZ][YY] != 0)
 +    {
 +        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
 +    }
 +}
 +
 +static void distribute_cg(FILE *fplog,gmx_large_int_t step,
 +                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
 +                          gmx_domdec_t *dd)
 +{
 +    gmx_domdec_master_t *ma;
 +    int **tmp_ind=NULL,*tmp_nalloc=NULL;
 +    int  i,icg,j,k,k0,k1,d,npbcdim;
 +    matrix tcm;
 +    rvec box_size,cg_cm;
 +    ivec ind;
 +    real nrcg,inv_ncg,pos_d;
 +    atom_id *cgindex;
 +    gmx_bool bUnbounded,bScrew;
 +
 +    ma = dd->ma;
 +    
 +    if (tmp_ind == NULL)
 +    {
 +        snew(tmp_nalloc,dd->nnodes);
 +        snew(tmp_ind,dd->nnodes);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
 +            snew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +    }
 +    
 +    /* Clear the count */
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->ncg[i] = 0;
 +        ma->nat[i] = 0;
 +    }
 +    
 +    make_tric_corr_matrix(dd->npbcdim,box,tcm);
 +    
 +    cgindex = cgs->index;
 +    
 +    /* Compute the center of geometry for all charge groups */
 +    for(icg=0; icg<cgs->nr; icg++)
 +    {
 +        k0      = cgindex[icg];
 +        k1      = cgindex[icg+1];
 +        nrcg    = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(pos[k0],cg_cm);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cg_cm);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cg_cm,pos[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cg_cm[d] *= inv_ncg;
 +            }
 +        }
 +        /* Put the charge group in the box and determine the cell index */
 +        for(d=DIM-1; d>=0; d--) {
 +            pos_d = cg_cm[d];
 +            if (d < dd->npbcdim)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                if (tric_dir[d] && dd->nc[d] > 1)
 +                {
 +                    /* Use triclinic coordintates for this dimension */
 +                    for(j=d+1; j<DIM; j++)
 +                    {
 +                        pos_d += cg_cm[j]*tcm[j][d];
 +                    }
 +                }
 +                while(pos_d >= box[d][d])
 +                {
 +                    pos_d -= box[d][d];
 +                    rvec_dec(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(pos[k],box[d]);
 +                        if (bScrew)
 +                        {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +                while(pos_d < 0)
 +                {
 +                    pos_d += box[d][d];
 +                    rvec_inc(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(pos[k],box[d]);
 +                        if (bScrew) {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +            }
 +            /* This could be done more efficiently */
 +            ind[d] = 0;
 +            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
 +            {
 +                ind[d]++;
 +            }
 +        }
 +        i = dd_index(dd->nc,ind);
 +        if (ma->ncg[i] == tmp_nalloc[i])
 +        {
 +            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
 +            srenew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +        tmp_ind[i][ma->ncg[i]] = icg;
 +        ma->ncg[i]++;
 +        ma->nat[i] += cgindex[icg+1] - cgindex[icg];
 +    }
 +    
 +    k1 = 0;
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->index[i] = k1;
 +        for(k=0; k<ma->ncg[i]; k++)
 +        {
 +            ma->cg[k1++] = tmp_ind[i][k];
 +        }
 +    }
 +    ma->index[dd->nnodes] = k1;
 +    
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        sfree(tmp_ind[i]);
 +    }
 +    sfree(tmp_ind);
 +    sfree(tmp_nalloc);
 +    
 +    if (fplog)
 +    {
 +        char buf[22];
 +        fprintf(fplog,"Charge group distribution at step %s:",
 +                gmx_step_str(step,buf));
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            fprintf(fplog," %d",ma->ncg[i]);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +}
 +
 +static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
 +                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
 +                                rvec pos[])
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    ivec npulse;
 +    int  i,cg_gl;
 +    int  *ibuf,buf2[2] = { 0, 0 };
 +    gmx_bool bMaster = DDMASTER(dd);
 +    if (bMaster)
 +    {
 +        ma = dd->ma;
 +        
 +        if (dd->bScrewPBC)
 +        {
 +            check_screw_box(box);
 +        }
 +    
 +        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
 +    
 +        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[2*i]   = ma->ncg[i];
 +            ma->ibuf[2*i+1] = ma->nat[i];
 +        }
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    dd_scatter(dd,2*sizeof(int),ibuf,buf2);
 +    
 +    dd->ncg_home = buf2[0];
 +    dd->nat_home = buf2[1];
 +    dd->ncg_tot  = dd->ncg_home;
 +    dd->nat_tot  = dd->nat_home;
 +    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
 +    {
 +        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(dd->index_gl,dd->cg_nalloc);
 +        srenew(dd->cgindex,dd->cg_nalloc+1);
 +    }
 +    if (bMaster)
 +    {
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +    }
 +    
 +    dd_scatterv(dd,
 +                DDMASTER(dd) ? ma->ibuf : NULL,
 +                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +                DDMASTER(dd) ? ma->cg : NULL,
 +                dd->ncg_home*sizeof(int),dd->index_gl);
 +    
 +    /* Determine the home charge group sizes */
 +    dd->cgindex[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        cg_gl = dd->index_gl[i];
 +        dd->cgindex[i+1] =
 +            dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
 +    }
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Home charge groups:\n");
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            fprintf(debug," %d",dd->index_gl[i]);
 +            if (i % 10 == 9) 
 +                fprintf(debug,"\n");
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static int compact_and_copy_vec_at(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,int vec,
 +                                   rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                for(i=i0; i<i1; i++)
 +                {
 +                    copy_rvec(src[i],src[home_pos++]);
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Copy to the communication buffer */
 +            nrcg = i1 - i0;
 +            pos_vec[m] += 1 + vec*nrcg;
 +            for(i=i0; i<i1; i++)
 +            {
 +                copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
 +            }
 +            pos_vec[m] += (nvec - vec - 1)*nrcg;
 +        }
 +        if (!bCompact)
 +        {
 +            home_pos += i1 - i0;
 +        }
 +        i0 = i1;
 +    }
 +    
 +    return home_pos;
 +}
 +
 +static int compact_and_copy_vec_cg(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +    
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                copy_rvec(src[icg],src[home_pos++]);
 +            }
 +        }
 +        else
 +        {
 +            nrcg = i1 - i0;
 +            /* Copy to the communication buffer */
 +            copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
 +            pos_vec[m] += 1 + nrcg*nvec;
 +        }
 +        i0 = i1;
 +    }
 +    if (!bCompact)
 +    {
 +        home_pos = ncg;
 +    }
 +    
 +    return home_pos;
 +}
 +
 +static int compact_ind(int ncg,int *move,
 +                       int *index_gl,int *cgindex,
 +                       int *gatindex,
 +                       gmx_ga2la_t ga2la,char *bLocalCG,
 +                       int *cginfo)
 +{
 +    int cg,nat,a0,a1,a,a_gl;
 +    int home_pos;
 +
 +    home_pos = 0;
 +    nat = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        a0 = cgindex[cg];
 +        a1 = cgindex[cg+1];
 +        if (move[cg] == -1)
 +        {
 +            /* Compact the home arrays in place.
 +             * Anything that can be done here avoids access to global arrays.
 +             */
 +            cgindex[home_pos] = nat;
 +            for(a=a0; a<a1; a++)
 +            {
 +                a_gl = gatindex[a];
 +                gatindex[nat] = a_gl;
 +                /* The cell number stays 0, so we don't need to set it */
 +                ga2la_change_la(ga2la,a_gl,nat);
 +                nat++;
 +            }
 +            index_gl[home_pos] = index_gl[cg];
 +            cginfo[home_pos]   = cginfo[cg];
 +            /* The charge group remains local, so bLocalCG does not change */
 +            home_pos++;
 +        }
 +        else
 +        {
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +        }
 +    }
 +    cgindex[home_pos] = nat;
 +    
 +    return home_pos;
 +}
 +
 +static void clear_and_mark_ind(int ncg,int *move,
 +                               int *index_gl,int *cgindex,int *gatindex,
 +                               gmx_ga2la_t ga2la,char *bLocalCG,
 +                               int *cell_index)
 +{
 +    int cg,a0,a1,a;
 +    
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        if (move[cg] >= 0)
 +        {
 +            a0 = cgindex[cg];
 +            a1 = cgindex[cg+1];
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +            /* Signal that this cg has moved using the ns cell index.
 +             * Here we set it to -1.
 +             * fill_grid will change it from -1 to 4*grid->ncells.
 +             */
 +            cell_index[cg] = -1;
 +        }
 +    }
 +}
 +
 +static void print_cg_move(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    gmx_domdec_comm_t *comm;
 +    char buf[22];
 +
 +    comm = dd->comm;
 +
 +    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
 +    }
 +    else
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
 +    }
 +    fprintf(fplog,"distance out of cell %f\n",
 +            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
 +                cm_old[XX],cm_old[YY],cm_old[ZZ]);
 +    }
 +    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
 +            cm_new[XX],cm_new[YY],cm_new[ZZ]);
 +    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
 +    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->cell_x0[dim],comm->cell_x1[dim]);
 +}
 +
 +static void cg_move_error(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    if (fplog)
 +    {
 +        print_cg_move(fplog, dd,step,cg,dim,dir,
 +                      bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    }
 +    print_cg_move(stderr,dd,step,cg,dim,dir,
 +                  bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    gmx_fatal(FARGS,
 +              "A charge group moved too far between two domain decomposition steps\n"
 +              "This usually means that your system is not well equilibrated");
 +}
 +
 +static void rotate_state_atom(t_state *state,int a)
 +{
 +    int est;
 +
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est))) {
 +            switch (est) {
 +            case estX:
 +                /* Rotate the complete state; for a rectangular box only */
 +                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
 +                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
 +                break;
 +            case estV:
 +                state->v[a][YY] = -state->v[a][YY];
 +                state->v[a][ZZ] = -state->v[a][ZZ];
 +                break;
 +            case estSDX:
 +                state->sd_X[a][YY] = -state->sd_X[a][YY];
 +                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
 +                break;
 +            case estCGP:
 +                state->cg_p[a][YY] = -state->cg_p[a][YY];
 +                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* These are distances, so not affected by rotation */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in rotate_state_atom");            
 +            }
 +        }
 +    }
 +}
 +
 +static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,ivec tric_dir,
 +                              t_state *state,rvec **f,
 +                              t_forcerec *fr,t_mdatoms *md,
 +                              gmx_bool bCompact,
 +                              t_nrnb *nrnb)
 +{
 +    int  *move;
 +    int  npbcdim;
 +    int  ncg[DIM*2],nat[DIM*2];
 +    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
 +    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
 +    int  sbuf[2],rbuf[2];
 +    int  home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
 +    int  flag;
 +    gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
 +    gmx_bool bScrew;
 +    ivec dev;
 +    real inv_ncg,pos_d;
 +    matrix tcm;
 +    rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
 +    atom_id *cgindex;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_domdec_comm_t *comm;
 +    
 +    if (dd->bScrewPBC)
 +    {
 +        check_screw_box(state->box);
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +    
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i))
 +        {
 +            switch (i)
 +            {
 +            case estX:   /* Always present */            break;
 +            case estV:   bV   = (state->flags & (1<<i)); break;
 +            case estSDX: bSDX = (state->flags & (1<<i)); break;
 +            case estCGP: bCGP = (state->flags & (1<<i)); break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No processing required */
 +                break;
 +            default:
 +            gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
 +            }
 +        }
 +    }
 +    
 +    if (dd->ncg_tot > comm->nalloc_int)
 +    {
 +        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
 +        srenew(comm->buf_int,comm->nalloc_int);
 +    }
 +    move = comm->buf_int;
 +    
 +    /* Clear the count */
 +    for(c=0; c<dd->ndim*2; c++)
 +    {
 +        ncg[c] = 0;
 +        nat[c] = 0;
 +    }
 +
 +    npbcdim = dd->npbcdim;
 +
 +    for(d=0; (d<DIM); d++)
 +    {
 +        limitd[d] = dd->comm->cellsize_min[d];
 +        if (d >= npbcdim && dd->ci[d] == 0)
 +        {
 +            cell_x0[d] = -GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x0[d] = comm->cell_x0[d];
 +        }
 +        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
 +        {
 +            cell_x1[d] = GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x1[d] = comm->cell_x1[d];
 +        }
 +        if (d < npbcdim)
 +        {
 +            limit0[d] = comm->old_cell_x0[d] - limitd[d];
 +            limit1[d] = comm->old_cell_x1[d] + limitd[d];
 +        }
 +        else
 +        {
 +            /* We check after communication if a charge group moved
 +             * more than one cell. Set the pre-comm check limit to float_max.
 +             */
 +            limit0[d] = -GMX_FLOAT_MAX;
 +            limit1[d] =  GMX_FLOAT_MAX;
 +        }
 +    }
 +    
 +    make_tric_corr_matrix(npbcdim,state->box,tcm);
 +    
 +    cgindex = dd->cgindex;
 +    
 +    /* Compute the center of geometry for all home charge groups
 +     * and put them in the box and determine where they should go.
 +     */
 +    for(cg=0; cg<dd->ncg_home; cg++)
 +    {
 +        k0   = cgindex[cg];
 +        k1   = cgindex[cg+1];
 +        nrcg = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(state->x[k0],cm_new);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cm_new);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cm_new,state->x[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cm_new[d] = inv_ncg*cm_new[d];
 +            }
 +        }
 +        
 +        clear_ivec(dev);
 +        /* Do pbc and check DD cell boundary crossings */
 +        for(d=DIM-1; d>=0; d--)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                /* Determine the location of this cg in lattice coordinates */
 +                pos_d = cm_new[d];
 +                if (tric_dir[d])
 +                {
 +                    for(d2=d+1; d2<DIM; d2++)
 +                    {
 +                        pos_d += cm_new[d2]*tcm[d2][d];
 +                    }
 +                }
 +                /* Put the charge group in the triclinic unit-cell */
 +                if (pos_d >= cell_x1[d])
 +                {
 +                    if (pos_d >= limit1[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = 1;
 +                    if (dd->ci[d] == dd->nc[d] - 1)
 +                    {
 +                        rvec_dec(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_dec(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +                else if (pos_d < cell_x0[d])
 +                {
 +                    if (pos_d < limit0[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = -1;
 +                    if (dd->ci[d] == 0)
 +                    {
 +                        rvec_inc(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_inc(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            else if (d < npbcdim)
 +            {
 +                /* Put the charge group in the rectangular unit-cell */
 +                while (cm_new[d] >= state->box[d][d])
 +                {
 +                    rvec_dec(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(state->x[k],state->box[d]);
 +                    }
 +                }
 +                while (cm_new[d] < 0)
 +                {
 +                    rvec_inc(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(state->x[k],state->box[d]);
 +                    }
 +                }
 +            }
 +        }
 +    
 +        copy_rvec(cm_new,cg_cm[cg]);
 +        
 +        /* Determine where this cg should go */
 +        flag = 0;
 +        mc = -1;
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            dim = dd->dim[d];
 +            if (dev[dim] == 1)
 +            {
 +                flag |= DD_FLAG_FW(d);
 +                if (mc == -1)
 +                {
 +                    mc = d*2;
 +                }
 +            }
 +            else if (dev[dim] == -1)
 +            {
 +                flag |= DD_FLAG_BW(d);
 +                if (mc == -1) {
 +                    if (dd->nc[dim] > 2)
 +                    {
 +                        mc = d*2 + 1;
 +                    }
 +                    else
 +                    {
 +                        mc = d*2;
 +                    }
 +                }
 +            }
 +        }
 +        move[cg] = mc;
 +        if (mc >= 0)
 +        {
 +            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +            {
 +                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +            }
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
 +            /* We store the cg size in the lower 16 bits
 +             * and the place where the charge group should go
 +             * in the next 6 bits. This saves some communication volume.
 +             */
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
 +            ncg[mc] += 1;
 +            nat[mc] += nrcg;
 +        }
 +    }
 +    
 +    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
 +    
 +    nvec = 1;
 +    if (bV)
 +    {
 +        nvec++;
 +    }
 +    if (bSDX)
 +    {
 +        nvec++;
 +    }
 +    if (bCGP)
 +    {
 +        nvec++;
 +    }
 +    
 +    /* Make sure the communication buffers are large enough */
 +    for(mc=0; mc<dd->ndim*2; mc++)
 +    {
 +        nvr = ncg[mc] + nat[mc]*nvec;
 +        if (nvr > comm->cgcm_state_nalloc[mc])
 +        {
 +            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
 +            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +        }
 +    }
 +    
 +    /* Recalculating cg_cm might be cheaper than communicating,
 +     * but that could give rise to rounding issues.
 +     */
 +    home_pos_cg =
 +        compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
 +                                nvec,cg_cm,comm,bCompact);
 +    
 +    vec = 0;
 +    home_pos_at =
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->x,comm,bCompact);
 +    if (bV)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->v,comm,bCompact);
 +    }
 +    if (bSDX)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->sd_X,comm,bCompact);
 +    }
 +    if (bCGP)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->cg_p,comm,bCompact);
 +    }
 +    
 +    if (bCompact)
 +    {
 +        compact_ind(dd->ncg_home,move,
 +                    dd->index_gl,dd->cgindex,dd->gatindex,
 +                    dd->ga2la,comm->bLocalCG,
 +                    fr->cginfo);
 +    }
 +    else
 +    {
 +        clear_and_mark_ind(dd->ncg_home,move,
 +                           dd->index_gl,dd->cgindex,dd->gatindex,
 +                           dd->ga2la,comm->bLocalCG,
 +                           fr->ns.grid->cell_index);
 +    }
 +    
 +    cginfo_mb = fr->cginfo_mb;
 +
 +    ncg_stay_home = home_pos_cg;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        ncg_recv = 0;
 +        nat_recv = 0;
 +        nvr      = 0;
 +        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
 +        {
 +            cdd = d*2 + dir;
 +            /* Communicate the cg and atom counts */
 +            sbuf[0] = ncg[cdd];
 +            sbuf[1] = nat[cdd];
 +            if (debug)
 +            {
 +                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
 +                        d,dir,sbuf[0],sbuf[1]);
 +            }
 +            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
 +            
 +            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
 +            {
 +                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
 +                srenew(comm->buf_int,comm->nalloc_int);
 +            }
 +            
 +            /* Communicate the charge group indices, sizes and flags */
 +            dd_sendrecv_int(dd, d, dir,
 +                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
 +                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
 +            
 +            nvs = ncg[cdd] + nat[cdd]*nvec;
 +            i   = rbuf[0]  + rbuf[1] *nvec;
 +            vec_rvec_check_alloc(&comm->vbuf,nvr+i);
 +            
 +            /* Communicate cgcm and state */
 +            dd_sendrecv_rvec(dd, d, dir,
 +                             comm->cgcm_state[cdd], nvs,
 +                             comm->vbuf.v+nvr, i);
 +            ncg_recv += rbuf[0];
 +            nat_recv += rbuf[1];
 +            nvr      += i;
 +        }
 +        
 +        /* Process the received charge groups */
 +        buf_pos = 0;
 +        for(cg=0; cg<ncg_recv; cg++)
 +        {
 +            flag = comm->buf_int[cg*DD_CGIBS+1];
 +
 +            if (dim >= npbcdim && dd->nc[dim] > 2)
 +            {
 +                /* No pbc in this dim and more than one domain boundary.
 +                 * We to a separate check if a charge did not move too far.
 +                 */
 +                if (((flag & DD_FLAG_FW(d)) &&
 +                     comm->vbuf.v[buf_pos][d] > cell_x1[dim]) ||
 +                    ((flag & DD_FLAG_BW(d)) &&
 +                     comm->vbuf.v[buf_pos][d] < cell_x0[dim]))
 +                {
 +                    cg_move_error(fplog,dd,step,cg,d,
 +                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
 +                                   FALSE,0,
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos][d]);
 +                }
 +            }
 +
 +            mc = -1;
 +            if (d < dd->ndim-1)
 +            {
 +                /* Check which direction this cg should go */
 +                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
 +                {
 +                    if (dd->bGridJump)
 +                    {
 +                        /* The cell boundaries for dimension d2 are not equal
 +                         * for each cell row of the lower dimension(s),
 +                         * therefore we might need to redetermine where
 +                         * this cg should go.
 +                         */
 +                        dim2 = dd->dim[d2];
 +                        /* If this cg crosses the box boundary in dimension d2
 +                         * we can use the communicated flag, so we do not
 +                         * have to worry about pbc.
 +                         */
 +                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
 +                               (flag & DD_FLAG_FW(d2))) ||
 +                              (dd->ci[dim2] == 0 &&
 +                               (flag & DD_FLAG_BW(d2)))))
 +                        {
 +                            /* Clear the two flags for this dimension */
 +                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
 +                            /* Determine the location of this cg
 +                             * in lattice coordinates
 +                             */
 +                            pos_d = comm->vbuf.v[buf_pos][dim2];
 +                            if (tric_dir[dim2])
 +                            {
 +                                for(d3=dim2+1; d3<DIM; d3++)
 +                                {
 +                                    pos_d +=
 +                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
 +                                }
 +                            }
 +                            /* Check of we are not at the box edge.
 +                             * pbc is only handled in the first step above,
 +                             * but this check could move over pbc while
 +                             * the first step did not due to different rounding.
 +                             */
 +                            if (pos_d >= cell_x1[dim2] &&
 +                                dd->ci[dim2] != dd->nc[dim2]-1)
 +                            {
 +                                flag |= DD_FLAG_FW(d2);
 +                            }
 +                            else if (pos_d < cell_x0[dim2] &&
 +                                     dd->ci[dim2] != 0)
 +                            {
 +                                flag |= DD_FLAG_BW(d2);
 +                            }
 +                            comm->buf_int[cg*DD_CGIBS+1] = flag;
 +                        }
 +                    }
 +                    /* Set to which neighboring cell this cg should go */
 +                    if (flag & DD_FLAG_FW(d2))
 +                    {
 +                        mc = d2*2;
 +                    }
 +                    else if (flag & DD_FLAG_BW(d2))
 +                    {
 +                        if (dd->nc[dd->dim[d2]] > 2)
 +                        {
 +                            mc = d2*2+1;
 +                        }
 +                        else
 +                        {
 +                            mc = d2*2;
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            nrcg = flag & DD_FLAG_NRCG;
 +            if (mc == -1)
 +            {
 +                if (home_pos_cg+1 > dd->cg_nalloc)
 +                {
 +                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
 +                    srenew(dd->index_gl,dd->cg_nalloc);
 +                    srenew(dd->cgindex,dd->cg_nalloc+1);
 +                }
 +                /* Set the global charge group index and size */
 +                dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
 +                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
 +                /* Copy the state from the buffer */
 +                if (home_pos_cg >= fr->cg_nalloc)
 +                {
 +                    dd_realloc_fr_cg(fr,home_pos_cg+1);
 +                    cg_cm = fr->cg_cm;
 +                }
 +                copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
 +                /* Set the cginfo */
 +                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
 +                                                   dd->index_gl[home_pos_cg]);
 +                if (comm->bLocalCG)
 +                {
 +                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
 +                }
 +
 +                if (home_pos_at+nrcg > state->nalloc)
 +                {
 +                    dd_realloc_state(state,f,home_pos_at+nrcg);
 +                }
 +                for(i=0; i<nrcg; i++)
 +                {
 +                    copy_rvec(comm->vbuf.v[buf_pos++],
 +                              state->x[home_pos_at+i]);
 +                }
 +                if (bV)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->v[home_pos_at+i]);
 +                    }
 +                }
 +                if (bSDX)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->sd_X[home_pos_at+i]);
 +                    }
 +                }
 +                if (bCGP)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->cg_p[home_pos_at+i]);
 +                    }
 +                }
 +                home_pos_cg += 1;
 +                home_pos_at += nrcg;
 +            }
 +            else
 +            {
 +                /* Reallocate the buffers if necessary  */
 +                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +                {
 +                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +                }
 +                nvr = ncg[mc] + nat[mc]*nvec;
 +                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
 +                {
 +                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
 +                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +                }
 +                /* Copy from the receive to the send buffers */
 +                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
 +                       comm->buf_int + cg*DD_CGIBS,
 +                       DD_CGIBS*sizeof(int));
 +                memcpy(comm->cgcm_state[mc][nvr],
 +                       comm->vbuf.v[buf_pos],
 +                       (1+nrcg*nvec)*sizeof(rvec));
 +                buf_pos += 1 + nrcg*nvec;
 +                ncg[mc] += 1;
 +                nat[mc] += nrcg;
 +            }
 +        }
 +    }
 +    
 +    /* With sorting (!bCompact) the indices are now only partially up to date
 +     * and ncg_home and nat_home are not the real count, since there are
 +     * "holes" in the arrays for the charge groups that moved to neighbors.
 +     */
 +    dd->ncg_home = home_pos_cg;
 +    dd->nat_home = home_pos_at;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished repartitioning\n");
 +    }
 +
 +    return ncg_stay_home;
 +}
 +
 +void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
 +{
 +    dd->comm->cycl[ddCycl] += cycles;
 +    dd->comm->cycl_n[ddCycl]++;
 +    if (cycles > dd->comm->cycl_max[ddCycl])
 +    {
 +        dd->comm->cycl_max[ddCycl] = cycles;
 +    }
 +}
 +
 +static double force_flop_count(t_nrnb *nrnb)
 +{
 +    int i;
 +    double sum;
 +    const char *name;
 +
 +    sum = 0;
 +    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
 +    {
 +        /* To get closer to the real timings, we half the count
 +         * for the normal loops and again half it for water loops.
 +         */
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        {
 +            sum += nrnb->n[i]*0.25*cost_nrnb(i);
 +        }
 +        else
 +        {
 +            sum += nrnb->n[i]*0.50*cost_nrnb(i);
 +        }
 +    }
 +    for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
 +    {
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        sum += nrnb->n[i]*cost_nrnb(i);
 +    }
 +    for(i=eNR_BONDS; i<=eNR_WALLS; i++)
 +    {
 +        sum += nrnb->n[i]*cost_nrnb(i);
 +    }
 +
 +    return sum;
 +}
 +
 +void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop -= force_flop_count(nrnb);
 +    }
 +}
 +void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop += force_flop_count(nrnb);
 +        dd->comm->flop_n++;
 +    }
 +}  
 +
 +static void clear_dd_cycle_counts(gmx_domdec_t *dd)
 +{
 +    int i;
 +    
 +    for(i=0; i<ddCyclNr; i++)
 +    {
 +        dd->comm->cycl[i] = 0;
 +        dd->comm->cycl_n[i] = 0;
 +        dd->comm->cycl_max[i] = 0;
 +    }
 +    dd->comm->flop = 0;
 +    dd->comm->flop_n = 0;
 +}
 +
 +/* Collect the load measurements of all cells and reduce them per row.
 + *
 + * The decomposition dimensions are processed from the last one down to the
 + * first. In each dimension the participating ranks pack their data into
 + * sbuf, the row is gathered on the row root (with GMX_MPI) and the root
 + * reduces the row into comm->load[d]. The reduced data of dimension d+1 is
 + * what gets sent in dimension d. Finally the DD master adds the result of
 + * dimension 0 to its running totals.
 + *
 + * The whole routine is timed under the ewcDDCOMMLOAD wallcycle counter.
 + */
 +static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_load_t *load;
 +    gmx_domdec_root_t *root=NULL;
 +    int  d,dim,cid,i,pos;
 +    float cell_frac=0,sbuf[DD_NLOAD_MAX];
 +    gmx_bool bSepPME;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution start\n");
 +    }
 +
 +    wallcycle_start(wcycle,ewcDDCOMMLOAD);
 +    
 +    comm = dd->comm;
 +    
 +    /* A non-negative pme_nodeid is used as the sign that PME load data
 +     * (mdf/pme entries) has to be communicated as well.
 +     */
 +    bSepPME = (dd->pme_nodeid >= 0);
 +    
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        dim = dd->dim[d];
 +        /* Check if we participate in the communication in this dimension */
 +        if (d == dd->ndim-1 || 
 +            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
 +        {
 +            load = &comm->load[d];
 +            if (dd->bGridJump)
 +            {
 +                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
 +            }
 +            /* Pack the send buffer. The order of the entries must match
 +             * the order in which the root unpacks load->load below:
 +             * sum, max, [sum_m, cvol_min, [flags], [cell_f_max0, cell_f_min1]],
 +             * [mdf, pme].
 +             */
 +            pos = 0;
 +            if (d == dd->ndim-1)
 +            {
 +                /* Last dimension: send our own force load; it serves as
 +                 * sum, max and (with bGridJump) sum_m. No flags entry is
 +                 * sent in this dimension.
 +                 */
 +                sbuf[pos++] = dd_force_load(comm);
 +                sbuf[pos++] = sbuf[0];
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = sbuf[0];
 +                    sbuf[pos++] = cell_frac;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
 +                    sbuf[pos++] = comm->cycl[ddCyclPME];
 +                }
 +            }
 +            else
 +            {
 +                /* Lower dimension: forward the data already reduced
 +                 * for dimension d+1.
 +                 */
 +                sbuf[pos++] = comm->load[d+1].sum;
 +                sbuf[pos++] = comm->load[d+1].max;
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].sum_m;
 +                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
 +                    sbuf[pos++] = comm->load[d+1].flags;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].mdf;
 +                    sbuf[pos++] = comm->load[d+1].pme;
 +                }
 +            }
 +            load->nload = pos;
 +            /* Communicate a row in DD direction d.
 +             * The communicators are setup such that the root always has rank 0.
 +             */
 +#ifdef GMX_MPI
 +            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
 +                       load->load,load->nload*sizeof(float),MPI_BYTE,
 +                       0,comm->mpi_comm_load[d]);
 +#endif
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* We are the root, process this row */
 +                if (comm->bDynLoadBal)
 +                {
 +                    root = comm->root[d];
 +                }
 +                load->sum = 0;
 +                load->max = 0;
 +                load->sum_m = 0;
 +                load->cvol_min = 1;
 +                load->flags = 0;
 +                load->mdf = 0;
 +                load->pme = 0;
 +                pos = 0;
 +                for(i=0; i<dd->nc[dim]; i++)
 +                {
 +                    load->sum += load->load[pos++];
 +                    load->max = max(load->max,load->load[pos]);
 +                    pos++;
 +                    if (dd->bGridJump)
 +                    {
 +                        /* NOTE(review): root is only assigned above when
 +                         * comm->bDynLoadBal is set; this dereference presumably
 +                         * relies on dd->bGridJump implying bDynLoadBal - confirm.
 +                         */
 +                        if (root->bLimited)
 +                        {
 +                            /* This direction could not be load balanced properly,
 +                             * therefore we need to use the maximum instead of
 +                             * the average load.
 +                             */
 +                            load->sum_m = max(load->sum_m,load->load[pos]);
 +                        }
 +                        else
 +                        {
 +                            load->sum_m += load->load[pos];
 +                        }
 +                        pos++;
 +                        load->cvol_min = min(load->cvol_min,load->load[pos]);
 +                        pos++;
 +                        if (d < dd->ndim-1)
 +                        {
 +                            /* The flags travel as a float; round back to int */
 +                            load->flags = (int)(load->load[pos++] + 0.5);
 +                        }
 +                        if (d > 0)
 +                        {
 +                            root->cell_f_max0[i] = load->load[pos++];
 +                            root->cell_f_min1[i] = load->load[pos++];
 +                        }
 +                    }
 +                    if (bSepPME)
 +                    {
 +                        load->mdf = max(load->mdf,load->load[pos]);
 +                        pos++;
 +                        load->pme = max(load->pme,load->load[pos]);
 +                        pos++;
 +                    }
 +                }
 +                if (comm->bDynLoadBal && root->bLimited)
 +                {
 +                    /* sum_m holds the row maximum here; scale it by the
 +                     * number of cells and flag dimension d as limited.
 +                     */
 +                    load->sum_m *= dd->nc[dim];
 +                    load->flags |= (1<<d);
 +                }
 +            }
 +        }
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        /* Accumulate the totals used for the averages over the run */
 +        comm->nload      += dd_load_count(comm);
 +        comm->load_step  += comm->cycl[ddCyclStep];
 +        comm->load_sum   += comm->load[0].sum;
 +        comm->load_max   += comm->load[0].max;
 +        if (comm->bDynLoadBal)
 +        {
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                if (comm->load[0].flags & (1<<d))
 +                {
 +                    comm->load_lim[d]++;
 +                }
 +            }
 +        }
 +        if (bSepPME)
 +        {
 +            comm->load_mdf += comm->load[0].mdf;
 +            comm->load_pme += comm->load[0].pme;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcDDCOMMLOAD);
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution finished\n");
 +    }
 +}
 +
 +static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
 +{
 +    /* Return the relative performance loss on the total run time
 +     * due to the force calculation load imbalance.
 +     */
 +    if (dd->comm->nload > 0)
 +    {
 +        return
 +            (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
 +            (dd->comm->load_step*dd->nnodes);
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +/* Print the load statistics accumulated over the run to fplog and stderr.
 + *
 + * Only the DD master prints, and only when comm->nload > 0. Printed are the
 + * average load imbalance, the resulting performance loss, with dynamic load
 + * balancing the percentage of limited steps per dimension, and with PME
 + * nodes the PME mesh/force load ratio. NOTEs are added when a loss reaches
 + * DD_PERF_LOSS.
 + *
 + * NOTE(review): fplog is written to without a NULL check - presumably it is
 + * always open on the DD master; confirm. comm->load_sum, comm->load_mdf and
 + * comm->load_step are used as divisors without a zero check.
 + */
 +static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    char  buf[STRLEN];
 +    int   npp,npme,nnodes,d,limp;
 +    float imbal,pme_f_ratio,lossf,lossp=0;
 +    gmx_bool  bLim;
 +    gmx_domdec_comm_t *comm;
 +
 +    comm = dd->comm;
 +    if (DDMASTER(dd) && comm->nload > 0)
 +    {
 +        npp    = dd->nnodes;
 +        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
 +        nnodes = npp + npme;
 +        /* Maximum load relative to the average load, minus one */
 +        imbal = comm->load_max*npp/comm->load_sum - 1;
 +        lossf = dd_force_imb_perf_loss(dd);
 +        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"%s",buf);
 +        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"%s",buf);
 +        bLim = FALSE;
 +        if (comm->bDynLoadBal)
 +        {
 +            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                /* Integer percentage of comm->load_lim[d] relative to
 +                 * comm->nload.
 +                 */
 +                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
 +                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
 +                if (limp >= 50)
 +                {
 +                    bLim = TRUE;
 +                }
 +            }
 +            sprintf(buf+strlen(buf),"\n");
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        if (npme > 0)
 +        {
 +            pme_f_ratio = comm->load_pme/comm->load_mdf;
 +            /* The sign of lossp tells which side had more work:
 +             * negative when load_pme is below load_mdf. The loss is
 +             * weighted with the fraction of nodes npme/nnodes when
 +             * lossp <= 0 and npp/nnodes otherwise.
 +             */
 +            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
 +            if (lossp <= 0)
 +            {
 +                lossp *= (float)npme/(float)nnodes;
 +            }
 +            else
 +            {
 +                lossp *= (float)npp/(float)nnodes;
 +            }
 +            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(stderr,"\n");
 +        
 +        /* Advise the user when the loss reaches DD_PERF_LOSS */
 +        if (lossf >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
 +                    "      in the domain decomposition.\n",lossf*100);
 +            if (!comm->bDynLoadBal)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb.)\n");
 +            }
 +            else if (bLim)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
 +            }
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
 +                    "      had %s work to do than the PP nodes.\n"
 +                    "      You might want to %s the number of PME nodes\n"
 +                    "      or %s the cut-off and the grid spacing.\n",
 +                    fabs(lossp*100),
 +                    (lossp < 0) ? "less"     : "more",
 +                    (lossp < 0) ? "decrease" : "increase",
 +                    (lossp < 0) ? "decrease" : "increase");
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +    }
 +}
 +
 +static float dd_vol_min(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].cvol_min*dd->nnodes;
 +}
 +
 +static gmx_bool dd_load_flags(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].flags;
 +}
 +
 +static float dd_f_imbal(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
 +}
 +
 +static float dd_pme_f_ratio(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].pme/dd->comm->load[0].mdf;
 +}
 +
 +static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
 +{
 +    int flags,d;
 +    char buf[22];
 +    
 +    flags = dd_load_flags(dd);
 +    if (flags)
 +    {
 +        fprintf(fplog,
 +                "DD  load balancing is limited by minimum cell size in dimension");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (flags & (1<<d))
 +            {
 +                fprintf(fplog," %c",dim2char(dd->dim[d]));
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    fprintf(fplog,"DD  step %s",gmx_step_str(step,buf));
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(fplog,"  vol min/aver %5.3f%c",
 +                dd_vol_min(dd),flags ? '!' : ' ');
 +    }
 +    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(fplog,"  pme mesh/force %5.3f",dd_pme_f_ratio(dd));
 +    }
 +    fprintf(fplog,"\n\n");
 +}
 +
 +static void dd_print_load_verbose(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(stderr,"vol %4.2f%c ",
 +                dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
 +    }
 +    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
 +                                   int dim_ind,ivec loc)
 +{
 +    MPI_Group g_row = MPI_GROUP_EMPTY;
 +    MPI_Comm  c_row;
 +    int  dim,i,*rank;
 +    ivec loc_c;
 +    gmx_domdec_root_t *root;
 +    gmx_bool bPartOfGroup = FALSE;
 +    
 +    dim = dd->dim[dim_ind];
 +    copy_ivec(loc,loc_c);
 +    snew(rank,dd->nc[dim]);
 +    for(i=0; i<dd->nc[dim]; i++)
 +    {
 +        loc_c[dim] = i;
 +        rank[i] = dd_index(dd->nc,loc_c);
 +        if (rank[i] == dd->rank)
 +        {
 +            /* This process is part of the group */
 +            bPartOfGroup = TRUE;
 +        }
 +    }
 +    if (bPartOfGroup)
 +    {
 +        MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
 +    }
 +    MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
 +    if (bPartOfGroup)
 +    {
 +        dd->comm->mpi_comm_load[dim_ind] = c_row;
 +        if (dd->comm->eDLB != edlbNO)
 +        {
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* This is the root process of this row */
 +                snew(dd->comm->root[dim_ind],1);
 +                root = dd->comm->root[dim_ind];
 +                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
 +                snew(root->old_cell_f,dd->nc[dim]+1);
 +                snew(root->bCellMin,dd->nc[dim]);
 +                if (dim_ind > 0)
 +                {
 +                    snew(root->cell_f_max0,dd->nc[dim]);
 +                    snew(root->cell_f_min1,dd->nc[dim]);
 +                    snew(root->bound_min,dd->nc[dim]);
 +                    snew(root->bound_max,dd->nc[dim]);
 +                }
 +                snew(root->buf_ncd,dd->nc[dim]);
 +            }
 +            else
 +            {
 +                /* This is not a root process, we only need to receive cell_f */
 +                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
 +            }
 +        }
 +        if (dd->ci[dim] == dd->master_ci[dim])
 +        {
 +            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
 +        }
 +    }
 +    sfree(rank);
 +}
 +#endif
 +
 +static void make_load_communicators(gmx_domdec_t *dd)
 +{
 +#ifdef GMX_MPI
 +  MPI_Group g_all;
 +  int  dim0,dim1,i,j;
 +  ivec loc;
 +
 +  if (debug)
 +    fprintf(debug,"Making load communicators\n");
 +
 +  MPI_Comm_group(dd->mpi_comm_all,&g_all);
 +  
 +  snew(dd->comm->load,dd->ndim);
 +  snew(dd->comm->mpi_comm_load,dd->ndim);
 +  
 +  clear_ivec(loc);
 +  make_load_communicator(dd,g_all,0,loc);
 +  if (dd->ndim > 1) {
 +    dim0 = dd->dim[0];
 +    for(i=0; i<dd->nc[dim0]; i++) {
 +      loc[dim0] = i;
 +      make_load_communicator(dd,g_all,1,loc);
 +    }
 +  }
 +  if (dd->ndim > 2) {
 +    dim0 = dd->dim[0];
 +    for(i=0; i<dd->nc[dim0]; i++) {
 +      loc[dim0] = i;
 +      dim1 = dd->dim[1];
 +      for(j=0; j<dd->nc[dim1]; j++) {
 +        loc[dim1] = j;
 +        make_load_communicator(dd,g_all,2,loc);
 +      }
 +    }
 +  }
 +
 +  MPI_Group_free(&g_all);
 +
 +  if (debug)
 +    fprintf(debug,"Finished making load communicators\n");
 +#endif
 +}
 +
 +void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    gmx_bool bZYX;
 +    int  d,dim,i,j,m;
 +    ivec tmp,s;
 +    int  nzone,nzonep;
 +    ivec dd_zp[DD_MAXIZONE];
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_ns_ranges_t *izone;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
 +        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
 +        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
 +        if (debug)
 +        {
 +            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
 +                    dd->rank,dim,
 +                    dd->neighbor[d][0],
 +                    dd->neighbor[d][1]);
 +        }
 +    }
 +    
 +    if (DDMASTER(dd))
 +    {
 +        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
 +          dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
 +                dd->ndim,
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
 +                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    switch (dd->ndim)
 +    {
 +    case 3:
 +        nzone  = dd_z3n;
 +        nzonep = dd_zp3n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp3[i],dd_zp[i]);
 +        }
 +        break;
 +    case 2:
 +        nzone  = dd_z2n;
 +        nzonep = dd_zp2n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp2[i],dd_zp[i]);
 +        }
 +        break;
 +    case 1:
 +        nzone  = dd_z1n;
 +        nzonep = dd_zp1n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp1[i],dd_zp[i]);
 +        }
 +        break;
 +    default:
 +        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
 +        nzone = 0;
 +        nzonep = 0;
 +    }
 +
 +    zones = &dd->comm->zones;
 +
 +    for(i=0; i<nzone; i++)
 +    {
 +        m = 0;
 +        clear_ivec(zones->shift[i]);
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
 +        }
 +    }
 +    
 +    zones->n = nzone;
 +    for(i=0; i<nzone; i++)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            s[d] = dd->ci[d] - zones->shift[i][d];
 +            if (s[d] < 0)
 +            {
 +                s[d] += dd->nc[d];
 +            }
 +            else if (s[d] >= dd->nc[d])
 +            {
 +                s[d] -= dd->nc[d];
 +            }
 +        }
 +    }
 +    zones->nizone = nzonep;
 +    for(i=0; i<zones->nizone; i++)
 +    {
 +        if (dd_zp[i][0] != i)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
 +        }
 +        izone = &zones->izone[i];
 +        izone->j0 = dd_zp[i][1];
 +        izone->j1 = dd_zp[i][2];
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] == 1)
 +            {
 +                /* All shifts should be allowed */
 +                izone->shift0[dim] = -1;
 +                izone->shift1[dim] = 1;
 +            }
 +            else
 +            {
 +                /*
 +                  izone->shift0[d] = 0;
 +                  izone->shift1[d] = 0;
 +                  for(j=izone->j0; j<izone->j1; j++) {
 +                  if (dd->shift[j][d] > dd->shift[i][d])
 +                  izone->shift0[d] = -1;
 +                  if (dd->shift[j][d] < dd->shift[i][d])
 +                  izone->shift1[d] = 1;
 +                  }
 +                */
 +                
 +                int shift_diff;
 +                
 +                /* Assume the shift are not more than 1 cell */
 +                izone->shift0[dim] = 1;
 +                izone->shift1[dim] = -1;
 +                for(j=izone->j0; j<izone->j1; j++)
 +                {
 +                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
 +                    if (shift_diff < izone->shift0[dim])
 +                    {
 +                        izone->shift0[dim] = shift_diff;
 +                    }
 +                    if (shift_diff > izone->shift1[dim])
 +                    {
 +                        izone->shift1[dim] = shift_diff;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (dd->comm->eDLB != edlbNO)
 +    {
 +        snew(dd->comm->root,dd->ndim);
 +    }
 +    
 +    if (dd->comm->bRecordLoad)
 +    {
 +        make_load_communicators(dd);
 +    }
 +}
 +
 +/* Set up the particle-particle (PP) communicator of cr->dd and determine
 + * the DD rank, the cell coordinates dd->ci and the DD master rank.
 + *
 + * fplog    log file, may be NULL
 + * cr       communication record; cr->mpi_comm_mygroup is replaced by a
 + *          Cartesian communicator when comm->bCartesianPP is set
 + * reorder  passed on as the reorder argument of MPI_Cart_create
 + *
 + * All communicator setup is compiled only with GMX_MPI; without it only
 + * the log/debug output at the end remains.
 + */
 +static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank,*buf;
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP)
 +    {
 +        /* Set up cartesian communication for the particle-particle part */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
 +                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +        }
 +        
 +        /* The grid is periodic in all DIM dimensions */
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
 +                        &comm_cart);
 +        /* We overwrite the old communicator with the new cartesian one */
 +        /* NOTE(review): the previous communicator handle is not freed
 +         * here - confirm whether it is still owned elsewhere.
 +         */
 +        cr->mpi_comm_mygroup = comm_cart;
 +    }
 +    
 +    dd->mpi_comm_all = cr->mpi_comm_mygroup;
 +    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
 +    
 +    if (comm->bCartesianPP_PME)
 +    {
 +        /* Since we want to use the original cartesian setup for sim,
 +         * and not the one after split, we need to make an index.
 +         */
 +        /* Every rank fills in only its own entry; the sum over all ranks
 +         * then gives the complete ddindex -> DD rank table.
 +         */
 +        snew(comm->ddindex2ddnodeid,dd->nnodes);
 +        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
 +        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
 +        /* Get the rank of the DD master,
 +         * above we made sure that the master node is a PP node.
 +         */
 +        /* Only the master contributes a non-zero value,
 +         * so the sum equals the DD rank of the master.
 +         */
 +        if (MASTER(cr))
 +        {
 +            rank = dd->rank;
 +        }
 +        else
 +        {
 +            rank = 0;
 +        }
 +        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (cr->npmenodes == 0)
 +        {
 +            /* The PP communicator is also
 +             * the communicator for this simulation
 +             */
 +            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
 +        }
 +        cr->nodeid = dd->rank;
 +        
 +        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
 +        
 +        /* We need to make an index to go from the coordinates
 +         * to the nodeid of this simulation.
 +         */
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        sfree(buf);
 +        
 +        /* Determine the master coordinates and rank.
 +         * The DD master should be the same node as the master of this sim.
 +         */
 +        /* The cell whose simulation nodeid is 0 is taken as the master */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            if (comm->ddindex2simnodeid[i] == 0)
 +            {
 +                ddindex2xyz(dd->nc,i,dd->master_ci);
 +                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"The master rank is %d\n",dd->masterrank);
 +        }
 +    }
 +    else
 +    {
 +        /* No Cartesian communicators */
 +        /* We use the rank in dd->comm->all as DD index */
 +        ddindex2xyz(dd->nc,dd->rank,dd->ci);
 +        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
 +        dd->masterrank = 0;
 +        clear_ivec(dd->master_ci);
 +    }
 +#endif
 +  
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +}
 +
 +static void receive_ddindex2simnodeid(t_commrec *cr)
 +{
 +    gmx_domdec_t *dd;
 +    
 +    gmx_domdec_comm_t *comm;
 +    int  *buf;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
 +    {
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +#ifdef GMX_MPI
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +#endif
 +        sfree(buf);
 +    }
 +#endif
 +}
 +
 +static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
 +                                                     int ncg,int natoms)
 +{
 +    gmx_domdec_master_t *ma;
 +    int i;
 +
 +    snew(ma,1);
 +    
 +    snew(ma->ncg,dd->nnodes);
 +    snew(ma->index,dd->nnodes+1);
 +    snew(ma->cg,ncg);
 +    snew(ma->nat,dd->nnodes);
 +    snew(ma->ibuf,dd->nnodes*2);
 +    snew(ma->cell_x,DIM);
 +    for(i=0; i<DIM; i++)
 +    {
 +        snew(ma->cell_x[i],dd->nc[i]+1);
 +    }
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        ma->vbuf = NULL;
 +    }
 +    else
 +    {
 +        snew(ma->vbuf,natoms);
 +    }
 +
 +    return ma;
 +}
 +/* Decides whether this rank is a PP-only or a PME-only rank and splits
 + * cr->mpi_comm_mysim into the per-duty communicator cr->mpi_comm_mygroup.
 + *
 + * fplog          log file; may be NULL, every write is guarded
 + * cr             communication record, updated in place
 + * dd_node_order  ddnoPP_PME, ddnoINTERLEAVE or ddnoCARTESIAN; any other
 + *                value is a fatal error
 + * reorder        forwarded as the reorder argument of MPI_Cart_create
 + *
 + * When comm->bCartesianPP is set and the PME ranks fit as whole slabs in y
 + * or z, comm->bCartesianPP_PME is turned on and comm->ntot is enlarged along
 + * comm->cartpmedim. In that case a Cartesian communicator replaces
 + * cr->mpi_comm_mysim and cr->sim_nodeid; otherwise the duty follows from
 + * dd_simnode2pmenode().
 + * Without GMX_MPI no duty is assigned and no communicator is touched here.
 + */
 +static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
 +                               int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank;
 +    gmx_bool bDiv[DIM];
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (comm->bCartesianPP)
 +    {
 +        for(i=1; i<DIM; i++) /* only YY and ZZ; bDiv[XX] is never set or read */
 +        {
 +            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
 +        }
 +        if (bDiv[YY] || bDiv[ZZ])
 +        {
 +            comm->bCartesianPP_PME = TRUE;
 +            /* If we have 2D PME decomposition, which is always in x+y,
 +             * we stack the PME only nodes in z.
 +             * Otherwise we choose the direction that provides the thinnest slab
 +             * of PME only nodes as this will have the least effect
 +             * on the PP communication.
 +             * But for the PME communication the opposite might be better.
 +             */
 +            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
 +                             !bDiv[YY] ||
 +                             dd->nc[YY] > dd->nc[ZZ]))
 +            {
 +                comm->cartpmedim = ZZ;
 +            }
 +            else
 +            {
 +                comm->cartpmedim = YY;
 +            }
 +            comm->ntot[comm->cartpmedim]
 +                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes; /* extra PME layers */
 +        }
 +        else if (fplog)
 +        {
 +            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
 +            fprintf(fplog,
 +                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
 +        }
 +    }
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP_PME)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
 +        }
 +        
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE; /* the grid is periodic in all dimensions */
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
 +                        &comm_cart);
 +        
 +        MPI_Comm_rank(comm_cart,&rank);
 +        if (MASTERNODE(cr) && rank != 0)
 +        {
 +            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
 +        }
 +        
 +        /* With this assignment we lose the link to the original communicator
 +         * which will usually be MPI_COMM_WORLD, unless we have multisim.
 +         */
 +        cr->mpi_comm_mysim = comm_cart;
 +        cr->sim_nodeid = rank;
 +        
 +        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
 +        
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
 +                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +        
 +        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim]) /* inside the PP grid */
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        if (cr->npmenodes == 0 ||
 +            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim]) /* in the added PME layers */
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty, /* color: one group per duty */
 +                       dd_index(comm->ntot,dd->ci), /* key: index in the full grid */
 +                       &cr->mpi_comm_mygroup);
 +    }
 +    else
 +    {
 +        switch (dd_node_order)
 +        {
 +        case ddnoPP_PME:
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
 +            }
 +            break;
 +        case ddnoINTERLEAVE:
 +            /* Interleave the PP-only and PME-only nodes,
 +             * as on clusters with dual-core machines this will double
 +             * the communication bandwidth of the PME processes
 +             * and thus speed up the PP <-> PME and inter PME communication.
 +             */
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Interleaving PP and PME nodes\n");
 +            }
 +            comm->pmenodes = dd_pmenodes(cr);
 +            break;
 +        case ddnoCARTESIAN:
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
 +        }
 +    
 +        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1) /* -1 selects the PME-only duty */
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        else
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty, /* color: one group per duty */
 +                       cr->nodeid, /* key: keep the current rank order */
 +                       &cr->mpi_comm_mygroup);
 +        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid); /* nodeid becomes the rank within the group */
 +    }
 +#endif
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"This is a %s only node\n\n",
 +                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
 +    }
 +}
 +/* Sets up the communicators and the duty dependent DD data of this rank.
 + *
 + * With cr->npmenodes > 0 the PME-only ranks are split off through
 + * split_communicator(); otherwise, under GMX_MPI, cr->mpi_comm_mygroup is
 + * made an alias of cr->mpi_comm_mysim. Ranks with PP duty then build the PP
 + * communicator, all other ranks only take part in the index exchange.
 + * Ranks without PME duty record the PME rank they talk to, and the DD master
 + * allocates its master data.
 + *
 + * fplog          log file, forwarded to the helpers
 + * cr             communication record, updated in place
 + * dd_node_order  node ordering choice; ddnoCARTESIAN enables Cartesian PP
 + */
 +void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int CartReorder;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    copy_ivec(dd->nc,comm->ntot); /* start from the PP grid, may be enlarged for PME */
 +    
 +    comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
 +    comm->bCartesianPP_PME = FALSE;
 +    
 +    /* Reorder the nodes by default. This might change the MPI ranks.
 +     * Real reordering is only supported on very few architectures,
 +     * Blue Gene is one of them.
 +     * Setting the env.var. GMX_NO_CART_REORDER turns reordering off.
 +     */
 +    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
 +    
 +    if (cr->npmenodes > 0)
 +    {
 +        /* Split the communicator into a PP and PME part */
 +        split_communicator(fplog,cr,dd_node_order,CartReorder);
 +        if (comm->bCartesianPP_PME)
 +        {
 +            /* We (possibly) reordered the nodes in split_communicator,
 +             * so it is no longer required in make_pp_communicator.
 +             */
 +            CartReorder = FALSE;
 +        }
 +    }
 +    else
 +    {
 +        /* All nodes do PP and PME */
 +#ifdef GMX_MPI    
 +        /* We do not require separate communicators */
 +        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#endif
 +    }
 +    
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* Copy or make a new PP communicator */
 +        make_pp_communicator(fplog,cr,CartReorder);
 +    }
 +    else
 +    {
 +        receive_ddindex2simnodeid(cr); /* non-PP ranks still join the index exchange */
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Set up the communication to our PME node */
 +        dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +        dd->pme_receive_vir_ener = receive_vir_ener(cr);
 +        if (debug)
 +        {
 +            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
 +                    dd->pme_nodeid,dd->pme_receive_vir_ener);
 +        }
 +    }
 +    else
 +    {
 +        dd->pme_nodeid = -1; /* this rank does PME itself, there is no separate PME partner */
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        dd->ma = init_gmx_domdec_master_t(dd,
 +                                          comm->cgs_gl.nr,
 +                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
 +    }
 +}
 +
 +static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
 +{
 +    real *slb_frac,tot;
 +    int  i,n;
 +    double dbl;
 +    
 +    slb_frac = NULL;
 +    if (nc > 1 && size_string != NULL)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using static load balancing for the %s direction\n",
 +                    dir);
 +        }
 +        snew(slb_frac,nc);
 +        tot = 0;
 +        for (i=0; i<nc; i++)
 +        {
 +            dbl = 0;
 +            sscanf(size_string,"%lf%n",&dbl,&n);
 +            if (dbl == 0)
 +            {
 +                gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
 +            }
 +            slb_frac[i] = dbl;
 +            size_string += n;
 +            tot += slb_frac[i];
 +        }
 +        /* Normalize */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Relative cell sizes:");
 +        }
 +        for (i=0; i<nc; i++)
 +        {
 +            slb_frac[i] /= tot;
 +            if (fplog)
 +            {
 +                fprintf(fplog," %5.3f",slb_frac[i]);
 +            }
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\n");
 +        }
 +    }
 +    
 +    return slb_frac;
 +}
 +
 +static int multi_body_bondeds_count(gmx_mtop_t *mtop)
 +{
 +    int n,nmol,ftype;
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist *il;
 +    
 +    n = 0;
 +    iloop = gmx_mtop_ilistloop_init(mtop);
 +    while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
 +    {
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if ((interaction_function[ftype].flags & IF_BOND) &&
 +                NRAL(ftype) >  2)
 +            {
 +                n += nmol*il[ftype].nr/(1 + NRAL(ftype));
 +            }
 +        }
 +  }
 +
 +  return n;
 +}
 +
 +static int dd_nst_env(FILE *fplog,const char *env_var,int def)
 +{
 +    char *val;
 +    int  nst;
 +    
 +    nst = def;
 +    val = getenv(env_var);
 +    if (val)
 +    {
 +        if (sscanf(val,"%d",&nst) <= 0)
 +        {
 +            nst = 1;
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
 +                    env_var,val,nst);
 +        }
 +    }
 +    
 +    return nst;
 +}
 +
 +static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
 +{
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n%s\n",warn_string);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n%s\n",warn_string);
 +    }
 +}
 +
 +static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
 +                                  t_inputrec *ir,FILE *fplog)
 +{
 +    if (ir->ePBC == epbcSCREW &&
 +        (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
 +    {
 +        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
 +    }
 +
 +    if (ir->ns_type == ensSIMPLE)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
 +    }
 +
 +    if (ir->nstlist == 0)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
 +    }
 +
 +    if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
 +    {
 +        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
 +    }
 +}
 +
 +static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int  di,d;
 +    real r;
 +
 +    r = ddbox->box_size[XX];
 +    for(di=0; di<dd->ndim; di++)
 +    {
 +        d = dd->dim[di];
 +        /* Check using the initial average cell size */
 +        r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +    }
 +
 +    return r;
 +}
 +
 +static int check_dlb_support(FILE *fplog,t_commrec *cr,
 +                             const char *dlb_opt,gmx_bool bRecordLoad,
 +                             unsigned long Flags,t_inputrec *ir)
 +{
 +    gmx_domdec_t *dd;
 +    int  eDLB=-1;
 +    char buf[STRLEN];
 +
 +    switch (dlb_opt[0])
 +    {
 +    case 'a': eDLB = edlbAUTO; break;
 +    case 'n': eDLB = edlbNO;   break;
 +    case 'y': eDLB = edlbYES;  break;
 +    default: gmx_incons("Unknown dlb_opt");
 +    }
 +
 +    if (Flags & MD_RERUN)
 +    {
 +        return edlbNO;
 +    }
 +
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        if (eDLB == edlbYES)
 +        {
 +            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
 +            dd_warning(cr,fplog,buf);
 +        }
 +            
 +        return edlbNO;
 +    }
 +
 +    if (!bRecordLoad)
 +    {
 +        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
 +
 +        return edlbNO;
 +    }
 +
 +    if (Flags & MD_REPRODUCIBLE)
 +    {
 +        switch (eDLB)
 +        {
 +                      case edlbNO: 
 +                              break;
 +                      case edlbAUTO:
 +                              dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
 +                              eDLB = edlbNO;
 +                              break;
 +                      case edlbYES:
 +                              dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
 +                              break;
 +                      default:
 +                              gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
 +                              break;
 +        }
 +    }
 +
 +    return eDLB;
 +}
 +
 +static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    int dim;
 +
 +    dd->ndim = 0;
 +    if (getenv("GMX_DD_ORDER_ZYX") != NULL)
 +    {
 +        /* Decomposition order z,y,x */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using domain decomposition order z, y, x\n");
 +        }
 +        for(dim=DIM-1; dim>=0; dim--)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* Decomposition order x,y,z */
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +}
 +
 +static gmx_domdec_comm_t *init_dd_comm()
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  i;
 +
 +    snew(comm,1);
 +    snew(comm->cggl_flag,DIM*2);
 +    snew(comm->cgcm_state,DIM*2);
 +    for(i=0; i<DIM*2; i++)
 +    {
 +        comm->cggl_flag_nalloc[i]  = 0;
 +        comm->cgcm_state_nalloc[i] = 0;
 +    }
 +    
 +    comm->nalloc_int = 0;
 +    comm->buf_int    = NULL;
 +
 +    vec_rvec_init(&comm->vbuf);
 +
 +    comm->n_load_have    = 0;
 +    comm->n_load_collect = 0;
 +
 +    for(i=0; i<ddnatNR-ddnatZONE; i++)
 +    {
 +        comm->sum_nat[i] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload   = 0;
 +    comm->load_step = 0;
 +    comm->load_sum  = 0;
 +    comm->load_max  = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf  = 0;
 +    comm->load_pme  = 0;
 +
 +    return comm;
 +}
 +
 +gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
 +                                        unsigned long Flags,
 +                                        ivec nc,
 +                                        real comm_distance_min,real rconstr,
 +                                        const char *dlb_opt,real dlb_scale,
 +                                        const char *sizex,const char *sizey,const char *sizez,
 +                                        gmx_mtop_t *mtop,t_inputrec *ir,
 +                                        matrix box,rvec *x,
 +                                        gmx_ddbox_t *ddbox,
 +                                        int *npme_x,int *npme_y)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  recload;
 +    int  d,i,j;
 +    real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
 +    gmx_bool bC;
 +    char buf[STRLEN];
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
 +    }
 +    
 +    snew(dd,1);
 +
 +    dd->comm = init_dd_comm();
 +    comm = dd->comm;
 +    snew(comm->cggl_flag,DIM*2);
 +    snew(comm->cgcm_state,DIM*2);
 +
 +    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
 +    dd->bScrewPBC = (ir->ePBC == epbcSCREW);
 +    
 +    dd->bSendRecv2      = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
 +    comm->dlb_scale_lim = dd_nst_env(fplog,"GMX_DLB_MAX",10);
 +    comm->eFlop         = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
 +    recload             = dd_nst_env(fplog,"GMX_DD_LOAD",1);
 +    comm->nstSortCG     = dd_nst_env(fplog,"GMX_DD_SORT",1);
 +    comm->nstDDDump     = dd_nst_env(fplog,"GMX_DD_DUMP",0);
 +    comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
 +    comm->DD_debug      = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
 +
 +    dd->pme_recv_f_alloc = 0;
 +    dd->pme_recv_f_buf = NULL;
 +
 +    if (dd->bSendRecv2 && fplog)
 +    {
 +        fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
 +    }
 +    if (comm->eFlop)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will load balance based on FLOP count\n");
 +        }
 +        if (comm->eFlop > 1)
 +        {
 +            srand(1+cr->nodeid);
 +        }
 +        comm->bRecordLoad = TRUE;
 +    }
 +    else
 +    {
 +        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
 +                             
 +    }
 +    
 +    comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
 +    
 +    comm->bDynLoadBal = (comm->eDLB == edlbYES);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
 +    }
 +    dd->bGridJump = comm->bDynLoadBal;
 +    
 +    if (comm->nstSortCG)
 +    {
 +        if (fplog)
 +        {
 +            if (comm->nstSortCG == 1)
 +            {
 +                fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
 +            }
 +            else
 +            {
 +                fprintf(fplog,"Will sort the charge groups every %d steps\n",
 +                        comm->nstSortCG);
 +            }
 +        }
 +        snew(comm->sort,1);
 +    }
 +    else
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will not sort the charge groups\n");
 +        }
 +    }
 +    
 +    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
 +    if (comm->bInterCGBondeds)
 +    {
 +        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
 +    }
 +    else
 +    {
 +        comm->bInterCGMultiBody = FALSE;
 +    }
 +    
 +    dd->bInterCGcons = inter_charge_group_constraints(mtop);
 +
 +    if (ir->rlistlong == 0)
 +    {
 +        /* Set the cut-off to some very large value,
 +         * so we don't need if statements everywhere in the code.
 +         * We use sqrt, since the cut-off is squared in some places.
 +         */
 +        comm->cutoff   = GMX_CUTOFF_INF;
 +    }
 +    else
 +    {
 +        comm->cutoff   = ir->rlistlong;
 +    }
 +    comm->cutoff_mbody = 0;
 +    
 +    comm->cellsize_limit = 0;
 +    comm->bBondComm = FALSE;
 +
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm_distance_min > 0)
 +        {
 +            comm->cutoff_mbody = comm_distance_min;
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
 +            }
 +            else
 +            {
 +                comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else if (ir->bPeriodicMols)
 +        {
 +            /* Can not easily determine the required cut-off */
 +            dd_warning(cr,fplog,"NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
 +            comm->cutoff_mbody = comm->cutoff/2;
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            if (MASTER(cr))
 +            {
 +                dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
 +                                      Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
 +            }
 +            gmx_bcast(sizeof(r_2b),&r_2b,cr);
 +            gmx_bcast(sizeof(r_mb),&r_mb,cr);
 +
 +            /* We use an initial margin of 10% for the minimum cell size,
 +             * except when we are just below the non-bonded cut-off.
 +             */
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                if (max(r_2b,r_mb) > comm->cutoff)
 +                {
 +                    r_bonded       = max(r_2b,r_mb);
 +                    r_bonded_limit = 1.1*r_bonded;
 +                    comm->bBondComm = TRUE;
 +                }
 +                else
 +                {
 +                    r_bonded       = r_mb;
 +                    r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
 +                }
 +                /* We determine cutoff_mbody later */
 +            }
 +            else
 +            {
 +                /* No special bonded communication,
 +                 * simply increase the DD cut-off.
 +                 */
 +                r_bonded_limit     = 1.1*max(r_2b,r_mb);
 +                comm->cutoff_mbody = r_bonded_limit;
 +                comm->cutoff       = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Minimum cell size due to bonded interactions: %.3f nm\n",
 +                    comm->cellsize_limit);
 +        }
 +    }
 +
 +    if (dd->bInterCGcons && rconstr <= 0)
 +    {
 +        /* There is a cell size limit due to the constraints (P-LINCS) */
 +        rconstr = constr_r_max(fplog,mtop,ir);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
 +                    rconstr);
 +            if (rconstr > comm->cellsize_limit)
 +            {
 +                fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
 +            }
 +        }
 +    }
 +    else if (rconstr > 0 && fplog)
 +    {
 +        /* Here we do not check for dd->bInterCGcons,
 +         * because one can also set a cell size limit for virtual sites only
 +         * and at this point we don't know yet if there are intercg v-sites.
 +         */
 +        fprintf(fplog,
 +                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
 +                rconstr);
 +    }
 +    comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
 +
 +    comm->cgs_gl = gmx_mtop_global_cgs(mtop);
 +
 +    if (nc[XX] > 0)
 +    {
 +        copy_ivec(nc,dd->nc);
 +        set_dd_dim(fplog,dd);
 +        set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        if (cr->npmenodes == -1)
 +        {
 +            cr->npmenodes = 0;
 +        }
 +        acs = average_cellsize_min(dd,ddbox);
 +        if (acs < comm->cellsize_limit)
 +        {
 +            if (fplog)
 +            {
 +                fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
 +            }
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
 +                                 acs,comm->cellsize_limit);
 +        }
 +    }
 +    else
 +    {
 +        set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        /* We need to choose the optimal DD grid and possibly PME nodes */
 +        limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
 +                               comm->eDLB!=edlbNO,dlb_scale,
 +                               comm->cellsize_limit,comm->cutoff,
 +                               comm->bInterCGBondeds,comm->bInterCGMultiBody);
 +        
 +        if (dd->nc[XX] == 0)
 +        {
 +            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
 +            sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
 +                    !bC ? "-rdd" : "-rcon",
 +                    comm->eDLB!=edlbNO ? " or -dds" : "",
 +                    bC ? " or your LINCS settings" : "");
 +
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
 +                                 "%s\n"
 +                                 "Look in the log file for details on the domain decomposition",
 +                                 cr->nnodes-cr->npmenodes,limit,buf);
 +        }
 +        set_dd_dim(fplog,dd);
 +    }
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
 +    }
 +    
 +    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
 +    if (cr->nnodes - dd->nnodes != cr->npmenodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
 +                             dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
 +    }
 +    if (cr->npmenodes > dd->nnodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
 +    }
 +    if (cr->npmenodes > 0)
 +    {
 +        comm->npmenodes = cr->npmenodes;
 +    }
 +    else
 +    {
 +        comm->npmenodes = dd->nnodes;
 +    }
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        /* The following choices should match those
 +         * in comm_cost_est in domdec_setup.c.
 +         * Note that here the checks have to take into account
 +         * that the decomposition might occur in a different order than xyz
 +         * (for instance through the env.var. GMX_DD_ORDER_ZYX),
 +         * in which case they will not match those in comm_cost_est,
 +         * but since that is mainly for testing purposes that's fine.
 +         */
 +        if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
 +            comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
 +            getenv("GMX_PMEONEDD") == NULL)
 +        {
 +            comm->npmedecompdim = 2;
 +            comm->npmenodes_x   = dd->nc[XX];
 +            comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
 +        }
 +        else
 +        {
 +            /* In case nc is 1 in both x and y we could still choose to
 +             * decompose pme in y instead of x, but we use x for simplicity.
 +             */
 +            comm->npmedecompdim = 1;
 +            if (dd->dim[0] == YY)
 +            {
 +                comm->npmenodes_x = 1;
 +                comm->npmenodes_y = comm->npmenodes;
 +            }
 +            else
 +            {
 +                comm->npmenodes_x = comm->npmenodes;
 +                comm->npmenodes_y = 1;
 +            }
 +        }    
 +        if (fplog)
 +        {
 +            fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
 +                    comm->npmenodes_x,comm->npmenodes_y,1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmedecompdim = 0;
 +        comm->npmenodes_x   = 0;
 +        comm->npmenodes_y   = 0;
 +    }
 +    
 +    /* Technically we don't need both of these,
 +     * but it simplifies code not having to recalculate it.
 +     */
 +    *npme_x = comm->npmenodes_x;
 +    *npme_y = comm->npmenodes_y;
 +        
 +    snew(comm->slb_frac,DIM);
 +    if (comm->eDLB == edlbNO)
 +    {
 +        comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
 +        comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
 +        comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
 +    }
 +
 +    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
 +    {
 +        if (comm->bBondComm || comm->eDLB != edlbNO)
 +        {
 +            /* Set the bonded communication distance to halfway
 +             * the minimum and the maximum,
 +             * since the extra communication cost is nearly zero.
 +             */
 +            acs = average_cellsize_min(dd,ddbox);
 +            comm->cutoff_mbody = 0.5*(r_bonded + acs);
 +            if (comm->eDLB != edlbNO)
 +            {
 +                /* Check if this does not limit the scaling */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
 +            }
 +            if (!comm->bBondComm)
 +            {
 +                /* Without bBondComm do not go beyond the n.b. cut-off */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
 +                if (comm->cellsize_limit >= comm->cutoff)
 +                {
 +                    /* We don't loose a lot of efficieny
 +                     * when increasing it to the n.b. cut-off.
 +                     * It can even be slightly faster, because we need
 +                     * less checks for the communication setup.
 +                     */
 +                    comm->cutoff_mbody = comm->cutoff;
 +                }
 +            }
 +            /* Check if we did not end up below our original limit */
 +            comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
 +
 +            if (comm->cutoff_mbody > comm->cellsize_limit)
 +            {
 +                comm->cellsize_limit = comm->cutoff_mbody;
 +            }
 +        }
 +        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
 +                "cellsize limit %f\n",
 +                comm->bBondComm,comm->cellsize_limit);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        check_dd_restrictions(cr,dd,ir,fplog);
 +    }
 +
 +    comm->globalcomm_step = INT_MIN;
 +    dd->ddp_count = 0;
 +
 +    clear_dd_cycle_counts(dd);
 +
 +    return dd;
 +}
 +
 +static void set_dlb_limits(gmx_domdec_t *dd)
 +
 +{
 +    int d;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
 +        dd->comm->cellsize_min[dd->dim[d]] =
 +            dd->comm->cellsize_min_dlb[dd->dim[d]];
 +    }
 +}
 +
 +
 +static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    real cellsize_min;
 +    int  d,nc,i;
 +    char buf[STRLEN];
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
 +    }
 +
 +    cellsize_min = comm->cellsize_min[dd->dim[0]];
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
 +    }
 +
 +    if (cellsize_min < comm->cellsize_limit*1.05)
 +    {
 +        dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
 +
 +        /* Change DLB from "auto" to "no". */
 +        comm->eDLB = edlbNO;
 +
 +        return;
 +    }
 +
 +    dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
 +    comm->bDynLoadBal = TRUE;
 +    dd->bGridJump = TRUE;
 +    
 +    set_dlb_limits(dd);
 +
 +    /* We can set the required cell size info here,
 +     * so we do not need to communicate this.
 +     * The grid is completely uniform.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        if (comm->root[d])
 +        {
 +            comm->load[d].sum_m = comm->load[d].sum;
 +
 +            nc = dd->nc[dd->dim[d]];
 +            for(i=0; i<nc; i++)
 +            {
 +                comm->root[d]->cell_f[i]    = i/(real)nc;
 +                if (d > 0)
 +                {
 +                    comm->root[d]->cell_f_max0[i] =  i   /(real)nc;
 +                    comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
 +                }
 +            }
 +            comm->root[d]->cell_f[nc] = 1.0;
 +        }
 +    }
 +}
 +
 +static char *init_bLocalCG(gmx_mtop_t *mtop)
 +{
 +    int  ncg,cg;
 +    char *bLocalCG;
 +    
 +    ncg = ncg_mtop(mtop);
 +    snew(bLocalCG,ncg);
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        bLocalCG[cg] = FALSE;
 +    }
 +
 +    return bLocalCG;
 +}
 +
 +void dd_init_bondeds(FILE *fplog,
 +                     gmx_domdec_t *dd,gmx_mtop_t *mtop,
 +                     gmx_vsite_t *vsite,gmx_constr_t constr,
 +                     t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bBondComm;
 +    int  d;
 +
 +    dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
 +
 +    comm = dd->comm;
 +
 +    if (comm->bBondComm)
 +    {
 +        /* Communicate atoms beyond the cut-off for bonded interactions */
 +        comm = dd->comm;
 +
 +        comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
 +
 +        comm->bLocalCG = init_bLocalCG(mtop);
 +    }
 +    else
 +    {
 +        /* Only communicate atoms based on cut-off */
 +        comm->cglink   = NULL;
 +        comm->bLocalCG = NULL;
 +    }
 +}
 +
 +static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
 +                              t_inputrec *ir,
 +                              gmx_bool bDynLoadBal,real dlb_scale,
 +                              gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec np;
 +    real limit,shrink;
 +    char buf[64];
 +
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +
 +    comm = dd->comm;
 +
 +    if (bDynLoadBal)
 +    {
 +        fprintf(fplog,"The maximum number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
 +        fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
 +        fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
 +        for(d=0; d<DIM; d++)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                if (d >= ddbox->npbcdim && dd->nc[d] == 2)
 +                {
 +                    shrink = 0;
 +                }
 +                else
 +                {
 +                    shrink =
 +                        comm->cellsize_min_dlb[d]/
 +                        (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +                }
 +                fprintf(fplog," %c %.2f",dim2char(d),shrink);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
 +        fprintf(fplog,"The initial number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The initial domain decomposition cell size is:");
 +        for(d=0; d<DIM; d++) {
 +            if (dd->nc[d] > 1)
 +            {
 +                fprintf(fplog," %c %.2f nm",
 +                        dim2char(d),dd->comm->cellsize_min[d]);
 +            }
 +        }
 +        fprintf(fplog,"\n\n");
 +    }
 +    
 +    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
 +    {
 +        fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
 +        fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                "non-bonded interactions","",comm->cutoff);
 +
 +        if (bDynLoadBal)
 +        {
 +            limit = dd->comm->cellsize_limit;
 +        }
 +        else
 +        {
 +            if (dynamic_dd_box(ddbox,ir))
 +            {
 +                fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
 +            }
 +            limit = dd->comm->cellsize_min[XX];
 +            for(d=1; d<DIM; d++)
 +            {
 +                limit = min(limit,dd->comm->cellsize_min[d]);
 +            }
 +        }
 +
 +        if (comm->bInterCGBondeds)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "two-body bonded interactions","(-rdd)",
 +                    max(comm->cutoff,comm->cutoff_mbody));
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "multi-body bonded interactions","(-rdd)",
 +                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
 +        }
 +        if (dd->vsite_comm)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "virtual site constructions","(-rcon)",limit);
 +        }
 +        if (dd->constraint_comm)
 +        {
 +            sprintf(buf,"atoms separated by up to %d constraints",
 +                    1+ir->nProjOrder);
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    buf,"(-rcon)",limit);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    fflush(fplog);
 +}
 +
 +void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
 +                       t_inputrec *ir,t_forcerec *fr,
 +                       gmx_ddbox_t *ddbox)
 +{   /* Finish the domain decomposition communication parameters:
 +     *  - with PME electrostatics initialize comm->ddpme[] through
 +     *    init_ddpme(); otherwise set comm->npmenodes to 0 and stop with
 +     *    a fatal error when dd->pme_nodeid >= 0;
 +     *  - set fr->bMolPBC;
 +     *  - when comm->eDLB != edlbNO: determine the number of communication
 +     *    pulses, fill cd[d].np_dlb, allocate cd[d].ind and set
 +     *    comm->maxpulse, cellsize_limit, cellsize_min_dlb[] and, when it
 +     *    is still <= 0, cutoff_mbody;
 +     *  - print the settings via print_dd_settings(), a second time with
 +     *    the DLB values when eDLB is edlbAUTO;
 +     *  - initialize dd->ga2la through ga2la_init().
 +     * fplog may be NULL: the local fprintf is guarded and
 +     * print_dd_settings() returns early in that case.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim,npulse,npulse_d_max,npulse_d;
 +    gmx_bool bNoCutOff;
 +    int  natoms_tot;
 +    real vol_frac;
 +
 +    comm = dd->comm;
 +
 +    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0); /* NOTE(review): a radius of 0 presumably means "no cut-off" - confirm */
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        init_ddpme(dd,&comm->ddpme[0],0);
 +        if (comm->npmedecompdim >= 2)
 +        {
 +            init_ddpme(dd,&comm->ddpme[1],1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmenodes = 0;
 +        if (dd->pme_nodeid >= 0)
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "Can not have separate PME nodes without PME electrostatics");
 +        }
 +    }
 +    
 +    /* If each molecule is a single charge group
 +     * or we use domain decomposition for each periodic dimension,
 +     * we do not need to take pbc into account for the bonded interactions.
 +     */
 +    if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
 +        (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        fr->bMolPBC = TRUE;
 +    }
 +        
 +    if (debug)
 +    {
 +        fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
 +    }
 +    if (comm->eDLB != edlbNO)
 +    {
 +        /* Determine the maximum number of comm. pulses in one dimension */
 +        
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        
 +        /* Determine the maximum required number of grid pulses */
 +        if (comm->cellsize_limit >= comm->cutoff)
 +        {
 +            /* Only a single pulse is required */
 +            npulse = 1;
 +        }
 +        else if (!bNoCutOff && comm->cellsize_limit > 0)
 +        {
 +            /* We round down slightly here to avoid overhead due to the latency
 +             * of extra communication calls when the cut-off
 +             * would be only slightly longer than the cell size.
 +             * Later cellsize_limit is redetermined,
 +             * so we can not miss interactions due to this rounding.
 +             */
 +            npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
 +        }
 +        else
 +        {
 +            /* There is no cell size limit */
 +            npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
 +        }
 +
 +        if (!bNoCutOff && npulse > 1)
 +        {
 +            /* See if we can do with less pulses, based on dlb_scale */
 +            npulse_d_max = 0;
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                dim = dd->dim[d];
 +                npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
 +                                 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
 +                npulse_d_max = max(npulse_d_max,npulse_d);
 +            }
 +            npulse = min(npulse,npulse_d_max);
 +        }
 +        
 +        /* This env var can override npulse */
 +        d = dd_nst_env(fplog,"GMX_DD_NPULSE",0); /* d is reused as scratch; only a value > 0 replaces npulse */
 +        if (d > 0)
 +        {
 +            npulse = d;
 +        }
 +
 +        comm->maxpulse = 1;
 +        comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE); /* cleared below as soon as one dimension has np_dlb < nc-1 */
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1); /* never more pulses than nc-1 in this dimension */
 +            comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
 +            snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
 +            comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
 +            if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
 +            {
 +                comm->bVacDLBNoLimit = FALSE;
 +            }
 +        }
 +        
 +        /* cellsize_limit is set for LINCS in init_domain_decomposition */
 +        if (!comm->bVacDLBNoLimit)
 +        {
 +            comm->cellsize_limit = max(comm->cellsize_limit,
 +                                       comm->cutoff/comm->maxpulse);
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody); /* NOTE(review): cellsize_limit was already raised to at least cutoff_mbody above and has only grown since, so this looks redundant */
 +        /* Set the minimum cell size for each DD dimension */
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (comm->bVacDLBNoLimit ||
 +                comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
 +            {
 +                comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
 +            }
 +            else
 +            {
 +                comm->cellsize_min_dlb[dd->dim[d]] =
 +                    comm->cutoff/comm->cd[d].np_dlb; /* np_dlb pulses of this size just span the cut-off */
 +            }
 +        }
 +        if (comm->cutoff_mbody <= 0)
 +        {
 +            comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
 +        }
 +        if (comm->bDynLoadBal)
 +        {
 +            set_dlb_limits(dd);
 +        }
 +    }
 +    
 +    print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
 +    if (comm->eDLB == edlbAUTO)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
 +        }
 +        print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
 +    }
 +
 +    if (ir->ePBC == epbcNONE)
 +    {
 +        vol_frac = 1 - 1/(double)dd->nnodes;
 +    }
 +    else
 +    {
 +        vol_frac =
 +            (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
 +    }
 +    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr]; /* last entry of the global charge group index, used as the total atom count */
 +   
 +    dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
 +}
 +
 +static void merge_cg_buffers(int ncell,
 +                             gmx_domdec_comm_dim_t *cd, int pulse,
 +                             int  *ncg_cell,
 +                             int  *index_gl, int  *recv_i,
 +                             rvec *cg_cm,    rvec *recv_vr,
 +                             int *cgindex,
 +                             cginfo_mb_t *cginfo_mb,int *cginfo)
 +{   /* Merge the charge groups received in communication pulse `pulse`
 +     * (global indices in recv_i, positions in recv_vr) into the local
 +     * arrays index_gl, cg_cm, cgindex and cginfo, cell by cell.
 +     * Step 1 moves the entries already stored for each cell upwards to
 +     * make room and adds the same shift to the stored send indices of
 +     * pulses 1..pulse.
 +     * Step 2 appends the received charge groups after the existing ones
 +     * of each cell, extends cgindex by their atom counts and updates the
 +     * cell boundaries ncg_cell[ncell+cell+1].
 +     */
 +    gmx_domdec_ind_t *ind,*ind_p;
 +    int p,cell,c,cg,cg0,cg1,cg_gl,nat;
 +    int shift,shift_at;
 +    
 +    ind = &cd->ind[pulse];
 +    
 +    /* First correct the already stored data */
 +    shift = ind->nrecv[ncell]; /* NOTE(review): nrecv[ncell] presumably holds the total over all cells - confirm */
 +    for(cell=ncell-1; cell>=0; cell--)
 +    {
 +        shift -= ind->nrecv[cell];
 +        if (shift > 0)
 +        {
 +            /* Move the cg's present from previous grid pulses */
 +            cg0 = ncg_cell[ncell+cell];
 +            cg1 = ncg_cell[ncell+cell+1];
 +            cgindex[cg1+shift] = cgindex[cg1]; /* the entry one past the last cg of this cell moves too */
 +            for(cg=cg1-1; cg>=cg0; cg--) /* top-down, so entries are read before they can be overwritten */
 +            {
 +                index_gl[cg+shift] = index_gl[cg];
 +                copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
 +                cgindex[cg+shift] = cgindex[cg];
 +                cginfo[cg+shift] = cginfo[cg];
 +            }
 +            /* Correct the already stored send indices for the shift */
 +            for(p=1; p<=pulse; p++)
 +            {
 +                ind_p = &cd->ind[p];
 +                cg0 = 0;
 +                for(c=0; c<cell; c++) /* offset of this cell's entries within ind_p->index */
 +                {
 +                    cg0 += ind_p->nsend[c];
 +                }
 +                cg1 = cg0 + ind_p->nsend[cell];
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    ind_p->index[cg] += shift;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Merge in the communicated buffers */
 +    shift = 0; /* charge groups inserted for the cells handled so far */
 +    shift_at = 0; /* atoms inserted so far */
 +    cg0 = 0; /* read position in recv_i / recv_vr */
 +    for(cell=0; cell<ncell; cell++)
 +    {
 +        cg1 = ncg_cell[ncell+cell+1] + shift; /* write position: just past the moved old entries of this cell */
 +        if (shift_at > 0)
 +        {
 +            /* Correct the old cg indices */
 +            for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
 +            {
 +                cgindex[cg+1] += shift_at;
 +            }
 +        }
 +        for(cg=0; cg<ind->nrecv[cell]; cg++) /* cg only counts here; cg0 and cg1 are the actual positions */
 +        {
 +            /* Copy this charge group from the buffer */
 +            index_gl[cg1] = recv_i[cg0];
 +            copy_rvec(recv_vr[cg0],cg_cm[cg1]);
 +            /* Add it to the cgindex */
 +            cg_gl = index_gl[cg1];
 +            cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
 +            nat = GET_CGINFO_NATOMS(cginfo[cg1]);
 +            cgindex[cg1+1] = cgindex[cg1] + nat;
 +            cg0++;
 +            cg1++;
 +            shift_at += nat;
 +        }
 +        shift += ind->nrecv[cell];
 +        ncg_cell[ncell+cell+1] = cg1; /* new upper boundary of this cell */
 +    }
 +}
 +
 +static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
 +                               int nzone,int cg0,const int *cgindex)
 +{
 +    int cg,zone,p;
 +    
 +    /* Store the atom block boundaries for easy copying of communication buffers
 +     */
 +    cg = cg0;
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        for(p=0; p<cd->np; p++) {
 +            cd->ind[p].cell2at0[zone] = cgindex[cg];
 +            cg += cd->ind[p].nrecv[zone];
 +            cd->ind[p].cell2at1[zone] = cgindex[cg];
 +        }
 +    }
 +}
 +
 +static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
 +{
 +    int  i;
 +    gmx_bool bMiss;
 +
 +    bMiss = FALSE;
 +    for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
 +    {
 +        if (!bLocalCG[link->a[i]])
 +        {
 +            bMiss = TRUE;
 +        }
 +    }
 +
 +    return bMiss;
 +}
 +
 +static void setup_dd_communication(gmx_domdec_t *dd,
 +                                   matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
 +{
 +    int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
 +    int nzone,nzone_send,zone,zonei,cg0,cg1;
 +    int c,i,j,cg,cg_gl,nrcg;
 +    int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
 +    real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
 +    rvec rb,rn;
 +    real corner[DIM][4],corner_round_0=0,corner_round_1[4];
 +    real bcorner[DIM],bcorner_round_1=0;
 +    ivec tric_dist;
 +    rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
 +    real skew_fac2_d,skew_fac_01;
 +    rvec sf2_round;
 +    int  nsend,nat;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Setting up DD communication\n");
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +
 +        /* Check if we need to use triclinic distances */
 +        tric_dist[dim_ind] = 0;
 +        for(i=0; i<=dim_ind; i++)
 +        {
 +            if (ddbox->tric_dir[dd->dim[i]])
 +            {
 +                tric_dist[dim_ind] = 1;
 +            }
 +        }
 +    }
 +
 +    bBondComm = comm->bBondComm;
 +
 +    /* Do we need to determine extra distances for multi-body bondeds? */
 +    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
 +    
 +    /* Do we need to determine extra distances for only two-body bondeds? */
 +    bDist2B = (bBondComm && !bDistMB);
 +
 +    r_comm2  = sqr(comm->cutoff);
 +    r_bcomm2 = sqr(comm->cutoff_mbody);
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
 +    }
 +
 +    zones = &comm->zones;
 +    
 +    dim0 = dd->dim[0];
 +    /* The first dimension is equal for all cells */
 +    corner[0][0] = comm->cell_x0[dim0];
 +    if (bDistMB)
 +    {
 +        bcorner[0] = corner[0][0];
 +    }
 +    if (dd->ndim >= 2)
 +    {
 +        dim1 = dd->dim[1];
 +        /* This cell row is only seen from the first row */
 +        corner[1][0] = comm->cell_x0[dim1];
 +        /* All rows can see this row */
 +        corner[1][1] = comm->cell_x0[dim1];
 +        if (dd->bGridJump)
 +        {
 +            corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
 +            if (bDistMB)
 +            {
 +                /* For the multi-body distance we need the maximum */
 +                bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
 +            }
 +        }
 +        /* Set the upper-right corner for rounding */
 +        corner_round_0 = comm->cell_x1[dim0];
 +        
 +        if (dd->ndim >= 3)
 +        {
 +            dim2 = dd->dim[2];
 +            for(j=0; j<4; j++)
 +            {
 +                corner[2][j] = comm->cell_x0[dim2];
 +            }
 +            if (dd->bGridJump)
 +            {
 +                /* Use the maximum of the i-cells that see a j-cell */
 +                for(i=0; i<zones->nizone; i++)
 +                {
 +                    for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
 +                    {
 +                        if (j >= 4)
 +                        {
 +                            corner[2][j-4] =
 +                                max(corner[2][j-4],
 +                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
 +                        }
 +                    }
 +                }
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner[2] = comm->cell_x0[dim2];
 +                    for(i=0; i<2; i++)
 +                    {
 +                        for(j=0; j<2; j++)
 +                        {
 +                            bcorner[2] = max(bcorner[2],
 +                                             comm->zone_d2[i][j].p1_0);
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            /* Set the upper-right corner for rounding */
 +            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
 +             * Only cell (0,0,0) can see cell 7 (1,1,1)
 +             */
 +            corner_round_1[0] = comm->cell_x1[dim1];
 +            corner_round_1[3] = comm->cell_x1[dim1];
 +            if (dd->bGridJump)
 +            {
 +                corner_round_1[0] = max(comm->cell_x1[dim1],
 +                                        comm->zone_d1[1].mch1);
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner_round_1 = max(comm->cell_x1[dim1],
 +                                          comm->zone_d1[1].p1_1);
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* Triclinic stuff */
 +    normal = ddbox->normal;
 +    skew_fac_01 = 0;
 +    if (dd->ndim >= 2)
 +    {
 +        v_0 = ddbox->v[dim0];
 +        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
 +        {
 +            /* Determine the coupling coefficient for the distances
 +             * to the cell planes along dim0 and dim1 through dim2.
 +             * This is required for correct rounding.
 +             */
 +            skew_fac_01 =
 +                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
 +            if (debug)
 +            {
 +                fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
 +            }
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        v_1 = ddbox->v[dim1];
 +    }
 +    
 +    zone_cg_range = zones->cg_range;
 +    index_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    cginfo_mb = fr->cginfo_mb;
 +    
 +    zone_cg_range[0]   = 0;
 +    zone_cg_range[1]   = dd->ncg_home;
 +    comm->zone_ncg1[0] = dd->ncg_home;
 +    pos_cg             = dd->ncg_home;
 +    
 +    nat_tot = dd->nat_home;
 +    nzone = 1;
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        cd = &comm->cd[dim_ind];
 +        
 +        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
 +        {
 +            /* No pbc in this dimension, the first node should not comm. */
 +            nzone_send = 0;
 +        }
 +        else
 +        {
 +            nzone_send = nzone;
 +        }
 +
 +        bScrew = (dd->bScrewPBC && dim == XX);
 +        
 +        v_d = ddbox->v[dim];
 +        skew_fac2_d = sqr(ddbox->skew_fac[dim]);
 +
 +        cd->bInPlace = TRUE;
 +        for(p=0; p<cd->np; p++)
 +        {
 +            /* Only atoms communicated in the first pulse are used
 +             * for multi-body bonded interactions or for bBondComm.
 +             */
 +            bDistBonded   = ((bDistMB || bDist2B) && p == 0);
 +            bDistMB_pulse = (bDistMB && bDistBonded);
 +
 +            ind = &cd->ind[p];
 +            nsend = 0;
 +            nat = 0;
 +            for(zone=0; zone<nzone_send; zone++)
 +            {
 +                if (tric_dist[dim_ind] && dim_ind > 0)
 +                {
 +                    /* Determine slightly more optimized skew_fac's
 +                     * for rounding.
 +                     * This reduces the number of communicated atoms
 +                     * by about 10% for 3D DD of rhombic dodecahedra.
 +                     */
 +                    for(dimd=0; dimd<dim; dimd++)
 +                    {
 +                        sf2_round[dimd] = 1;
 +                        if (ddbox->tric_dir[dimd])
 +                        {
 +                            for(i=dd->dim[dimd]+1; i<DIM; i++)
 +                            {
 +                                /* If we are shifted in dimension i
 +                                 * and the cell plane is tilted forward
 +                                 * in dimension i, skip this coupling.
 +                                 */
 +                                if (!(zones->shift[nzone+zone][i] &&
 +                                      ddbox->v[dimd][i][dimd] >= 0))
 +                                {
 +                                    sf2_round[dimd] +=
 +                                        sqr(ddbox->v[dimd][i][dimd]);
 +                                }
 +                            }
 +                            sf2_round[dimd] = 1/sf2_round[dimd];
 +                        }
 +                    }
 +                }
 +
 +                zonei = zone_perm[dim_ind][zone];
 +                if (p == 0)
 +                {
 +                    /* Here we permutate the zones to obtain a convenient order
 +                     * for neighbor searching
 +                     */
 +                    cg0 = zone_cg_range[zonei];
 +                    cg1 = zone_cg_range[zonei+1];
 +                }
 +                else
 +                {
 +                    /* Look only at the cg's received in the previous grid pulse
 +                     */
 +                    cg1 = zone_cg_range[nzone+zone+1];
 +                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
 +                }
 +                ind->nsend[zone] = 0;
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    r2  = 0;
 +                    rb2 = 0;
 +                    if (tric_dist[dim_ind] == 0)
 +                    {
 +                        /* Rectangular direction, easy */
 +                        r = cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        if (r > 0)
 +                        {
 +                            r2 += r*r;
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            r = cg_cm[cg][dim] - bcorner[dim_ind];
 +                            if (r > 0)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        /* Rounding gives at most a 16% reduction
 +                         * in communicated atoms
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            r = cg_cm[cg][dim0] - corner_round_0;
 +                            /* This is the first dimension, so always r >= 0 */
 +                            r2 += r*r;
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            r = cg_cm[cg][dim1] - corner_round_1[zone];
 +                            if (r > 0)
 +                            {
 +                                r2 += r*r;
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                r = cg_cm[cg][dim1] - bcorner_round_1;
 +                                if (r > 0)
 +                                {
 +                                    rb2 += r*r;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    else
 +                    {
 +                        /* Triclinic direction, more complicated */
 +                        clear_rvec(rn);
 +                        clear_rvec(rb);
 +                        /* Rounding, conservative as the skew_fac multiplication
 +                         * will slightly underestimate the distance.
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
 +                            for(i=dim0+1; i<DIM; i++)
 +                            {
 +                                rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
 +                            }
 +                            r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim0] = rn[dim0];
 +                                rb2 = r2;
 +                            }
 +                            /* Take care that the cell planes along dim0 might not
 +                             * be orthogonal to those along dim1 and dim2.
 +                             */
 +                            for(i=1; i<=dim_ind; i++)
 +                            {
 +                                dimd = dd->dim[i];
 +                                if (normal[dim0][dimd] > 0)
 +                                {
 +                                    rn[dimd] -= rn[dim0]*normal[dim0][dimd];
 +                                    if (bDistMB_pulse)
 +                                    {
 +                                        rb[dimd] -= rb[dim0]*normal[dim0][dimd];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
 +                            tric_sh = 0;
 +                            for(i=dim1+1; i<DIM; i++)
 +                            {
 +                                tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
 +                            }
 +                            rn[dim1] += tric_sh;
 +                            if (rn[dim1] > 0)
 +                            {
 +                                r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
 +                                /* Take care that the cell planes along dim1
 +                                 * might not be orthogonal to that along dim2.
 +                                 */
 +                                if (normal[dim1][dim2] > 0)
 +                                {
 +                                    rn[dim2] -= rn[dim1]*normal[dim1][dim2];
 +                                }
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim1] +=
 +                                    cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
 +                                if (rb[dim1] > 0)
 +                                {
 +                                    rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
 +                                    /* Take care of coupling of the distances
 +                                     * to the planes along dim0 and dim1 through dim2.
 +                                     */
 +                                    rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
 +                                    /* Take care that the cell planes along dim1
 +                                     * might not be orthogonal to that along dim2.
 +                                     */
 +                                    if (normal[dim1][dim2] > 0)
 +                                    {
 +                                        rb[dim2] -= rb[dim1]*normal[dim1][dim2];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        /* The distance along the communication direction */
 +                        rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        tric_sh = 0;
 +                        for(i=dim+1; i<DIM; i++)
 +                        {
 +                            tric_sh -= cg_cm[cg][i]*v_d[i][dim];
 +                        }
 +                        rn[dim] += tric_sh;
 +                        if (rn[dim] > 0)
 +                        {
 +                            r2 += rn[dim]*rn[dim]*skew_fac2_d;
 +                            /* Take care of coupling of the distances
 +                             * to the planes along dim0 and dim1 through dim2.
 +                             */
 +                            if (dim_ind == 1 && zonei == 1)
 +                            {
 +                                r2 -= rn[dim0]*rn[dim]*skew_fac_01;
 +                            }
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            clear_rvec(rb);
 +                            rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
 +                            if (rb[dim] > 0)
 +                            {
 +                                rb2 += rb[dim]*rb[dim]*skew_fac2_d;
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                if (dim_ind == 1 && zonei == 1)
 +                                {
 +                                    rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    
 +                    if (r2 < r_comm2 ||
 +                        (bDistBonded &&
 +                         ((bDistMB && rb2 < r_bcomm2) ||
 +                          (bDist2B && r2  < r_bcomm2)) &&
 +                         (!bBondComm ||
 +                          (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
 +                           missing_link(comm->cglink,index_gl[cg],
 +                                        comm->bLocalCG)))))
 +                    {
 +                        /* Make an index to the local charge groups */
 +                        if (nsend+1 > ind->nalloc)
 +                        {
 +                            ind->nalloc = over_alloc_large(nsend+1);
 +                            srenew(ind->index,ind->nalloc);
 +                        }
 +                        if (nsend+1 > comm->nalloc_int)
 +                        {
 +                            comm->nalloc_int = over_alloc_large(nsend+1);
 +                            srenew(comm->buf_int,comm->nalloc_int);
 +                        }
 +                        ind->index[nsend] = cg;
 +                        comm->buf_int[nsend] = index_gl[cg];
 +                        ind->nsend[zone]++;
 +                        vec_rvec_check_alloc(&comm->vbuf,nsend+1);
 +
 +                        if (dd->ci[dim] == 0)
 +                        {
 +                            /* Correct cg_cm for pbc */
 +                            rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
 +                            if (bScrew)
 +                            {
 +                                comm->vbuf.v[nsend][YY] =
 +                                    box[YY][YY]-comm->vbuf.v[nsend][YY];
 +                                comm->vbuf.v[nsend][ZZ] =
 +                                    box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
 +                            }
 +                        }
 +                        else
 +                        {
 +                            copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
 +                        }
 +                        nsend++;
 +                        nat += cgindex[cg+1] - cgindex[cg];
 +                    }
 +                }
 +            }
 +            /* Clear the counts in case we do not have pbc */
 +            for(zone=nzone_send; zone<nzone; zone++)
 +            {
 +                ind->nsend[zone] = 0;
 +            }
 +            ind->nsend[nzone]   = nsend;
 +            ind->nsend[nzone+1] = nat;
 +            /* Communicate the number of cg's and atoms to receive */
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            ind->nsend, nzone+2,
 +                            ind->nrecv, nzone+2);
 +            
 +            /* The rvec buffer is also required for atom buffers of size nsend
 +             * in dd_move_x and dd_move_f.
 +             */
 +            vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
 +
 +            if (p > 0)
 +            {
 +                /* We can receive in place if only the last zone is not empty */
 +                for(zone=0; zone<nzone-1; zone++)
 +                {
 +                    if (ind->nrecv[zone] > 0)
 +                    {
 +                        cd->bInPlace = FALSE;
 +                    }
 +                }
 +                if (!cd->bInPlace)
 +                {
 +                    /* The int buffer is only required here for the cg indices */
 +                    if (ind->nrecv[nzone] > comm->nalloc_int2)
 +                    {
 +                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
 +                        srenew(comm->buf_int2,comm->nalloc_int2);
 +                    }
 +                    /* The rvec buffer is also required for atom buffers
 +                     * of size nrecv in dd_move_x and dd_move_f.
 +                     */
 +                    i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
 +                    vec_rvec_check_alloc(&comm->vbuf2,i);
 +                }
 +            }
 +            
 +            /* Make space for the global cg indices */
 +            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
 +                || dd->cg_nalloc == 0)
 +            {
 +                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
 +                srenew(index_gl,dd->cg_nalloc);
 +                srenew(cgindex,dd->cg_nalloc+1);
 +            }
 +            /* Communicate the global cg indices */
 +            if (cd->bInPlace)
 +            {
 +                recv_i = index_gl + pos_cg;
 +            }
 +            else
 +            {
 +                recv_i = comm->buf_int2;
 +            }
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            comm->buf_int, nsend,
 +                            recv_i,        ind->nrecv[nzone]);
 +
 +            /* Make space for cg_cm */
 +            if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
 +            {
 +                dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
 +                cg_cm = fr->cg_cm;
 +            }
 +            /* Communicate cg_cm */
 +            if (cd->bInPlace)
 +            {
 +                recv_vr = cg_cm + pos_cg;
 +            }
 +            else
 +            {
 +                recv_vr = comm->vbuf2.v;
 +            }
 +            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
 +                             comm->vbuf.v, nsend,
 +                             recv_vr,      ind->nrecv[nzone]);
 +            
 +            /* Make the charge group index */
 +            if (cd->bInPlace)
 +            {
 +                zone = (p == 0 ? 0 : nzone - 1);
 +                while (zone < nzone)
 +                {
 +                    for(cg=0; cg<ind->nrecv[zone]; cg++)
 +                    {
 +                        cg_gl = index_gl[pos_cg];
 +                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
 +                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
 +                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
 +                        if (bBondComm)
 +                        {
 +                            /* Update the charge group presence,
 +                             * so we can use it in the next pass of the loop.
 +                             */
 +                            comm->bLocalCG[cg_gl] = TRUE;
 +                        }
 +                        pos_cg++;
 +                    }
 +                    if (p == 0)
 +                    {
 +                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
 +                    }
 +                    zone++;
 +                    zone_cg_range[nzone+zone] = pos_cg;
 +                }
 +            }
 +            else
 +            {
 +                /* This part of the code is never executed with bBondComm. */
 +                merge_cg_buffers(nzone,cd,p,zone_cg_range,
 +                                 index_gl,recv_i,cg_cm,recv_vr,
 +                                 cgindex,fr->cginfo_mb,fr->cginfo);
 +                pos_cg += ind->nrecv[nzone];
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        if (!cd->bInPlace)
 +        {
 +            /* Store the atom block for easy copying of communication buffers */
 +            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
 +        }
 +        nzone += nzone;
 +    }
 +    dd->index_gl = index_gl;
 +    dd->cgindex  = cgindex;
 +    
 +    dd->ncg_tot = zone_cg_range[zones->n];
 +    dd->nat_tot = nat_tot;
 +    comm->nat[ddnatHOME] = dd->nat_home;
 +    for(i=ddnatZONE; i<ddnatNR; i++)
 +    {
 +        comm->nat[i] = dd->nat_tot;
 +    }
 +
 +    if (!bBondComm)
 +    {
 +        /* We don't need to update cginfo, since that was alrady done above.
 +         * So we pass NULL for the forcerec.
 +         */
 +        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
 +                      NULL,comm->bLocalCG);
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished setting up DD communication, zones:");
 +        for(c=0; c<zones->n; c++)
 +        {
 +            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static void set_cg_boundaries(gmx_domdec_zones_t *zones)
 +{
 +    int c;
 +    
 +    for(c=0; c<zones->nizone; c++)
 +    {
 +        zones->izone[c].cg1  = zones->cg_range[c+1];
 +        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
 +        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
 +    }
 +}
 +
 +static int comp_cgsort(const void *a,const void *b)
 +{
 +    int comp;
 +    
 +    gmx_cgsort_t *cga,*cgb;
 +    cga = (gmx_cgsort_t *)a;
 +    cgb = (gmx_cgsort_t *)b;
 +    
 +    comp = cga->nsc - cgb->nsc;
 +    if (comp == 0)
 +    {
 +        comp = cga->ind_gl - cgb->ind_gl;
 +    }
 +    
 +    return comp;
 +}
 +
 +static void order_int_cg(int n,gmx_cgsort_t *sort,
 +                         int *a,int *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        buf[i] = a[sort[i].ind];
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        a[i] = buf[i];
 +    }
 +}
 +
 +static void order_vec_cg(int n,gmx_cgsort_t *sort,
 +                         rvec *v,rvec *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(v[sort[i].ind],buf[i]);
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(buf[i],v[i]);
 +    }
 +}
 +
 +static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
 +                           rvec *v,rvec *buf)
 +{
 +    int a,atot,cg,cg0,cg1,i;
 +    
 +    /* Order the data */
 +    a = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        cg0 = cgindex[sort[cg].ind];
 +        cg1 = cgindex[sort[cg].ind+1];
 +        for(i=cg0; i<cg1; i++)
 +        {
 +            copy_rvec(v[i],buf[a]);
 +            a++;
 +        }
 +    }
 +    atot = a;
 +    
 +    /* Copy back to the original array */
 +    for(a=0; a<atot; a++)
 +    {
 +        copy_rvec(buf[a],v[a]);
 +    }
 +}
 +
 +static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
 +                         int nsort_new,gmx_cgsort_t *sort_new,
 +                         gmx_cgsort_t *sort1)
 +{
 +    int i1,i2,i_new;
 +    
 +    /* The new indices are not very ordered, so we qsort them */
 +    qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
 +    
 +    /* sort2 is already ordered, so now we can merge the two arrays */
 +    i1 = 0;
 +    i2 = 0;
 +    i_new = 0;
 +    while(i2 < nsort2 || i_new < nsort_new)
 +    {
 +        if (i2 == nsort2)
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +        else if (i_new == nsort_new)
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
 +                 (sort2[i2].nsc == sort_new[i_new].nsc &&
 +                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +    }
 +}
 +/* Sort the home charge groups on ns grid cell index and then on global
 + * charge group index, and reorder the distributed state vectors, cgcm,
 + * dd->index_gl, fr->cginfo, dd->cgindex and the ns grid cell indices
 + * to match the sorted order.
 + *
 + * Charge groups whose cell index equals 4*fr->ns.grid->ncells are not
 + * counted in the new home count; dd->ncg_home, dd->nat_home and
 + * fr->ns.grid->nr are set from the remaining charge groups.
 + *
 + * ncg_home_old >= 0 selects the merge path, which relies on sort->sort1
 + * still holding the cell indices of the previous sort for the first
 + * ncg_home_old charge groups; a negative value selects a full qsort.
 + * The ePBC argument is not used in this function.
 + */
 +static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
 +                          rvec *cgcm,t_forcerec *fr,t_state *state,
 +                          int ncg_home_old)
 +{
 +    gmx_domdec_sort_t *sort;
 +    gmx_cgsort_t *cgsort,*sort_i;
 +    int  ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
 +    rvec *vbuf;
 +    
 +    sort = dd->comm->sort;
 +    
 +    if (dd->ncg_home > sort->sort_nalloc)
 +    {
 +        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(sort->sort1,sort->sort_nalloc);
 +        srenew(sort->sort2,sort->sort_nalloc);
 +    }
 +    
 +    if (ncg_home_old >= 0)
 +    {
 +        /* The charge groups that remained in the same ns grid cell
 +         * are completely ordered. So we can sort efficiently by sorting
 +         * the charge groups that did move into the stationary list.
 +         */
 +        ncg_new = 0;
 +        nsort2 = 0;
 +        nsort_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Check if this cg did not move to another node */
 +            cell_index = fr->ns.grid->cell_index[i];
 +            if (cell_index !=  4*fr->ns.grid->ncells)
 +            {
 +                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
 +                {
 +                    /* This cg is new on this node or moved ns grid cell */
 +                    if (nsort_new >= sort->sort_new_nalloc)
 +                    {
 +                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
 +                        srenew(sort->sort_new,sort->sort_new_nalloc);
 +                    }
 +                    sort_i = &(sort->sort_new[nsort_new++]);
 +                }
 +                else
 +                {
 +                    /* This cg did not move */
 +                    sort_i = &(sort->sort2[nsort2++]);
 +                }
 +                /* Sort on the ns grid cell indices
 +                 * and the global topology index
 +                 */
 +                sort_i->nsc    = cell_index;
 +                sort_i->ind_gl = dd->index_gl[i];
 +                sort_i->ind    = i;
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
 +                    nsort2,nsort_new);
 +        }
 +        /* Sort efficiently */
 +        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
 +    }
 +    else
 +    {
 +        cgsort = sort->sort1;
 +        ncg_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Sort on the ns grid cell indices
 +             * and the global topology index
 +             */
 +            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
 +            cgsort[i].ind_gl = dd->index_gl[i];
 +            cgsort[i].ind    = i;
 +            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
 +            {
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
 +        }
 +        /* Determine the order of the charge groups using qsort */
 +        qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
 +    }
 +    cgsort = sort->sort1; /* both branches above leave the sorted list in sort1 */
 +    
 +    /* We alloc with the old size, since cgindex is still old */
 +    vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
 +    vbuf = dd->comm->vbuf.v;
 +    
 +    /* Remove the charge groups which are no longer at home here */
 +    dd->ncg_home = ncg_new;
 +    
 +    /* Reorder the state */
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state->flags & (1<<i)))
 +        {
 +            switch (i)
 +            {
 +            case estX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
 +                break;
 +            case estV:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
 +                break;
 +            case estSDX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
 +                break;
 +            case estCGP:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No ordering required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_sort_state");
 +                break;
 +            }
 +        }
 +    }
 +    /* Reorder cgcm */
 +    order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
 +    /* The int buffer gets ncg_home+1 entries, since it is also used
 +     * below to hold the rebuilt cgindex, which has one entry more
 +     * than there are charge groups.
 +     */
 +    if (dd->ncg_home+1 > sort->ibuf_nalloc)
 +    {
 +        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
 +        srenew(sort->ibuf,sort->ibuf_nalloc);
 +    }
 +    ibuf = sort->ibuf;
 +    /* Reorder the global cg index */
 +    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
 +    /* Reorder the cginfo */
 +    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
 +    /* Rebuild the local cg index:
 +     * the size of each cg is read from the old, not yet overwritten
 +     * dd->cgindex and accumulated in sorted order in ibuf,
 +     * which is then copied over dd->cgindex.
 +     */
 +    ibuf[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
 +        ibuf[i+1] = ibuf[i] + cgsize;
 +    }
 +    for(i=0; i<dd->ncg_home+1; i++)
 +    {
 +        dd->cgindex[i] = ibuf[i];
 +    }
 +    /* Set the home atom number */
 +    dd->nat_home = dd->cgindex[dd->ncg_home];
 +    
 +    /* Copy the sorted ns cell indices back to the ns grid struct */
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
 +    }
 +    fr->ns.grid->nr = dd->ncg_home;
 +}
 +
 +static void add_dd_statistics(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +    
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] +=
 +            comm->nat[ddnat] - comm->nat[ddnat-1];
 +    }
 +    comm->ndecomp++;
 +}
 +
 +void reset_dd_statistics_counters(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +
 +    /* Reset all the statistics and counters for total run counting */
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload = 0;
 +    comm->load_step = 0;
 +    comm->load_sum = 0;
 +    comm->load_max = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf = 0;
 +    comm->load_pme = 0;
 +}
 +
 +void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    double av;
 +   
 +    comm = cr->dd->comm;
 +    
 +    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
 +    
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +    
 +    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
 +            
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
 +        switch(ddnat)
 +        {
 +        case ddnatZONE:
 +            fprintf(fplog,
 +                    " av. #atoms communicated per step for force:  %d x %.1f\n",
 +                    2,av);
 +            break;
 +        case ddnatVSITE:
 +            if (cr->dd->vsite_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
 +                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
 +                        av);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (cr->dd->constraint_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
 +                        1 + ir->nLincsIter,av);
 +            }
 +            break;
 +        default:
 +            gmx_incons(" Unknown type for DD statistics");
 +        }
 +    }
 +    fprintf(fplog,"\n");
 +    
 +    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
 +    {
 +        print_dd_load_av(fplog,cr->dd);
 +    }
 +}
 +
 +void dd_partition_system(FILE            *fplog,
 +                         gmx_large_int_t      step,
 +                         t_commrec       *cr,
 +                         gmx_bool            bMasterState,
 +                         int             nstglobalcomm,
 +                         t_state         *state_global,
 +                         gmx_mtop_t      *top_global,
 +                         t_inputrec      *ir,
 +                         t_state         *state_local,
 +                         rvec            **f,
 +                         t_mdatoms       *mdatoms,
 +                         gmx_localtop_t  *top_local,
 +                         t_forcerec      *fr,
 +                         gmx_vsite_t     *vsite,
 +                         gmx_shellfc_t   shellfc,
 +                         gmx_constr_t    constr,
 +                         t_nrnb          *nrnb,
 +                         gmx_wallcycle_t wcycle,
 +                         gmx_bool            bVerbose)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    gmx_ddbox_t ddbox={0};
 +    t_block *cgs_gl;
 +    gmx_large_int_t step_pcoupl;
 +    rvec cell_ns_x0,cell_ns_x1;
 +    int  i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
 +    gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
 +    gmx_bool bRedist,bSortCG,bResortAll;
 +    ivec ncells_old,np;
 +    real grid_density;
 +    char sbuf[22];
 +      
 +    dd = cr->dd;
 +    comm = dd->comm;
 +
 +    bBoxChanged = (bMasterState || DEFORM(*ir));
 +    if (ir->epc != epcNO)
 +    {
 +        /* With nstpcouple > 1 pressure coupling happens.
 +         * one step after calculating the pressure.
 +         * Box scaling happens at the end of the MD step,
 +         * after the DD partitioning.
 +         * We therefore have to do DLB in the first partitioning
 +         * after an MD step where P-coupling occured.
 +         * We need to determine the last step in which p-coupling occurred.
 +         * MRS -- need to validate this for vv?
 +         */
 +        n = ir->nstpcouple;
 +        if (n == 1)
 +        {
 +            step_pcoupl = step - 1;
 +        }
 +        else
 +        {
 +            step_pcoupl = ((step - 1)/n)*n + 1;
 +        }
 +        if (step_pcoupl >= comm->globalcomm_step)
 +        {
 +            bBoxChanged = TRUE;
 +        }
 +    }
 +
 +    bNStGlobalComm = (step >= comm->globalcomm_step + nstglobalcomm);
 +
 +    if (!comm->bDynLoadBal)
 +    {
 +        bDoDLB = FALSE;
 +    }
 +    else
 +    {
 +        /* Should we do dynamic load balacing this step?
 +         * Since it requires (possibly expensive) global communication,
 +         * we might want to do DLB less frequently.
 +         */
 +        if (bBoxChanged || ir->epc != epcNO)
 +        {
 +            bDoDLB = bBoxChanged;
 +        }
 +        else
 +        {
 +            bDoDLB = bNStGlobalComm;
 +        }
 +    }
 +
 +    /* Check if we have recorded loads on the nodes */
 +    if (comm->bRecordLoad && dd_load_count(comm))
 +    {
 +        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
 +        {
 +            /* Check if we should use DLB at the second partitioning
 +             * and every 100 partitionings,
 +             * so the extra communication cost is negligible.
 +             */
 +            n = max(100,nstglobalcomm);
 +            bCheckDLB = (comm->n_load_collect == 0 ||
 +                         comm->n_load_have % n == n-1);
 +        }
 +        else
 +        {
 +            bCheckDLB = FALSE;
 +        }
 +        
 +        /* Print load every nstlog, first and last step to the log file */
 +        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
 +                    comm->n_load_collect == 0 ||
 +                    (ir->nsteps >= 0 &&
 +                     (step + ir->nstlist > ir->init_step + ir->nsteps)));
 +
 +        /* Avoid extra communication due to verbose screen output
 +         * when nstglobalcomm is set.
 +         */
 +        if (bDoDLB || bLogLoad || bCheckDLB ||
 +            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
 +        {
 +            get_load_distribution(dd,wcycle);
 +            if (DDMASTER(dd))
 +            {
 +                if (bLogLoad)
 +                {
 +                    dd_print_load(fplog,dd,step-1);
 +                }
 +                if (bVerbose)
 +                {
 +                    dd_print_load_verbose(dd);
 +                }
 +            }
 +            comm->n_load_collect++;
 +
 +            if (bCheckDLB) {
 +                /* Since the timings are node dependent, the master decides */
 +                if (DDMASTER(dd))
 +                {
 +                    bTurnOnDLB =
 +                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
 +                    if (debug)
 +                    {
 +                        fprintf(debug,"step %s, imb loss %f\n",
 +                                gmx_step_str(step,sbuf),
 +                                dd_force_imb_perf_loss(dd));
 +                    }
 +                }
 +                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
 +                if (bTurnOnDLB)
 +                {
 +                    turn_on_dlb(fplog,cr,step);
 +                    bDoDLB = TRUE;
 +                }
 +            }
 +        }
 +        comm->n_load_have++;
 +    }
 +
 +    cgs_gl = &comm->cgs_gl;
 +
 +    bRedist = FALSE;
 +    if (bMasterState)
 +    {
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
 +                  TRUE,cgs_gl,state_global->x,&ddbox);
 +    
 +        get_cg_distribution(fplog,step,dd,cgs_gl,
 +                            state_global->box,&ddbox,state_global->x);
 +        
 +        dd_distribute_state(dd,cgs_gl,
 +                            state_global,state_local,f);
 +        
 +        dd_make_local_cgs(dd,&top_local->cgs);
 +        
 +        if (dd->ncg_home > fr->cg_nalloc)
 +        {
 +            dd_realloc_fr_cg(fr,dd->ncg_home);
 +        }
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +        
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        cg0 = 0;
 +    }
 +    else if (state_local->ddp_count != dd->ddp_count)
 +    {
 +        if (state_local->ddp_count > dd->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
 +        }
 +        
 +        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
 +        }
 +        
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +        
 +        /* Build the new indices */
 +        rebuild_cgindex(dd,cgs_gl->index,state_local);
 +        make_dd_indices(dd,cgs_gl->index,0);
 +        
 +        /* Redetermine the cg COMs */
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  TRUE,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bRedist = comm->bDynLoadBal;
 +    }
 +    else
 +    {
 +        /* We have the full state, only redistribute the cgs */
 +
 +        /* Clear the non-home indices */
 +        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
 +
 +        /* Avoid global communication for dim's without pbc and -gcom */
 +        if (!bNStGlobalComm)
 +        {
 +            copy_rvec(comm->box0    ,ddbox.box0    );
 +            copy_rvec(comm->box_size,ddbox.box_size);
 +        }
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bBoxChanged = TRUE;
 +        bRedist = TRUE;
 +    }
 +    /* For dim's without pbc and -gcom */
 +    copy_rvec(ddbox.box0    ,comm->box0    );
 +    copy_rvec(ddbox.box_size,comm->box_size);
 +    
 +    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
 +                      step,wcycle);
 +    
 +    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
 +    {
 +        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
 +    }
 +    
 +    /* Check if we should sort the charge groups */
 +    if (comm->nstSortCG > 0)
 +    {
 +        bSortCG = (bMasterState ||
 +                   (bRedist && (step % comm->nstSortCG == 0)));
 +    }
 +    else
 +    {
 +        bSortCG = FALSE;
 +    }
 +
 +    ncg_home_old = dd->ncg_home;
 +
 +    if (bRedist)
 +    {
 +        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
 +                                 state_local,f,fr,mdatoms,
 +                                 !bSortCG,nrnb);
 +    }
 +    
 +    get_nsgrid_boundaries(fr->ns.grid,dd,
 +                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
 +                          dd->ncg_home,fr->cg_cm,
 +                          cell_ns_x0,cell_ns_x1,&grid_density);
 +
 +    if (bBoxChanged)
 +    {
 +        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
 +    }
 +
 +    copy_ivec(fr->ns.grid->n,ncells_old);
 +    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
 +               state_local->box,cell_ns_x0,cell_ns_x1,
 +               fr->rlistlong,grid_density);
 +    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
 +    copy_ivec(ddbox.tric_dir,comm->tric_dir);
 +
 +    if (bSortCG)
 +    {
 +        /* Sort the state on charge group position.
 +         * This enables exact restarts from this step.
 +         * It also improves performance by about 15% with larger numbers
 +         * of atoms per node.
 +         */
 +        
 +        /* Fill the ns grid with the home cell,
 +         * so we can sort with the indices.
 +         */
 +        set_zones_ncg_home(dd);
 +        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
 +                  0,dd->ncg_home,fr->cg_cm);
 +        
 +        /* Check if we can use the old order and ns grid cell indices
 +         * of the charge groups to sort the charge groups efficiently.
 +         */
 +        bResortAll = (bMasterState ||
 +                      fr->ns.grid->n[XX] != ncells_old[XX] ||
 +                      fr->ns.grid->n[YY] != ncells_old[YY] ||
 +                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
 +                    gmx_step_str(step,sbuf),dd->ncg_home);
 +        }
 +        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
 +                      bResortAll ? -1 : ncg_home_old);
 +        /* Rebuild all the indices */
 +        cg0 = 0;
 +        ga2la_clear(dd->ga2la);
 +    }
 +    
 +    /* Setup up the communication and communicate the coordinates */
 +    setup_dd_communication(dd,state_local->box,&ddbox,fr);
 +    
 +    /* Set the indices */
 +    make_dd_indices(dd,cgs_gl->index,cg0);
 +
 +    /* Set the charge group boundaries for neighbor searching */
 +    set_cg_boundaries(&comm->zones);
 +    
 +    /*
 +    write_dd_pdb("dd_home",step,"dump",top_global,cr,
 +                 -1,state_local->x,state_local->box);
 +    */
 +    
 +    /* Extract a local topology from the global topology */
 +    for(i=0; i<dd->ndim; i++)
 +    {
 +        np[dd->dim[i]] = comm->cd[i].np;
 +    }
 +    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
 +                      comm->cellsize_min,np,
 +                      fr,vsite,top_global,top_local);
 +    
 +    /* Set up the special atom communication */
 +    n = comm->nat[ddnatZONE];
 +    for(i=ddnatZONE+1; i<ddnatNR; i++)
 +    {
 +        switch(i)
 +        {
 +        case ddnatVSITE:
 +            if (vsite && vsite->n_intercg_vsite)
 +            {
 +                n = dd_make_local_vsites(dd,n,top_local->idef.il);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (dd->bInterCGcons)
 +            {
 +                /* Only for inter-cg constraints we need special code */
 +                n = dd_make_local_constraints(dd,n,top_global,
 +                                              constr,ir->nProjOrder,
 +                                              &top_local->idef.il[F_CONSTR]);
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown special atom type setup");
 +        }
 +        comm->nat[i] = n;
 +    }
 +    
 +    /* Make space for the extra coordinates for virtual site
 +     * or constraint communication.
 +     */
 +    state_local->natoms = comm->nat[ddnatNR-1];
 +    if (state_local->natoms > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,state_local->natoms);
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite && vsite->n_intercg_vsite)
 +        {
 +            nat_f_novirsum = comm->nat[ddnatVSITE];
 +        }
 +        else
 +        {
 +            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
 +            {
 +                nat_f_novirsum = dd->nat_tot;
 +            }
 +            else
 +            {
 +                nat_f_novirsum = dd->nat_home;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nat_f_novirsum = 0;
 +    }
 +
 +    /* Set the number of atoms required for the force calculation.
 +     * Forces need to be constrained when using a twin-range setup
 +     * or with energy minimization. For simple simulations we could
 +     * avoid some allocation, zeroing and copying, but this is
 +     * probably not worth the complications and checking.
 +     */
 +    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
 +                        dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
 +
 +    /* We make all the mdatoms up to nat_tot_con.
 +     * We could save some work by only setting invmass
 +     * between nat_tot and nat_tot_con.
 +     */
 +    /* This call also sets the new number of home particles to dd->nat_home */
 +    atoms2md(top_global,ir,
 +             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
 +
 +    /* Now we have the charges we can sort the FE interactions */
 +    dd_sort_local_top(dd,mdatoms,top_local);
 +
 +    if (shellfc)
 +    {
 +        /* Make the local shell stuff, currently no communication is done */
 +        make_local_shells(cr,mdatoms,shellfc);
 +    }
 +    
 +      if (ir->implicit_solvent)
 +    {
 +        make_local_gb(cr,fr->born,ir->gb_algorithm);
 +    }
 +      
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Send the charges to our PME only node */
 +        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
 +                       mdatoms->chargeA,mdatoms->chargeB,
 +                       dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
 +    }
 +    
 +    if (constr)
 +    {
 +        set_constraints(constr,top_local,ir,mdatoms,cr);
 +    }
 +    
 +    if (ir->ePull != epullNO)
 +    {
 +        /* Update the local pull groups */
 +        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
 +    }
 +    
 +    if (ir->bRot)
 +    {
 +        /* Update the local rotation groups */
 +        dd_make_local_rotation_groups(dd,ir->rot);
 +    }
 +
 +
 +    add_dd_statistics(dd);
 +    
 +    /* Make sure we only count the cycles for this DD partitioning */
 +    clear_dd_cycle_counts(dd);
 +    
 +    /* Because the order of the atoms might have changed since
 +     * the last vsite construction, we need to communicate the constructing
 +     * atom coordinates again (for spreading the forces this MD step).
 +     */
 +    dd_move_x_vsites(dd,state_local->box,state_local->x);
 +    
 +    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
 +    {
 +        dd_move_x(dd,state_local->box,state_local->x);
 +        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
 +                     -1,state_local->x,state_local->box);
 +    }
 +
 +    if (bNStGlobalComm)
 +    {
 +        /* Store the global communication step */
 +        comm->globalcomm_step = step;
 +    }
 +    
 +    /* Increase the DD partitioning counter */
 +    dd->ddp_count++;
 +    /* The state currently matches this DD partitioning count, store it */
 +    state_local->ddp_count = dd->ddp_count;
 +    if (bMasterState)
 +    {
 +        /* The DD master node knows the complete cg distribution,
 +         * store the count so we can possibly skip the cg info communication.
 +         */
 +        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
 +    }
 +
 +    if (comm->DD_debug > 0)
 +    {
 +        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
 +        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
 +                                "after partitioning");
 +    }
 +}
Simple merge
index 0000000000000000000000000000000000000000,0000000000000000000000000000000000000000..71eadab9e71e79edabe445eb0d6379098cefc956
new file mode 100644 (file)
--- /dev/null
--- /dev/null
@@@ -1,0 -1,0 +1,1379 @@@
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
++ *
++ *                This source code is part of
++ *
++ *                 G   R   O   M   A   C   S
++ *
++ *          GROningen MAchine for Chemical Simulations
++ *
++ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
++ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
++ * Copyright (c) 2001-2012, The GROMACS development team,
++ * check out http://www.gromacs.org for more information.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * If you want to redistribute modifications, please consider that
++ * scientific software is very special. Version control is crucial -
++ * bugs must be traceable. We will be happy to consider code for
++ * inclusion in the official distribution, but derived work must not
++ * be called official GROMACS. Details are found in the README & COPYING
++ * files - if they are missing, get the official version at www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the papers on the package - you can find them in the top README file.
++ *
++ * For more info, check our website at http://www.gromacs.org
++ *
++ * And Hey:
++ * GROwing Monsters And Cloning Shrimps
++ */
++#ifdef HAVE_CONFIG_H
++#include <config.h>
++#endif
++
++#ifdef GMX_CRAY_XT3
++#include<catamount/dclock.h>
++#endif
++
++
++#include <stdio.h>
++#include <time.h>
++#ifdef HAVE_SYS_TIME_H
++#include <sys/time.h>
++#endif
++#include <math.h>
++#include "typedefs.h"
++#include "string2.h"
++#include "gmxfio.h"
++#include "smalloc.h"
++#include "names.h"
++#include "confio.h"
++#include "mvdata.h"
++#include "txtdump.h"
++#include "pbc.h"
++#include "chargegroup.h"
++#include "vec.h"
++#include "nrnb.h"
++#include "mshift.h"
++#include "mdrun.h"
++#include "update.h"
++#include "physics.h"
++#include "main.h"
++#include "mdatoms.h"
++#include "force.h"
++#include "bondf.h"
++#include "pme.h"
++#include "disre.h"
++#include "orires.h"
++#include "network.h"
++#include "calcmu.h"
++#include "constr.h"
++#include "xvgr.h"
++#include "trnio.h"
++#include "xtcio.h"
++#include "copyrite.h"
++#include "gmx_random.h"
++#include "domdec.h"
++#include "partdec.h"
++#include "gmx_wallcycle.h"
++#include "macros.h"
++
++#ifdef GMX_LIB_MPI
++#include <mpi.h>
++#endif
++#ifdef GMX_THREADS
++#include "tmpi.h"
++#endif
++
++/* Fill p_k[minfep..maxfep] with normalized exponential weights of ene.
++ *
++ * Each entry becomes exp(ene[i]-max)/sum, where max is the largest ene in
++ * the inclusive window [minfep,maxfep] and sum is the total of the shifted
++ * exponentials over that window.  The sum is handed back through pks.
++ * Entries of p_k outside the window are not written.
++ */
++void GenerateGibbsProbabilities(real *ene, real *p_k, real *pks, int minfep, int maxfep)
++{
++    int  k;
++    real shift;
++
++    *pks = 0.0;
++
++    /* the largest energy in the window is the shift, so no exponent is positive */
++    shift = ene[minfep];
++    for (k = minfep+1; k <= maxfep; k++)
++    {
++        if (ene[k] > shift)
++        {
++            shift = ene[k];
++        }
++    }
++
++    /* normalization sum */
++    k = minfep;
++    while (k <= maxfep)
++    {
++        *pks += exp(ene[k]-shift);
++        k++;
++    }
++
++    /* normalized weights */
++    k = minfep;
++    while (k <= maxfep)
++    {
++        p_k[k] = exp(ene[k]-shift) / *pks;
++        k++;
++    }
++}
++
++/* Count-weighted variant of GenerateGibbsProbabilities over states 0..nlim-1.
++ *
++ * Every energy is first offset by the log of its entry in nvals; an entry
++ * that equals zero has delta added to it before the log is taken.  The
++ * offset energies are shifted by their maximum, exponentiated and
++ * normalized into p_k; the normalization sum is handed back through pks.
++ */
++void GenerateWeightedGibbsProbabilities(real *ene, real *p_k, real *pks, int nlim, real *nvals,real delta)
++{
++    int   k;
++    real  shift;
++    real *nene;
++
++    *pks = 0.0;
++    snew(nene,nlim);
++
++    for (k=0; k<nlim; k++)
++    {
++        /* a zero count gets delta added before the log is taken */
++        nene[k] = ene[k] + log((nvals[k] == 0) ? nvals[k]+delta : nvals[k]);
++    }
++
++    /* largest offset energy */
++    shift = nene[0];
++    for (k=1; k<nlim; k++)
++    {
++        if (nene[k] > shift)
++        {
++            shift = nene[k];
++        }
++    }
++
++    /* shift by the maximum (avoids overflow) and build the normalization sum */
++    for (k=0; k<nlim; k++)
++    {
++        nene[k] -= shift;
++        *pks += exp(nene[k]);
++    }
++
++    /* normalized weights */
++    for (k=0; k<nlim; k++)
++    {
++        p_k[k] = exp(nene[k]) / *pks;
++    }
++
++    sfree(nene);
++}
++
++/* Return log(\sum_{i=0}^{N-1} exp(a_n[i])).
++ *
++ * The largest element is subtracted inside the exponentials and added
++ * back after taking the log.
++ */
++real do_logsum(int N, real *a_n)
++{
++    int  k;
++    real biggest;
++    real expsum;
++    real result;
++
++    /* largest argument that will be passed to exp(.) */
++    biggest = a_n[0];
++    for (k=1; k<N; k++)
++    {
++        biggest = max(biggest,a_n[k]);
++    }
++
++    /* sum of the shifted exponentials */
++    expsum = 0.0;
++    for (k=0; k<N; k++)
++    {
++        expsum += exp(a_n[k] - biggest);
++    }
++
++    /* undo the shift */
++    result = log(expsum) + biggest;
++    return result;
++}
++
++/* Return the index of the smallest of the first N entries of min_metric.
++ * On ties the lowest index wins; index 0 is returned when no later entry
++ * is strictly smaller than min_metric[0].
++ */
++int FindMinimum(real *min_metric, int N)
++{
++    int  k;
++    int  best   = 0;
++    real lowest = min_metric[0];
++
++    for (k=1; k<N; k++)
++    {
++        if (min_metric[k] < lowest)
++        {
++            lowest = min_metric[k];
++            best   = k;
++        }
++    }
++
++    return best;
++}
++
++/* Test whether a histogram is flat to within the given ratio.
++ *
++ * Returns TRUE only if every histo[i], divided by the mean of the nhisto
++ * entries, lies strictly between ratio and 1/ratio.  Returns FALSE when
++ * the entries sum to zero.
++ */
++static gmx_bool CheckHistogramRatios(int nhisto, real *histo, real ratio)
++{
++    int  k;
++    real mean = 0;
++
++    for (k=0; k<nhisto; k++)
++    {
++        mean += histo[k];
++    }
++
++    if (mean == 0)
++    {
++        /* no samples at all, so the histogram cannot be called flat */
++        return FALSE;
++    }
++    mean /= (real)nhisto;
++
++    for (k=0; k<nhisto; k++)
++    {
++        /* every point has to satisfy ratio < x < 1/ratio */
++        if (!((histo[k]/mean < 1.0/ratio) && (histo[k]/mean > ratio)))
++        {
++            return FALSE;
++        }
++    }
++
++    return TRUE;
++}
++
++/* Decide whether the expanded ensemble weights count as equilibrated.
++ *
++ * nlim    number of lambda states (entries read from dfhist->n_at_lam)
++ * expand  expanded ensemble settings; expand->elmceq selects the criterion
++ * dfhist  history holding the per-state sample counts and the current wl_delta
++ * step    current step, only used by the elmceqSTEPS criterion
++ *
++ * elmceqNO and elmceqYES return FALSE and TRUE immediately.  For every
++ * other criterion the result is TRUE when the criterion is met and, if
++ * expand->lmc_forced_nstart > 0, every state has at least that many
++ * samples.
++ */
++static gmx_bool CheckIfDoneEquilibrating(int nlim, t_expanded *expand, df_history_t *dfhist, gmx_large_int_t step)
++{
++
++    int i,totalsamples;
++    gmx_bool bDoneEquilibrating=TRUE;
++    gmx_bool bIfFlat;
++
++    /* assume we have equilibrated the weights, then check to see if any of the conditions are not met */
++
++    switch (expand->elmceq)
++    {
++    case elmceqNO:
++        /* We have not equilibrated, and won't, ever. */
++        return FALSE;
++    case elmceqYES:
++        /* we have equilibrated -- we're done */
++        return TRUE;
++    case elmceqSTEPS:
++        /* first, check if we are equilibrating by steps, if we're still under */
++        if (step < expand->equil_steps)
++        {
++            bDoneEquilibrating = FALSE;
++        }
++        break;
++    case elmceqSAMPLES:
++        /* calculate the total number of samples */
++        totalsamples = 0;
++        for (i=0;i<nlim;i++)
++        {
++            totalsamples += dfhist->n_at_lam[i];
++        }
++        if (totalsamples < expand->equil_samples)
++        {
++            bDoneEquilibrating = FALSE;
++        }
++        break;
++    case elmceqNUMATLAM:
++        for (i=0;i<nlim;i++)
++        {
++            if (dfhist->n_at_lam[i] < expand->equil_n_at_lam) /* we are still doing the initial sweep, so we're definitely not
++                                                                 done equilibrating*/
++            {
++                bDoneEquilibrating  = FALSE;
++                break;
++            }
++        }
++        break;
++    case elmceqWLDELTA:
++        if (EWL(expand->elamstats)) /* This check is in readir as well, but
++                                    just to be sure */
++        {
++            if (dfhist->wl_delta > expand->equil_wl_delta)
++            {
++                bDoneEquilibrating = FALSE;
++            }
++        }
++        break;
++    case elmceqRATIO:
++        /* we can use the flatness as a judge of good weights, as long as
++           we're not doing minvar, or Wang-Landau.
++           But turn off for now until we figure out exactly how we do this.
++        */
++
++        if (!(EWL(expand->elamstats) || expand->elamstats==elamstatsMINVAR))
++        {
++            /* we want to use flatness -avoiding- the forced-through samples.  Plus, we need to convert to
++               floats for this histogram function. */
++
++            real *modhisto;
++            snew(modhisto,nlim);
++            for (i=0;i<nlim;i++)
++            {
++                modhisto[i] = 1.0*(dfhist->n_at_lam[i]-expand->lmc_forced_nstart);
++            }
++            bIfFlat = CheckHistogramRatios(nlim,modhisto,expand->equil_ratio);
++            sfree(modhisto);
++            if (!bIfFlat)
++            {
++                bDoneEquilibrating = FALSE;
++            }
++        }
++        /* Without this break the case fell through into default, which
++           overwrote a FALSE flatness result with TRUE. */
++        break;
++    default:
++        bDoneEquilibrating = TRUE;
++        break;
++    }
++    /* one last case to go though, if we are doing slow growth to get initial values, we haven't finished equilibrating */
++
++    if (expand->lmc_forced_nstart > 0)
++    {
++        for (i=0;i<nlim;i++)
++        {
++            if (dfhist->n_at_lam[i] < expand->lmc_forced_nstart) /* we are still doing the initial sweep, so we're definitely not
++                                                                    done equilibrating*/
++            {
++                bDoneEquilibrating = FALSE;
++                break;
++            }
++        }
++    }
++    return bDoneEquilibrating;
++}
++
++/* Update the expanded ensemble weights stored in dfhist for one visit to
++ * state fep_state.
++ *
++ * nlim            number of lambda states
++ * expand          expanded ensemble settings; elamstats selects the estimator
++ * dfhist          history that is updated in place (sum_weights, sum_dg,
++ *                 sum_variance, sum_minvar, wl_histo, accum_* and bEquil)
++ * fep_state       index of the current lambda state
++ * scaled_lamee    per-state values; only differences between fep_state and
++ *                 its neighbours are used, in the Barker/Metropolis/minvar
++ *                 branch (presumably reduced energies; confirm with the caller)
++ * weighted_lamee  per-state values handed to the Gibbs probability helpers
++ *                 in the weighted Wang-Landau branch
++ * step            current step, forwarded to CheckIfDoneEquilibrating
++ *
++ * Returns TRUE only on the call where equilibration is first detected; in
++ * that case bEquil is set and n_at_lam is zeroed.  Returns FALSE when the
++ * weights were already equilibrated and after a normal weight update.
++ */
++static gmx_bool UpdateWeights(int nlim, t_expanded *expand, df_history_t *dfhist,
++                              int fep_state, real *scaled_lamee, real *weighted_lamee, gmx_large_int_t step)
++{
++    gmx_bool bSufficientSamples;
++    int i;
++    int n0,np1,nm1,nval,min_nvalm,min_nvalp,maxc;
++    real omega_m1_0,omega_p1_m1,omega_m1_p1,omega_p1_0,clam_osum;
++    real de,de_function,pks=0;
++    real cnval,zero_sum_weights;
++    real *omegam_array, *weightsm_array, *omegap_array, *weightsp_array, *varm_array, *varp_array, *dwp_array, *dwm_array;
++    real clam_varm, clam_varp, clam_weightsm, clam_weightsp, clam_minvar;
++    real *lam_variance, *lam_dg, *p_k;
++    real chi_m1_0,chi_p1_0,chi_m2_0,chi_p2_0,chi_p1_m1,chi_p2_m1,chi_m1_p1,chi_m2_p1;
++
++    /* if we have equilibrated the weights, exit now */
++    if (dfhist->bEquil)
++    {
++        return FALSE;
++    }
++
++    if (CheckIfDoneEquilibrating(nlim,expand,dfhist,step))
++    {
++        dfhist->bEquil = TRUE;
++        /* zero out the visited states so we know how many equilibrated states we have
++           from here on out.*/
++        for (i=0;i<nlim;i++)
++        {
++            dfhist->n_at_lam[i] = 0;
++        }
++        return TRUE;
++    }
++
++    /* If we reached this far, we have not equilibrated yet, keep on
++       going resetting the weights */
++
++    if (EWL(expand->elamstats))
++    {
++        if (expand->elamstats==elamstatsWL)  /* Standard Wang-Landau */
++        {
++            dfhist->sum_weights[fep_state] -= dfhist->wl_delta;
++            dfhist->wl_histo[fep_state] += 1.0;
++        }
++        else if (expand->elamstats==elamstatsWWL) /* Weighted Wang-Landau */
++        {
++            snew(p_k,nlim);
++
++            /* first increment count */
++            GenerateGibbsProbabilities(weighted_lamee,p_k,&pks,0,nlim-1);
++            for (i=0;i<nlim;i++) {
++                dfhist->wl_histo[i] += p_k[i];
++            }
++
++            /* then increment weights (uses count) */
++            pks = 0.0;
++            GenerateWeightedGibbsProbabilities(weighted_lamee,p_k,&pks,nlim,dfhist->wl_histo,dfhist->wl_delta);
++
++            for (i=0;i<nlim;i++)
++            {
++                dfhist->sum_weights[i] -= dfhist->wl_delta*p_k[i];
++            }
++            /* Alternate definition, using logarithms. Shouldn't make very much difference! */
++            /*
++              real di;
++              for (i=0;i<nlim;i++)
++              {
++                di = 1+dfhist->wl_delta*p_k[i];
++                dfhist->sum_weights[i] -= log(di);
++              }
++            */
++            sfree(p_k);
++        }
++
++        /* shift all weights so that the weight of state 0 is zero */
++        zero_sum_weights =  dfhist->sum_weights[0];
++        for (i=0;i<nlim;i++)
++        {
++            dfhist->sum_weights[i] -= zero_sum_weights;
++        }
++    }
++
++    if (expand->elamstats==elamstatsBARKER || expand->elamstats==elamstatsMETROPOLIS || expand->elamstats==elamstatsMINVAR) {
++
++        de_function = 0;  /* to get rid of warnings, but this value will not be used because of the logic */
++        maxc = 2*expand->c_range+1;
++
++        snew(lam_dg,nlim);
++        snew(lam_variance,nlim);
++
++        snew(omegap_array,maxc);
++        snew(weightsp_array,maxc);
++        snew(varp_array,maxc);
++        snew(dwp_array,maxc);
++
++        snew(omegam_array,maxc);
++        snew(weightsm_array,maxc);
++        snew(varm_array,maxc);
++        snew(dwm_array,maxc);
++
++        /* unpack the current lambdas -- we will only update 2 of these */
++
++        for (i=0;i<nlim-1;i++)
++        { /* only through the second to last */
++            lam_dg[i] = dfhist->sum_dg[i+1] - dfhist->sum_dg[i];
++            lam_variance[i] = pow(dfhist->sum_variance[i+1],2) - pow(dfhist->sum_variance[i],2);
++        }
++
++        /* accumulate running averages */
++        for (nval = 0; nval<maxc; nval++)
++        {
++            /* constants for later use */
++            cnval = (real)(nval-expand->c_range);
++            /* actually, should be able to rewrite it w/o exponential, for better numerical stability */
++            if (fep_state > 0)
++            {
++                de = exp(cnval - (scaled_lamee[fep_state]-scaled_lamee[fep_state-1]));
++                if (expand->elamstats==elamstatsBARKER || expand->elamstats==elamstatsMINVAR)
++                {
++                    de_function = 1.0/(1.0+de);
++                }
++                else if (expand->elamstats==elamstatsMETROPOLIS)
++                {
++                    if (de < 1.0)
++                    {
++                        de_function = 1.0;
++                    }
++                    else
++                    {
++                        de_function = 1.0/de;
++                    }
++                }
++                dfhist->accum_m[fep_state][nval] += de_function;
++                dfhist->accum_m2[fep_state][nval] += de_function*de_function;
++            }
++
++            if (fep_state < nlim-1)
++            {
++                de = exp(-cnval + (scaled_lamee[fep_state+1]-scaled_lamee[fep_state]));
++                if (expand->elamstats==elamstatsBARKER || expand->elamstats==elamstatsMINVAR)
++                {
++                    de_function = 1.0/(1.0+de);
++                }
++                else if (expand->elamstats==elamstatsMETROPOLIS)
++                {
++                    if (de < 1.0)
++                    {
++                        de_function = 1.0;
++                    }
++                    else
++                    {
++                        de_function = 1.0/de;
++                    }
++                }
++                dfhist->accum_p[fep_state][nval] += de_function;
++                dfhist->accum_p2[fep_state][nval] += de_function*de_function;
++            }
++
++            /* Metropolis transition and Barker transition (unoptimized Bennett) acceptance weight determination */
++
++            n0  = dfhist->n_at_lam[fep_state];
++            if (fep_state > 0) {nm1 = dfhist->n_at_lam[fep_state-1];} else {nm1 = 0;}
++            if (fep_state < nlim-1) {np1 = dfhist->n_at_lam[fep_state+1];} else {np1 = 0;}
++
++            /* logic SHOULD keep these all set correctly whatever the logic, but apparently it can't figure it out. */
++            chi_m1_0=chi_p1_0=chi_m2_0=chi_p2_0=chi_p1_m1=chi_p2_m1=chi_m1_p1=chi_m2_p1=0;
++
++            if (n0 > 0)
++            {
++                chi_m1_0 = dfhist->accum_m[fep_state][nval]/n0;
++                chi_p1_0 = dfhist->accum_p[fep_state][nval]/n0;
++                chi_m2_0 = dfhist->accum_m2[fep_state][nval]/n0;
++                chi_p2_0 = dfhist->accum_p2[fep_state][nval]/n0;
++            }
++
++            if ((fep_state > 0 ) && (nm1 > 0))
++            {
++                chi_p1_m1 = dfhist->accum_p[fep_state-1][nval]/nm1;
++                chi_p2_m1 = dfhist->accum_p2[fep_state-1][nval]/nm1;
++            }
++
++            if ((fep_state < nlim-1) && (np1 > 0))
++            {
++                chi_m1_p1 = dfhist->accum_m[fep_state+1][nval]/np1;
++                chi_m2_p1 = dfhist->accum_m2[fep_state+1][nval]/np1;
++            }
++
++            omega_m1_0 = 0;
++            omega_p1_0 = 0;
++            clam_weightsm = 0;
++            clam_weightsp = 0;
++            clam_varm = 0;
++            clam_varp = 0;
++
++            if (fep_state > 0)
++            {
++                if (n0 > 0)
++                {
++                    omega_m1_0 = chi_m2_0/(chi_m1_0*chi_m1_0) - 1.0;
++                }
++                if (nm1 > 0)
++                {
++                    omega_p1_m1 = chi_p2_m1/(chi_p1_m1*chi_p1_m1) - 1.0;
++                }
++                if ((n0 > 0) && (nm1 > 0))
++                {
++                    clam_weightsm = (log(chi_m1_0) - log(chi_p1_m1)) + cnval;
++                    clam_varm = (1.0/n0)*(omega_m1_0) + (1.0/nm1)*(omega_p1_m1);
++                }
++            }
++
++            if (fep_state < nlim-1)
++            {
++                if (n0 > 0)
++                {
++                    omega_p1_0 = chi_p2_0/(chi_p1_0*chi_p1_0) - 1.0;
++                }
++                if (np1 > 0)
++                {
++                    omega_m1_p1 = chi_m2_p1/(chi_m1_p1*chi_m1_p1) - 1.0;
++                }
++                if ((n0 > 0) && (np1 > 0))
++                {
++                    clam_weightsp = (log(chi_m1_p1) - log(chi_p1_0)) + cnval;
++                    clam_varp = (1.0/np1)*(omega_m1_p1) + (1.0/n0)*(omega_p1_0);
++                }
++            }
++
++            if (n0 > 0)
++            {
++                omegam_array[nval]             = omega_m1_0;
++            }
++            else
++            {
++                omegam_array[nval]             = 0;
++            }
++            weightsm_array[nval]           = clam_weightsm;
++            varm_array[nval]               = clam_varm;
++            if (fep_state == 0)
++            {
++                /* There is no lower neighbour, so lam_dg[fep_state-1] would
++                   be read before the start of the array.  All "m" entries
++                   are zero for state 0, so the value picked here does not
++                   affect the result. */
++                dwm_array[nval]  = 0;
++            }
++            else if (nm1 > 0)
++            {
++                dwm_array[nval]  = fabs( (cnval + log((1.0*n0)/nm1)) - lam_dg[fep_state-1] );
++            }
++            else
++            {
++                dwm_array[nval]  = fabs( cnval - lam_dg[fep_state-1] );
++            }
++
++            if (n0 > 0)
++            {
++                omegap_array[nval]             = omega_p1_0;
++            }
++            else
++            {
++                omegap_array[nval]             = 0;
++            }
++            weightsp_array[nval]           = clam_weightsp;
++            varp_array[nval]               = clam_varp;
++            if ((np1 > 0) && (n0 > 0))
++            {
++                dwp_array[nval]  = fabs( (cnval + log((1.0*np1)/n0)) - lam_dg[fep_state] );
++            }
++            else
++            {
++                dwp_array[nval]  = fabs( cnval - lam_dg[fep_state] );
++            }
++
++        }
++
++        /* find the C's closest to the old weights value */
++
++        min_nvalm = FindMinimum(dwm_array,maxc);
++        omega_m1_0    = omegam_array[min_nvalm];
++        clam_weightsm = weightsm_array[min_nvalm];
++        clam_varm     = varm_array[min_nvalm];
++
++        min_nvalp = FindMinimum(dwp_array,maxc);
++        omega_p1_0    = omegap_array[min_nvalp];
++        clam_weightsp = weightsp_array[min_nvalp];
++        clam_varp     = varp_array[min_nvalp];
++
++        clam_osum = omega_m1_0 + omega_p1_0;
++        clam_minvar = 0;
++        if (clam_osum > 0)
++        {
++            clam_minvar = 0.5*log(clam_osum);
++        }
++
++        if (fep_state > 0)
++        {
++            lam_dg[fep_state-1] = clam_weightsm;
++            lam_variance[fep_state-1] = clam_varm;
++        }
++
++        if (fep_state < nlim-1)
++        {
++            lam_dg[fep_state] = clam_weightsp;
++            lam_variance[fep_state] = clam_varp;
++        }
++
++        if (expand->elamstats==elamstatsMINVAR)
++        {
++            bSufficientSamples = TRUE;
++            /* make sure they are all past a threshold */
++            for (i=0;i<nlim;i++)
++            {
++                if (dfhist->n_at_lam[i] < expand->minvarmin)
++                {
++                    bSufficientSamples = FALSE;
++                }
++            }
++            if (bSufficientSamples)
++            {
++                dfhist->sum_minvar[fep_state] = clam_minvar;
++                if (fep_state==0)
++                {
++                    for (i=0;i<nlim;i++)
++                    {
++                        dfhist->sum_minvar[i]+=(expand->minvar_const-clam_minvar);
++                    }
++                    expand->minvar_const = clam_minvar;
++                    dfhist->sum_minvar[fep_state] = 0.0;
++                }
++                else
++                {
++                    dfhist->sum_minvar[fep_state] -= expand->minvar_const;
++                }
++            }
++        }
++
++        /* we need to rezero minvar now, since it could change at fep_state = 0 */
++        dfhist->sum_dg[0] = 0.0;
++        dfhist->sum_variance[0] = 0.0;
++        dfhist->sum_weights[0] = dfhist->sum_dg[0] + dfhist->sum_minvar[0]; /* should be zero */
++
++        /* rebuild the cumulative sums from the per-interval values */
++        for (i=1;i<nlim;i++)
++        {
++            dfhist->sum_dg[i] = lam_dg[i-1] + dfhist->sum_dg[i-1];
++            dfhist->sum_variance[i] = sqrt(lam_variance[i-1] + pow(dfhist->sum_variance[i-1],2));
++            dfhist->sum_weights[i] = dfhist->sum_dg[i] + dfhist->sum_minvar[i];
++        }
++
++        sfree(lam_dg);
++        sfree(lam_variance);
++
++        sfree(omegam_array);
++        sfree(weightsm_array);
++        sfree(varm_array);
++        sfree(dwm_array);
++
++        sfree(omegap_array);
++        sfree(weightsp_array);
++        sfree(varp_array);
++        sfree(dwp_array);
++    }
++    return FALSE;
++}
++
++/* Choose the next lambda state by Monte Carlo and update the transition matrices.
++ *
++ * log            - not used by this routine
++ * nlim           - number of lambda states; valid indices are 0..nlim-1
++ * expand         - expanded-ensemble settings (move type, number of repeats,
++ *                  Gibbs window, forced-start count)
++ * dfhist         - history; Tij and Tij_empirical are accumulated here and
++ *                  n_at_lam is read for the forced "marching" start
++ * fep_state      - index of the current lambda state
++ * weighted_lamee - per-state reduced, weighted energies used for the move
++ * p_k            - scratch array of nlim entries, filled by
++ *                  GenerateGibbsProbabilities() for the Gibbs-type moves
++ * rng            - random number generator used for all draws
++ *
++ * Returns the index of the newly chosen lambda state.  Calls gmx_fatal() if a
++ * Gibbs-type move ends outside [minfep,maxfep] for a reason that does not look
++ * like rounding.
++ */
++static int ChooseNewLambda(FILE *log, int nlim, t_expanded *expand, df_history_t *dfhist, int fep_state, real *weighted_lamee, real *p_k, gmx_rng_t rng)
++{
++    int i,ifep,minfep,maxfep,lamnew,lamtrial,starting_fep_state;
++    real r1,r2,pks,de,trialprob,tprob=0;
++    real *propose,*accept,*remainder;
++    real pnorm;
++
++    starting_fep_state = fep_state;
++    lamnew = fep_state; /* so that there is a default setting -- stays the same */
++
++    if (!EWL(expand->elamstats))   /* ignore equilibrating the weights if using WL */
++    {
++        if ((expand->lmc_forced_nstart > 0) && (dfhist->n_at_lam[nlim-1] <= expand->lmc_forced_nstart))
++        {
++            /* Use a marching method to run through the lambdas and get preliminary free energy data,
++               before starting 'free' sampling.  We start free sampling when we have enough at each lambda */
++
++            /* if we have enough at this lambda, move on to the next one */
++
++            if (dfhist->n_at_lam[fep_state] == expand->lmc_forced_nstart)
++            {
++                lamnew = fep_state+1;
++                if (lamnew == nlim)  /* whoops, stepped too far! */
++                {
++                    lamnew -= 1;
++                }
++            }
++            else
++            {
++                lamnew = fep_state;
++            }
++            return lamnew;
++        }
++    }
++
++    snew(propose,nlim);
++    snew(accept,nlim);
++    snew(remainder,nlim);
++
++    for (i=0;i<expand->lmc_repeats;i++)
++    {
++        /* proposal and acceptance probabilities are rebuilt for every repeat */
++        for(ifep=0;ifep<nlim;ifep++)
++        {
++            propose[ifep] = 0;
++            accept[ifep] = 0;
++        }
++
++        if ((expand->elmcmove==elmcmoveGIBBS) || (expand->elmcmove==elmcmoveMETGIBBS))
++        {
++            /* use the Gibbs sampler; a negative gibbsdeltalam means the full range */
++            if (expand->gibbsdeltalam < 0)
++            {
++                minfep = 0;
++                maxfep = nlim-1;
++            }
++            else
++            {
++                /* restricted range: a window around the current state, clamped to [0,nlim-1] */
++                minfep = fep_state - expand->gibbsdeltalam;
++                maxfep = fep_state + expand->gibbsdeltalam;
++                if (minfep < 0)
++                {
++                    minfep = 0;
++                }
++                if (maxfep > nlim-1)
++                {
++                    maxfep = nlim-1;
++                }
++            }
++
++            GenerateGibbsProbabilities(weighted_lamee,p_k,&pks,minfep,maxfep);
++
++            if (expand->elmcmove == elmcmoveGIBBS)
++            {
++                for (ifep=minfep;ifep<=maxfep;ifep++)
++                {
++                    propose[ifep] = p_k[ifep];
++                    accept[ifep] = 1.0;
++                }
++                /* Gibbs sampling: walk the cumulative distribution until r1 is used up.
++                   If the loop falls through, lamnew ends at maxfep+1 and is handled below. */
++                r1 = gmx_rng_uniform_real(rng);
++                for (lamnew=minfep;lamnew<=maxfep;lamnew++)
++                {
++                    if (r1 <= p_k[lamnew])
++                    {
++                        break;
++                    }
++                    r1 -= p_k[lamnew];
++                }
++            }
++            else if (expand->elmcmove==elmcmoveMETGIBBS)
++            {
++
++                /* Metropolized Gibbs sampling */
++                for (ifep=minfep;ifep<=maxfep;ifep++)
++                {
++                    remainder[ifep] = 1 - p_k[ifep];
++                }
++
++                /* find the proposal probabilities */
++
++                if (remainder[fep_state] == 0) {
++                    /* only the current state has any probability */
++                    /* we have to stay at the current state */
++                    lamnew=fep_state;
++                } else {
++                    for (ifep=minfep;ifep<=maxfep;ifep++)
++                    {
++                        if (ifep != fep_state)
++                        {
++                            propose[ifep] = p_k[ifep]/remainder[fep_state];
++                        }
++                        else
++                        {
++                            propose[ifep] = 0;
++                        }
++                    }
++
++                    r1 = gmx_rng_uniform_real(rng);
++                    for (lamtrial=minfep;lamtrial<=maxfep;lamtrial++)
++                    {
++                        pnorm = p_k[lamtrial]/remainder[fep_state];
++                        if (lamtrial!=fep_state)
++                        {
++                            if (r1 <= pnorm)
++                            {
++                                break;
++                            }
++                            r1 -= pnorm;
++                        }
++                    }
++
++                    if (lamtrial > maxfep)
++                    {
++                        /* The selection loop fell through without picking a state, so
++                           lamtrial is maxfep+1, which can equal nlim.  Do not index
++                           remainder[] with it; pass it on to the range check below. */
++                        lamnew = lamtrial;
++                    }
++                    else
++                    {
++                        /* we have now selected lamtrial according to p(lamtrial)/1-p(fep_state) */
++                        tprob = 1.0;
++                        /* trial probability is min{1,\frac{1 - p(old)}{1-p(new)} MRS 1/8/2008 */
++                        trialprob = (remainder[fep_state])/(remainder[lamtrial]);
++                        if (trialprob < tprob)
++                        {
++                            tprob = trialprob;
++                        }
++                        r2 = gmx_rng_uniform_real(rng);
++                        if (r2 < tprob)
++                        {
++                            lamnew = lamtrial;
++                        }
++                        else
++                        {
++                            lamnew = fep_state;
++                        }
++                    }
++                }
++
++                /* now figure out the acceptance probability for each */
++                for (ifep=minfep;ifep<=maxfep;ifep++)
++                {
++                    tprob = 1.0;
++                    if (remainder[ifep] != 0) {
++                        trialprob = (remainder[fep_state])/(remainder[ifep]);
++                    }
++                    else
++                    {
++                        trialprob = 1.0; /* this state is the only choice! */
++                    }
++                    if (trialprob < tprob)
++                    {
++                        tprob = trialprob;
++                    }
++                    /* probability for fep_state=0, but that's fine, it's never proposed! */
++                    accept[ifep] = tprob;
++                }
++            }
++
++            if (lamnew > maxfep)
++            {
++                /* it's possible some rounding is failing */
++                /* NOTE(review): for the plain Gibbs move remainder[] is never filled in, so it
++                   still holds the zeros from snew and this branch is always taken -- confirm
++                   that is intended. */
++                if (remainder[fep_state] < 2.0e-15)
++                {
++                    /* probably numerical rounding error -- no state other than the original has weight */
++                    lamnew = fep_state;
++                }
++                else
++                {
++                    /* probably not a numerical issue */
++                    int loc=0;
++                    /* The header below is about 203 characters and each row is at least 56
++                       (more when a value needs a sign plus a three-digit exponent), so leave
++                       real slack instead of the previous 200 + 60 per row. */
++                    int nerror = 300+(maxfep-minfep+1)*80;
++                    char *errorstr;
++                    snew(errorstr,nerror);
++                    /* if its greater than maxfep, then something went wrong -- probably underflow in the calculation
++                       of sum weights. Generated detailed info for failure */
++                    loc += sprintf(errorstr,"Something wrong in choosing new lambda state with a Gibbs move -- probably underflow in weight determination.\nDenominator is: %3d%17.10e\n  i                dE        numerator          weights\n",0,pks);
++                    for (ifep=minfep;ifep<=maxfep;ifep++)
++                    {
++                        loc += sprintf(&errorstr[loc],"%3d %17.10e%17.10e%17.10e\n",ifep,weighted_lamee[ifep],p_k[ifep],dfhist->sum_weights[ifep]);
++                    }
++                    /* never hand a run-time built buffer to a printf-style function as the format */
++                    gmx_fatal(FARGS,"%s",errorstr);
++                }
++            }
++        }
++        else if ((expand->elmcmove==elmcmoveMETROPOLIS) || (expand->elmcmove==elmcmoveBARKER))
++        {
++            /* use the metropolis sampler with trial +/- 1 */
++            r1 = gmx_rng_uniform_real(rng);
++            if (r1 < 0.5)
++            {
++                /* try to step down; at the lower end the trial is the current state */
++                if (fep_state == 0) {
++                    lamtrial = fep_state;
++                }
++                else
++                {
++                    lamtrial = fep_state-1;
++                }
++            }
++            else
++            {
++                /* try to step up; at the upper end the trial is the current state */
++                if (fep_state == nlim-1) {
++                    lamtrial = fep_state;
++                }
++                else
++                {
++                    lamtrial = fep_state+1;
++                }
++            }
++
++            de = weighted_lamee[lamtrial] - weighted_lamee[fep_state];
++            if (expand->elmcmove==elmcmoveMETROPOLIS)
++            {
++                /* accept with probability min(1,exp(de)) */
++                tprob = 1.0;
++                trialprob = exp(de);
++                if (trialprob < tprob)
++                {
++                    tprob = trialprob;
++                }
++                propose[fep_state] = 0;
++                propose[lamtrial] = 1.0; /* note that this overwrites the above line if fep_state = ntrial, which only occurs at the ends */
++                accept[fep_state] = 1.0; /* doesn't actually matter, never proposed unless fep_state = ntrial, in which case it's 1.0 anyway */
++                accept[lamtrial] = tprob;
++
++            }
++            else if (expand->elmcmove==elmcmoveBARKER)
++            {
++                /* accept with probability 1/(1+exp(-de)) */
++                tprob = 1.0/(1.0+exp(-de));
++
++                propose[fep_state] = (1-tprob);
++                propose[lamtrial] += tprob; /* we add, to account for the fact that at the end, they might be the same point */
++                accept[fep_state] = 1.0;
++                accept[lamtrial] = 1.0;
++            }
++
++            r2 = gmx_rng_uniform_real(rng);
++            if (r2 < tprob) {
++                lamnew = lamtrial;
++            } else {
++                lamnew = fep_state;
++            }
++        }
++
++        /* accumulate the analytical transition probabilities out of fep_state;
++           the rejected part of every proposal is credited to staying put */
++        for (ifep=0;ifep<nlim;ifep++)
++        {
++            dfhist->Tij[fep_state][ifep] += propose[ifep]*accept[ifep];
++            dfhist->Tij[fep_state][fep_state] += propose[ifep]*(1.0-accept[ifep]);
++        }
++        fep_state = lamnew;
++    }
++
++    /* the empirical matrix counts only the net move over all repeats */
++    dfhist->Tij_empirical[starting_fep_state][lamnew] += 1.0;
++
++    sfree(propose);
++    sfree(accept);
++    sfree(remainder);
++
++    return lamnew;
++}
++
++/* Print the weights to the log, along with the current state.
++ *
++ * outfile   - stream that receives the report
++ * fep       - lambda settings; n_lambda, separate_dvdl and all_lambda are read
++ * expand    - expanded-ensemble settings (statistics type, nstTij, matrix symmetrization)
++ * simtemp   - simulated-tempering data, or NULL; when non-NULL the temperature
++ *             of each state is printed as an extra column
++ * dfhist    - accumulated weights, counts, histograms and transition matrices
++ * nlam      - index of the current state; its row is marked with "<<"
++ * frequency - output is produced only when step is a multiple of this value
++ *             (the caller must pass a non-zero value)
++ * step      - current step
++ *
++ * The two transition matrices are added when expand->nstTij is positive,
++ * step is positive and step is a multiple of expand->nstTij.
++ */
++extern void PrintFreeEnergyInfoToFile(FILE *outfile, t_lambda *fep, t_expanded *expand, t_simtemp *simtemp, df_history_t *dfhist,
++                                      int nlam, int frequency, gmx_large_int_t step)
++{
++    int nlim,i,ifep,jfep;
++    real dw,dg,dv,dm,Tprint;
++    const char *print_names[efptNR] = {" FEPL","MassL","CoulL"," VdwL","BondL","RestT","Temp.(K)"};
++    gmx_bool bSimTemp = FALSE;
++
++    nlim = fep->n_lambda;
++    if (simtemp != NULL) {
++        bSimTemp = TRUE;
++    }
++
++    if (mod(step,frequency)==0)
++    {
++        fprintf(outfile,"             MC-lambda information\n");
++        if (EWL(expand->elamstats) && (!(dfhist->bEquil))) {
++            fprintf(outfile,"  Wang-Landau incrementor is: %11.5g\n",dfhist->wl_delta);
++        }
++        /* column headers: one per separately tracked lambda component */
++        fprintf(outfile,"  N");
++        for (i=0;i<efptNR;i++)
++        {
++            if (fep->separate_dvdl[i])
++            {
++                fprintf(outfile,"%7s",print_names[i]);
++            }
++            else if ((i == efptTEMPERATURE) && bSimTemp)
++            {
++                fprintf(outfile,"%10s",print_names[i]); /* more space for temperature formats */
++            }
++        }
++        fprintf(outfile,"    Count   ");
++        if (expand->elamstats==elamstatsMINVAR)
++        {
++            fprintf(outfile,"W(in kT)   G(in kT)  dG(in kT)  dV(in kT)\n");
++        }
++        else
++        {
++            fprintf(outfile,"G(in kT)  dG(in kT)\n");
++        }
++        for (ifep=0;ifep<nlim;ifep++)
++        {
++            /* differences to the next state; the last state has no successor */
++            if (ifep==nlim-1)
++            {
++                dw=0.0;
++                dg=0.0;
++                dv=0.0;
++                dm=0.0;
++            }
++            else
++            {
++                dw = dfhist->sum_weights[ifep+1] - dfhist->sum_weights[ifep];
++                dg = dfhist->sum_dg[ifep+1] - dfhist->sum_dg[ifep];
++                dv = sqrt(pow(dfhist->sum_variance[ifep+1],2) - pow(dfhist->sum_variance[ifep],2));
++                dm = dfhist->sum_minvar[ifep+1] - dfhist->sum_minvar[ifep];
++
++            }
++            /* states are printed 1-based */
++            fprintf(outfile,"%3d",(ifep+1));
++            for (i=0;i<efptNR;i++)
++            {
++                if (fep->separate_dvdl[i])
++                {
++                    fprintf(outfile,"%7.3f",fep->all_lambda[i][ifep]);
++                } else if (i == efptTEMPERATURE && bSimTemp)
++                {
++                    fprintf(outfile,"%9.3f",simtemp->temperatures[ifep]);
++                }
++            }
++            if (EWL(expand->elamstats) && (!(dfhist->bEquil)))  /* if performing WL and still haven't equilibrated */
++            {
++                if (expand->elamstats == elamstatsWL)
++                {
++                    fprintf(outfile," %8d",(int)dfhist->wl_histo[ifep]);
++                } else {
++                    fprintf(outfile," %8.3f",dfhist->wl_histo[ifep]);
++                }
++            }
++            else   /* we have equilibrated weights */
++            {
++                fprintf(outfile," %8d",dfhist->n_at_lam[ifep]);
++            }
++            if (expand->elamstats==elamstatsMINVAR)
++            {
++                fprintf(outfile," %10.5f %10.5f %10.5f %10.5f",dfhist->sum_weights[ifep],dfhist->sum_dg[ifep],dg,dv);
++            }
++            else
++            {
++                fprintf(outfile," %10.5f %10.5f",dfhist->sum_weights[ifep],dw);
++            }
++            /* flag the row of the current state */
++            if (ifep == nlam) {
++                fprintf(outfile," <<\n");
++            }
++            else
++            {
++                fprintf(outfile,"   \n");
++            }
++        }
++        fprintf(outfile,"\n");
++
++        /* Test nstTij > 0 first: evaluating mod(step,nstTij) with nstTij == 0
++           would be a modulo by zero. */
++        if ((expand->nstTij > 0) && (step > 0) && (mod(step,expand->nstTij)==0))
++        {
++            fprintf(outfile,"                     Transition Matrix\n");
++            for (ifep=0;ifep<nlim;ifep++)
++            {
++                fprintf(outfile,"%12d",(ifep+1));
++            }
++            fprintf(outfile,"\n");
++            for (ifep=0;ifep<nlim;ifep++)
++            {
++                for (jfep=0;jfep<nlim;jfep++)
++                {
++                    /* rows of states that were never visited are printed as zeros */
++                    if (dfhist->n_at_lam[ifep] > 0)
++                    {
++                        if (expand->bSymmetrizedTMatrix)
++                        {
++                            Tprint = (dfhist->Tij[ifep][jfep]+dfhist->Tij[jfep][ifep])/(dfhist->n_at_lam[ifep]+dfhist->n_at_lam[jfep]);
++                        } else {
++                            Tprint = (dfhist->Tij[ifep][jfep])/(dfhist->n_at_lam[ifep]);
++                        }
++                    }
++                    else
++                    {
++                        Tprint = 0.0;
++                    }
++                    fprintf(outfile,"%12.8f",Tprint);
++                }
++                fprintf(outfile,"%3d\n",(ifep+1));
++            }
++
++            fprintf(outfile,"                  Empirical Transition Matrix\n");
++            for (ifep=0;ifep<nlim;ifep++)
++            {
++                fprintf(outfile,"%12d",(ifep+1));
++            }
++            fprintf(outfile,"\n");
++            for (ifep=0;ifep<nlim;ifep++)
++            {
++                for (jfep=0;jfep<nlim;jfep++)
++                {
++                    if (dfhist->n_at_lam[ifep] > 0)
++                    {
++                        if (expand->bSymmetrizedTMatrix)
++                        {
++                            Tprint = (dfhist->Tij_empirical[ifep][jfep]+dfhist->Tij_empirical[jfep][ifep])/(dfhist->n_at_lam[ifep]+dfhist->n_at_lam[jfep]);
++                        } else {
++                            Tprint = dfhist->Tij_empirical[ifep][jfep]/(dfhist->n_at_lam[ifep]);
++                        }
++                    }
++                    else
++                    {
++                        Tprint = 0.0;
++                    }
++                    fprintf(outfile,"%12.8f",Tprint);
++                }
++                fprintf(outfile,"%3d\n",(ifep+1));
++            }
++        }
++    }
++}
++
++/* Copy the state of rng into state->mc_rng and state->mc_rngi by calling
++ * gmx_rng_get_state().  Presumably used so the Monte Carlo random stream can
++ * be stored with the rest of t_state -- confirm against the callers. */
++extern void get_mc_state(gmx_rng_t rng,t_state *state)
++{
++    gmx_rng_get_state(rng,state->mc_rng,state->mc_rngi);
++}
++
++/* Load rng from state->mc_rng and the first entry of state->mc_rngi by calling
++ * gmx_rng_set_state(); the counterpart of get_mc_state().  Note that the index
++ * is passed by value here (mc_rngi[0]), whereas get_mc_state() passes the array. */
++extern void set_mc_state(gmx_rng_t rng,t_state *state)
++{
++    gmx_rng_set_state(rng,state->mc_rng,state->mc_rngi[0]);
++}
++
++/* Perform one expanded-ensemble update and return the new lambda state.
++ *
++ * The routine counts the visit to the current state, builds the reduced
++ * energies of all states, lets UpdateWeights() adjust the weights, picks the
++ * next state with ChooseNewLambda(), rescales velocities and reference
++ * temperatures when simulated tempering changes the state, and finally
++ * applies the Wang-Landau incrementor bookkeeping.
++ *
++ * log     - log file, may be NULL (messages are then skipped)
++ * ir      - input record; opts.ref_t is overwritten on a simulated-tempering
++ *           move and expandedvals->bInit_weights is cleared after first use
++ * enerd   - energies; enerpart_lambda and term[F_EPOT] are read
++ * state   - state; fep_state is read, the Nose-Hoover chain velocities are
++ *           rescaled for Trotter integrators
++ * MassQ   - passed on to init_npt_masses()
++ * dfhist  - history of weights, counts and histograms, updated in place
++ * step    - current step
++ * mcrng   - random number generator for the Monte Carlo move
++ * v       - velocities, rescaled in place on a temperature change
++ * mdatoms - atom data; start, homenr and cTC are read
++ *
++ * Returns the index of the lambda state chosen for the next interval.
++ */
++extern int ExpandedEnsembleDynamics(FILE *log,t_inputrec *ir, gmx_enerdata_t *enerd,
++                                    t_state *state, t_extmass *MassQ, df_history_t *dfhist,
++                                    gmx_large_int_t step, gmx_rng_t mcrng,
++                                    rvec *v, t_mdatoms *mdatoms)
++{
++    real *pfep_lamee,*p_k, *scaled_lamee, *weighted_lamee;
++    int i,nlam,nlim,lamnew,totalsamples;
++    real oneovert,maxscaled=0,maxweighted=0;
++    t_expanded *expand;
++    t_simtemp *simtemp;
++    gmx_bool bIfReset,bSwitchtoOneOverT,bDoneEquilibrating=FALSE;
++
++    expand = ir->expandedvals;
++    simtemp = ir->simtempvals;
++    nlim = ir->fepvals->n_lambda;
++    nlam = state->fep_state;
++
++    snew(scaled_lamee,nlim);
++    snew(weighted_lamee,nlim);
++    snew(pfep_lamee,nlim);
++    snew(p_k,nlim);
++
++    if (expand->bInit_weights)  /* if initialized weights, we need to fill them in */
++    {
++        dfhist->wl_delta = expand->init_wl_delta;  /* MRS -- this would fit better somewhere else? */
++        for (i=0;i<nlim;i++) {
++            dfhist->sum_weights[i] = expand->init_lambda_weights[i];
++            dfhist->sum_dg[i] = expand->init_lambda_weights[i];
++        }
++        expand->bInit_weights = FALSE;
++    }
++
++    /* update the count at the current lambda*/
++    dfhist->n_at_lam[nlam]++;
++
++    /* need to calculate the PV term somewhere, but not needed here? Not until there's a lambda state that's
++       pressure controlled.*/
++    /*
++      pVTerm = 0;
++      where does this PV term go?
++      for (i=0;i<nlim;i++)
++      {
++      fep_lamee[i] += pVTerm;
++      }
++    */
++
++    /* determine the minimum value to avoid overflow.  Probably a better way to do this */
++    /* we don't need to include the pressure term, since the volume is the same between the two.
++       is there some term we are neglecting, however? */
++
++    if (ir->efep != efepNO)
++    {
++        for (i=0;i<nlim;i++)
++        {
++            if (ir->bSimTemp)
++            {
++                /* Note -- this assumes no mass changes, since kinetic energy is not added  . . . */
++                scaled_lamee[i] = (enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0])/(simtemp->temperatures[i]*BOLTZ)
++                    + enerd->term[F_EPOT]*(1.0/(simtemp->temperatures[i])- 1.0/(simtemp->temperatures[nlam]))/BOLTZ;
++            }
++            else
++            {
++                scaled_lamee[i] = (enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0])/(expand->mc_temp*BOLTZ);
++                /* mc_temp is currently set to the system reft unless otherwise defined */
++            }
++
++            /* save these energies for printing, so they don't get overwritten by the next step */
++            /* they aren't overwritten in the non-free energy case, but we always print with these
++               for simplicity */
++        }
++    } else {
++        /* no free-energy perturbation: only the temperature differs between states */
++        if (ir->bSimTemp) {
++            for (i=0;i<nlim;i++) {
++                scaled_lamee[i] = enerd->term[F_EPOT]*(1.0/simtemp->temperatures[i] - 1.0/simtemp->temperatures[nlam])/BOLTZ;
++            }
++        }
++    }
++
++    /* combine with the current weights and track the maxima of both arrays */
++    for (i=0;i<nlim;i++) {
++        pfep_lamee[i] = scaled_lamee[i];
++
++        weighted_lamee[i] = dfhist->sum_weights[i] - scaled_lamee[i];
++        if (i==0)
++        {
++            maxscaled = scaled_lamee[i];
++            maxweighted = weighted_lamee[i];
++        }
++        else
++        {
++            if (scaled_lamee[i] > maxscaled)
++            {
++                maxscaled = scaled_lamee[i];
++            }
++            if (weighted_lamee[i] > maxweighted)
++            {
++                maxweighted = weighted_lamee[i];
++            }
++        }
++    }
++
++    /* shift so that the largest entry of each array is zero */
++    for (i=0;i<nlim;i++)
++    {
++        scaled_lamee[i] -= maxscaled;
++        weighted_lamee[i] -= maxweighted;
++    }
++
++    /* update weights - we decide whether or not to actually do this inside */
++
++    bDoneEquilibrating = UpdateWeights(nlim,expand,dfhist,nlam,scaled_lamee,weighted_lamee,step);
++    if (bDoneEquilibrating)
++    {
++        if (log) {
++            fprintf(log,"\nStep %d: Weights have equilibrated, using criteria: %s\n",(int)step,elmceq_names[expand->elmceq]);
++        }
++    }
++
++    lamnew = ChooseNewLambda(log,nlim,expand,dfhist,nlam,weighted_lamee,p_k,mcrng);
++    /* if using simulated tempering, we need to adjust the temperatures */
++    if (ir->bSimTemp && (lamnew != nlam)) /* only need to change the temperatures if we change the state */
++    {
++        int i, j, n, d;
++        real *buf_ngtc;
++        real told;
++        int nstart, nend, gt;
++
++        snew(buf_ngtc,ir->opts.ngtc);
++
++        /* NOTE(review): groups with ref_t <= 0 keep the zero that snew() put in
++           buf_ngtc, so the velocities of their atoms are multiplied by zero
++           below -- confirm that this is intended. */
++        for (i=0;i<ir->opts.ngtc;i++) {
++            if (ir->opts.ref_t[i] > 0) {
++                told = ir->opts.ref_t[i];
++                ir->opts.ref_t[i] =  simtemp->temperatures[lamnew];
++                buf_ngtc[i] = sqrt(ir->opts.ref_t[i]/told); /* using the buffer as temperature scaling */
++            }
++        }
++
++        /* we don't need to manipulate the ekind information, as it isn't due to be reset until the next step anyway */
++
++        nstart = mdatoms->start;
++        nend   = nstart + mdatoms->homenr;
++        for (n=nstart; n<nend; n++)
++        {
++            /* atoms without temperature-coupling group information use group 0 */
++            gt = 0;
++            if (mdatoms->cTC)
++            {
++                gt = mdatoms->cTC[n];
++            }
++            for(d=0; d<DIM; d++)
++            {
++                 v[n][d] *= buf_ngtc[gt];
++            }
++        }
++
++        if (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)) {
++            /* we need to recalculate the masses if the temperature has changed */
++            init_npt_masses(ir,state,MassQ,FALSE);
++            /* The chain velocities are flattened (group, chain link) arrays, so
++               element (i,j) lives at i*nhchainlength+j.  The previous [i+j]
++               rescaled overlapping elements more than once and never reached
++               the tail of the array. */
++            /* NOTE(review): buf_ngtc holds ngtc entries but is indexed here by the
++               barostat chain index -- confirm nnhpres <= ngtc. */
++            for (i=0;i<state->nnhpres;i++)
++            {
++                for (j=0;j<ir->opts.nhchainlength;j++)
++                {
++                    state->nhpres_vxi[i*ir->opts.nhchainlength+j] *= buf_ngtc[i];
++                }
++            }
++            for (i=0;i<ir->opts.ngtc;i++)
++            {
++                for (j=0;j<ir->opts.nhchainlength;j++)
++                {
++                    state->nosehoover_vxi[i*ir->opts.nhchainlength+j] *= buf_ngtc[i];
++                }
++            }
++        }
++        sfree(buf_ngtc);
++    }
++
++    /* now check on the Wang-Landau updating criteria */
++
++    if (EWL(expand->elamstats))
++    {
++        bSwitchtoOneOverT = FALSE;
++        if (expand->bWLoneovert) {
++            totalsamples = 0;
++            for (i=0;i<nlim;i++)
++            {
++                totalsamples += dfhist->n_at_lam[i];
++            }
++            oneovert = (1.0*nlim)/totalsamples;
++            /* oneovert has decreased by a bit since last time, so we actually make sure its within one of this number */
++            /* switch to 1/t incrementing when wl_delta has decreased at least once, and wl_delta is now less than 1/t */
++            if ((dfhist->wl_delta <= ((totalsamples)/(totalsamples-1.00001))*oneovert) &&
++                (dfhist->wl_delta < expand->init_wl_delta))
++            {
++                bSwitchtoOneOverT = TRUE;
++            }
++        }
++        if (bSwitchtoOneOverT) {
++            dfhist->wl_delta = oneovert; /* now we reduce by this each time, instead of only at flatness */
++        } else {
++            bIfReset = CheckHistogramRatios(nlim,dfhist->wl_histo,expand->wl_ratio);
++            if (bIfReset)
++            {
++                /* histogram is flat enough: start a new one with a smaller incrementor */
++                for (i=0;i<nlim;i++)
++                {
++                    dfhist->wl_histo[i] = 0;
++                }
++                dfhist->wl_delta *= expand->wl_scale;
++                if (log) {
++                    fprintf(log,"\nStep %d: weights are now:",(int)step);
++                    for (i=0;i<nlim;i++)
++                    {
++                        fprintf(log," %.5f",dfhist->sum_weights[i]);
++                    }
++                    fprintf(log,"\n");
++                }
++            }
++        }
++    }
++    /* release every scratch array allocated above; pfep_lamee used to be leaked on each call */
++    sfree(pfep_lamee);
++    sfree(scaled_lamee);
++    sfree(weighted_lamee);
++    sfree(p_k);
++
++    return lamnew;
++}
++
++
index 0c8751ec86a645c7e82353707e29c2d664ebced5,0000000000000000000000000000000000000000..ef3dd6f9c1d01d3cab30582ec757ad65262c3834
mode 100644,000000..100644
--- /dev/null
@@@ -1,729 -1,0 +1,829 @@@
-  * 
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
-  * 
++ *
 + *                This source code is part of
-  * 
++ *
 + *                 G   R   O   M   A   C   S
-  * 
++ *
 + *          GROningen MAchine for Chemical Simulations
-  * 
++ *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
-  * 
++ *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
-  * 
++ *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
-  * 
++ *
 + * For more info, check our website at http://www.gromacs.org
-         real       lambda,
++ *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include <assert.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "nrnb.h"
 +#include "bondf.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "coulomb.h"
 +#include "pme.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "qmmm.h"
 +
 +
 +void ns(FILE *fp,
 +        t_forcerec *fr,
 +        rvec       x[],
 +        matrix     box,
 +        gmx_groups_t *groups,
 +        t_grpopts  *opts,
 +        gmx_localtop_t *top,
 +        t_mdatoms  *md,
 +        t_commrec  *cr,
 +        t_nrnb     *nrnb,
-     
-   if (fr->bTwinRange) 
++        real       *lambda,
 +        real       *dvdlambda,
 +        gmx_grppairener_t *grppener,
 +        gmx_bool       bFillGrid,
 +        gmx_bool       bDoLongRange,
 +        gmx_bool       bDoForces,
 +        rvec       *f)
 +{
 +  char   *ptr;
 +  int    nsearch;
 +
 +
 +  if (!fr->ns.nblist_initialized)
 +  {
 +      init_neighbor_list(fp, fr, md->homenr);
 +  }
-     
++
++  if (fr->bTwinRange)
 +    fr->nlr=0;
 +
 +    nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
 +                                lambda,dvdlambda,grppener,
 +                                bFillGrid,bDoLongRange,
 +                                bDoForces,f);
 +  if (debug)
 +    fprintf(debug,"nsearch = %d\n",nsearch);
-                        real       lambda,  
++
 +  /* Check whether we have to do dynamic load balancing */
 +  /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
 +    count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
 +    &(top->idef),opts->ngener);
 +  */
 +  if (fr->ns.dump_nl > 0)
 +    dump_nblist(fp,cr,fr,fr->ns.dump_nl);
 +}
 +
 +void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
 +                       t_forcerec *fr,      t_inputrec *ir,
 +                       t_idef     *idef,    t_commrec  *cr,
 +                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
 +                       t_mdatoms  *md,
 +                       t_grpopts  *opts,
 +                       rvec       x[],      history_t  *hist,
 +                       rvec       f[],
 +                       gmx_enerdata_t *enerd,
 +                       t_fcdata   *fcd,
 +                       gmx_mtop_t     *mtop,
 +                       gmx_localtop_t *top,
 +                       gmx_genborn_t *born,
 +                       t_atomtypes *atype,
 +                       gmx_bool       bBornRadii,
 +                       matrix     box,
-                        t_blocka   *excl,    
++                       t_lambda   *fepvals,
++                       real       *lambda,
 +                       t_graph    *graph,
-     int     i,status;
++                       t_blocka   *excl,
 +                       rvec       mu_tot[],
 +                       int        flags,
 +                       float      *cycles_pme)
 +{
-     real    dvdlambda,Vsr,Vlr,Vcorr=0,vdip,vcharge;
++    int     i,j,status;
 +    int     donb_flags;
 +    gmx_bool    bDoEpot,bSepDVDL,bSB;
 +    int     pme_flags;
 +    matrix  boxs;
 +    rvec    box_size;
-     double  lam_i;
-     real    dvdl_dum;
++    real    Vsr,Vlr,Vcorr=0,vdip,vcharge;
 +    t_pbc   pbc;
 +    real    dvdgb;
 +    char    buf[22];
 +    gmx_enerdata_t ed_lam;
-     
- #define PRINT_SEPDVDL(s,v,dvdl) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdl);
-     
++    double  clam_i,vlam_i;
++    real    dvdl_dum[efptNR], dvdlambda[efptNR], lam_i[efptNR];
++    real    dvdlsum,dvdl_walls;
 +
 +#ifdef GMX_MPI
 +    double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
 +#endif
-     
++
++#define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
++
 +
 +    set_pbc(&pbc,fr->ePBC,box);
-     
++
++    /* reset free energy components */
++    for (i=0;i<efptNR;i++)
++    {
++        dvdlambda[i] = 0;
++        dvdl_dum[i] = 0;
++    }
++
 +    /* Reset box */
 +    for(i=0; (i<DIM); i++)
 +    {
 +        box_size[i]=box[i][i];
 +    }
-     
++
 +    bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
 +    debug_gmx();
-     
++
 +    /* do QMMM first if requested */
 +    if(fr->bQMMM)
 +    {
 +        enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
 +    }
-     
++
 +    if (bSepDVDL)
 +    {
 +        fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
 +                gmx_step_str(step,buf),cr->nodeid);
 +    }
-     
-     dvdlambda = 0;
-     
++
 +    /* Call the short range functions all in one go. */
-     
++
 +#ifdef GMX_MPI
 +    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
 +#define TAKETIME FALSE
 +    if (TAKETIME)
 +    {
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t0=MPI_Wtime();
 +    }
 +#endif
-         dvdlambda = do_walls(ir,fr,box,md,x,f,lambda,
-                              enerd->grpp.ener[egLJSR],nrnb);
-         PRINT_SEPDVDL("Walls",0.0,dvdlambda);
-         enerd->dvdl_lin += dvdlambda;
++
 +    if (ir->nwall)
 +    {
-               
++        /* foreign lambda component for walls */
++        dvdl_walls = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
++                 enerd->grpp.ener[egLJSR],nrnb);
++        PRINT_SEPDVDL("Walls",0.0,dvdl_walls);
++        dvdlambda[efptVDW] += dvdl_walls;
++        enerd->dvdl_lin[efptVDW] += dvdl_walls;
 +    }
-               
++
 +      /* If doing GB, reset dvda and calculate the Born radii */
 +      if (ir->implicit_solvent)
 +      {
 +              /* wallcycle_start(wcycle,ewcGB); */
-               
++
 +              for(i=0;i<born->nr;i++)
 +              {
 +                      fr->dvda[i]=0;
 +              }
-               
++
 +              if(bBornRadii)
 +              {
 +                      calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
 +              }
-       
++
 +              /* wallcycle_stop(wcycle, ewcGB); */
 +      }
-                  lambda,&dvdlambda,-1,-1,donb_flags);
++
 +    where();
 +    donb_flags = 0;
 +    if (flags & GMX_FORCE_FORCES)
 +    {
 +        donb_flags |= GMX_DONB_FORCES;
 +    }
++
 +    do_nonbonded(cr,fr,x,f,md,excl,
 +                 fr->bBHAM ?
 +                 enerd->grpp.ener[egBHAMSR] :
 +                 enerd->grpp.ener[egLJSR],
 +                 enerd->grpp.ener[egCOULSR],
 +                               enerd->grpp.ener[egGB],box_size,nrnb,
-     if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) && ir->sc_alpha != 0)
++                 lambda,dvdlambda,-1,-1,donb_flags);
 +    /* If we do foreign lambda and we have soft-core interactions
 +     * we have to recalculate the (non-linear) energies contributions.
 +     */
-         init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam);
-         
++    if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
 +    {
-             lam_i = (i==0 ? lambda : ir->flambda[i-1]);
-             dvdl_dum = 0;
++        init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
++
 +        for(i=0; i<enerd->n_lambda; i++)
 +        {
-                          lam_i,&dvdl_dum,-1,-1,
++            for (j=0;j<efptNR;j++)
++            {
++                lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
++            }
 +            reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
 +            do_nonbonded(cr,fr,x,f,md,excl,
 +                         fr->bBHAM ?
 +                         ed_lam.grpp.ener[egBHAMSR] :
 +                         ed_lam.grpp.ener[egLJSR],
 +                         ed_lam.grpp.ener[egCOULSR],
 +                         enerd->grpp.ener[egGB], box_size,nrnb,
-       
-       /* If we are doing GB, calculate bonded forces and apply corrections 
++                         lam_i,dvdl_dum,-1,-1,
 +                         GMX_DONB_FOREIGNLAMBDA);
 +            sum_epot(&ir->opts,&ed_lam);
 +            enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
 +        }
 +        destroy_enerdata(&ed_lam);
 +    }
 +    where();
-     
-     if (ir->sc_alpha != 0)
++
++      /* If we are doing GB, calculate bonded forces and apply corrections
 +       * to the solvation forces */
++    /* MRS: Eventually, may need to include the free energy contribution here! */
 +      if (ir->implicit_solvent)  {
 +              calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
 +                       ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
 +    }
 +
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t1=MPI_Wtime();
 +        fr->t_fnbf += t1-t0;
 +    }
 +#endif
-         enerd->dvdl_nonlin += dvdlambda;
++
++    if (fepvals->sc_alpha!=0)
 +    {
-         enerd->dvdl_lin    += dvdlambda;
++        enerd->dvdl_nonlin[efptVDW] += dvdlambda[efptVDW];
 +    }
 +    else
 +    {
-     PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlambda);
++        enerd->dvdl_lin[efptVDW] += dvdlambda[efptVDW];
++    }
++
++    if (fepvals->sc_alpha!=0)
++
++        /* Even though the Coulomb part is linear, we already added it, because
++           we need to go through the VdW calculation anyway */
++    {
++        enerd->dvdl_nonlin[efptCOUL] += dvdlambda[efptCOUL];
 +    }
++    else
++    {
++        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
++    }
++
 +    Vsr = 0;
 +    if (bSepDVDL)
 +    {
 +        for(i=0; i<enerd->grpp.nener; i++)
 +        {
 +            Vsr +=
 +                (fr->bBHAM ?
 +                 enerd->grpp.ener[egBHAMSR][i] :
 +                 enerd->grpp.ener[egLJSR][i])
 +                + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
 +        }
++        dvdlsum = dvdlambda[efptVDW]+dvdlambda[efptCOUL];
++        PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
 +    }
-     
 +    debug_gmx();
-     
-     /* Shift the coordinates. Must be done before bonded forces and PPPM, 
-      * but is also necessary for SHAKE and update, therefore it can NOT 
++
++
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
 +    }
-     
++
++    /* Shift the coordinates. Must be done before bonded forces and PPPM,
++     * but is also necessary for SHAKE and update, therefore it can NOT
 +     * go when no bonded forces have to be evaluated.
 +     */
-     
++
 +    /* Here sometimes we would not need to shift with NBFonly,
 +     * but we do so anyhow for consistency of the returned coordinates.
 +     */
 +    if (graph)
 +    {
 +        shift_self(graph,box,x);
 +        if (TRICLINIC(box))
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
 +        }
 +        else
 +        {
 +            inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
 +        }
 +    }
 +    /* Check whether we need to do bondeds or correct for exclusions */
 +    if (fr->bMolPBC &&
 +        ((flags & GMX_FORCE_BONDED)
 +         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
 +    {
 +        /* Since all atoms are in the rectangular or triclinic unit-cell,
 +         * only single box vector shifts (2 in x) are required.
 +         */
 +        set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
 +    }
 +    debug_gmx();
-         
++
 +    if (flags & GMX_FORCE_BONDED)
 +    {
 +        calc_bonds(fplog,cr->ms,
 +                   idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
 +                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
 +                   fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
-         if (ir->n_flambda > 0 && (flags & GMX_FORCE_DHDL) &&
++
 +        /* Check if we have to determine energy differences
 +         * at foreign lambda's.
 +         */
-             init_enerdata(mtop->groups.grps[egcENER].nr,ir->n_flambda,&ed_lam);
-             
++        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
 +            idef->ilsort != ilsortNO_FE)
 +        {
 +            if (idef->ilsort != ilsortFE_SORTED)
 +            {
 +                gmx_incons("The bonded interactions are not sorted for free energy");
 +            }
-                 lam_i = (i==0 ? lambda : ir->flambda[i-1]);
-                 dvdl_dum = 0;
++            init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
++
 +            for(i=0; i<enerd->n_lambda; i++)
 +            {
-                 calc_bonds_lambda(fplog,
-                                   idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
-                                   fcd,
-                                   DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
-         
-         clear_mat(fr->vir_el_recip);  
-         
++                for (j=0;j<efptNR;j++)
++                {
++                    lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
++                }
++                calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
++                                  fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                sum_epot(&ir->opts,&ed_lam);
 +                enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
 +            }
 +            destroy_enerdata(&ed_lam);
 +        }
 +        debug_gmx();
 +    }
 +
 +    where();
 +
 +    *cycles_pme = 0;
 +    if (EEL_FULL(fr->eeltype))
 +    {
 +        bSB = (ir->nwall == 2);
 +        if (bSB)
 +        {
 +            copy_mat(box,boxs);
 +            svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +            box_size[ZZ] *= ir->wall_ewald_zfac;
 +        }
-                 dvdlambda = 0;
++
++        clear_mat(fr->vir_el_recip);
++
 +        if (fr->bEwald)
 +        {
 +            if (fr->n_tpi == 0)
 +            {
-                                            lambda,&dvdlambda,&vdip,&vcharge);
++                dvdlambda[efptCOUL] = 0;
 +                Vcorr = ewald_LRcorrection(fplog,md->start,md->start+md->homenr,
 +                                           cr,fr,
 +                                           md->chargeA,
 +                                           md->nChargePerturbed ? md->chargeB : NULL,
 +                                           excl,x,bSB ? boxs : box,mu_tot,
 +                                           ir->ewald_geometry,
 +                                           ir->epsilon_surface,
-                 enerd->dvdl_lin += dvdlambda;
++                                           lambda[efptCOUL],&dvdlambda[efptCOUL],&vdip,&vcharge);
 +                PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdlambda);
-         
-         dvdlambda = 0;
++                enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +            }
 +            else
 +            {
 +                if (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0)
 +                {
 +                    gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
 +                }
 +                /* The TPI molecule does not have exclusions with the rest
 +                 * of the system and no intra-molecular PME grid contributions
 +                 * will be calculated in gmx_pme_calc_energy.
 +                 */
 +                Vcorr = 0;
 +            }
 +        }
-                                         &Vlr,lambda,&dvdlambda,
++
++        dvdlambda[efptCOUL] = 0;
 +        status = 0;
 +        switch (fr->eeltype)
 +        {
 +        case eelPME:
 +        case eelPMESWITCH:
 +        case eelPMEUSER:
 +        case eelPMEUSERSWITCH:
 +        case eelP3M_AD:
 +            if (cr->duty & DUTY_PME)
 +            {
 +                assert(fr->n_tpi >= 0);
 +                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
 +                {
 +                    pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
 +                    if (flags & GMX_FORCE_FORCES)
 +                    {
 +                        pme_flags |= GMX_PME_CALC_F;
 +                    }
 +                    if (flags & GMX_FORCE_VIRIAL)
 +                    {
 +                        pme_flags |= GMX_PME_CALC_ENER_VIR;
 +                    }
 +                    if (fr->n_tpi > 0)
 +                    {
 +                        /* We don't calculate f, but we do want the potential */
 +                        pme_flags |= GMX_PME_CALC_POT;
 +                    }
 +                    wallcycle_start(wcycle,ewcPMEMESH);
 +                    status = gmx_pme_do(fr->pmedata,
 +                                        md->start,md->homenr - fr->n_tpi,
 +                                        x,fr->f_novirsum,
 +                                        md->chargeA,md->chargeB,
 +                                        bSB ? boxs : box,cr,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
 +                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
 +                                        nrnb,wcycle,
 +                                        fr->vir_el_recip,fr->ewaldcoeff,
-                 PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda);
-             } 
++                                        &Vlr,lambda[efptCOUL],&dvdlambda[efptCOUL],
 +                                        pme_flags);
 +                    *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +                    /* We should try to do as little computation after
 +                     * this as possible, because parallel PME synchronizes
 +                     * the nodes, so we want all load imbalance of the rest
 +                     * of the force calculation to be before the PME call.
 +                     * DD load balancing is done on the whole time of
 +                     * the force call (without PME).
 +                     */
 +                }
 +                if (fr->n_tpi > 0)
 +                {
 +                    /* Determine the PME grid energy of the test molecule
 +                     * with the PME grid potential of the other charges.
 +                     */
 +                    gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
 +                                        x + md->homenr - fr->n_tpi,
 +                                        md->chargeA + md->homenr - fr->n_tpi,
 +                                        &Vlr);
 +                }
-                            lambda,&dvdlambda,fr->ewald_table);
-             PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda);
++                PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda[efptCOUL]);
++            }
 +            else
 +            {
 +                /* Energies and virial are obtained later from the PME nodes */
 +                /* but values have to be zeroed out here */
 +                Vlr=0.0;
 +            }
 +            break;
 +        case eelEWALD:
 +            Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
 +                           md->chargeA,md->chargeB,
 +                           box_size,cr,md->homenr,
 +                           fr->vir_el_recip,fr->ewaldcoeff,
-         enerd->dvdl_lin += dvdlambda;
++                           lambda[efptCOUL],&dvdlambda[efptCOUL],fr->ewald_table);
++            PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda[efptCOUL]);
 +            break;
 +        default:
 +            Vlr = 0;
 +            gmx_fatal(FARGS,"No such electrostatics method implemented %s",
 +                      eel_names[fr->eeltype]);
 +        }
 +        if (status != 0)
 +        {
 +            gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
 +                      status,EELTYPE(fr->eeltype));
 +              }
-             dvdlambda = 0;
-             
++        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +        enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
 +        if (debug)
 +        {
 +            fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
 +                    Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
 +            pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
 +            pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
 +        }
 +    }
 +    else
 +    {
 +        if (EEL_RF(fr->eeltype))
 +        {
-                                        fr->fshift,&pbc,lambda,&dvdlambda);
++            dvdlambda[efptCOUL] = 0;
++
 +            if (fr->eeltype != eelRF_NEC)
 +            {
 +                enerd->term[F_RF_EXCL] =
 +                    RF_excl_correction(fplog,fr,graph,md,excl,x,f,
-             
-             enerd->dvdl_lin += dvdlambda;
++                                       fr->fshift,&pbc,lambda[efptCOUL],&dvdlambda[efptCOUL]);
 +            }
-                           enerd->term[F_RF_EXCL],dvdlambda);
++
++            enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +            PRINT_SEPDVDL("RF exclusion correction",
-       
++                          enerd->term[F_RF_EXCL],dvdlambda[efptCOUL]);
 +        }
 +    }
 +    where();
 +    debug_gmx();
-         print_nrnb(debug,nrnb); 
++
 +    if (debug)
 +    {
-     
++        print_nrnb(debug,nrnb);
 +    }
 +    debug_gmx();
-             fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", 
-                     cr->nodeid, gmx_step_str(fr->timesteps,buf), 
-                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf), 
++
 +#ifdef GMX_MPI
 +    if (TAKETIME)
 +    {
 +        t2=MPI_Wtime();
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +        t3=MPI_Wtime();
 +        fr->t_wait += t3-t2;
 +        if (fr->timesteps == 11)
 +        {
-         }       
++            fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
++                    cr->nodeid, gmx_step_str(fr->timesteps,buf),
++                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
 +                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
-     
++        }
 +        fr->timesteps++;
 +    }
 +#endif
-     
++
 +    if (debug)
 +    {
 +        pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
 +    }
- void init_enerdata(int ngener,int n_flambda,gmx_enerdata_t *enerd)
++
 +}
 +
-     
++void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
 +{
 +    int i,n2;
-     
++
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        enerd->term[i] = 0;
 +    }
-     if (n_flambda)
++
++
++    for(i=0; i<efptNR; i++) {
++        enerd->dvdl_lin[i]  = 0;
++        enerd->dvdl_nonlin[i]  = 0;
++    }
++
 +    n2=ngener*ngener;
 +    if (debug)
 +    {
 +        fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
 +    }
 +    enerd->grpp.nener = n2;
 +    for(i=0; (i<egNR); i++)
 +    {
 +        snew(enerd->grpp.ener[i],n2);
 +    }
 +
-         enerd->n_lambda = 1 + n_flambda;
++    if (n_lambda)
 +    {
-   
++        enerd->n_lambda = 1 + n_lambda;
 +        snew(enerd->enerpart_lambda,enerd->n_lambda);
 +    }
 +    else
 +    {
 +        enerd->n_lambda = 0;
 +    }
 +}
 +
 +void destroy_enerdata(gmx_enerdata_t *enerd)
 +{
 +    int i;
 +
 +    for(i=0; (i<egNR); i++)
 +    {
 +        sfree(enerd->grpp.ener[i]);
 +    }
 +
 +    if (enerd->n_lambda)
 +    {
 +        sfree(enerd->enerpart_lambda);
 +    }
 +}
 +
 +static real sum_v(int n,real v[])
 +{
 +  real t;
 +  int  i;
-     
++
 +  t = 0.0;
 +  for(i=0; (i<n); i++)
 +    t = t + v[i];
-   
++
 +  return t;
 +}
 +
 +void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd)
 +{
 +  gmx_grppairener_t *grpp;
 +  real *epot;
 +  int i;
-     
++
 +  grpp = &enerd->grpp;
 +  epot = enerd->term;
 +
 +  /* Accumulate energies */
 +  epot[F_COUL_SR]  = sum_v(grpp->nener,grpp->ener[egCOULSR]);
 +  epot[F_LJ]       = sum_v(grpp->nener,grpp->ener[egLJSR]);
 +  epot[F_LJ14]     = sum_v(grpp->nener,grpp->ener[egLJ14]);
 +  epot[F_COUL14]   = sum_v(grpp->nener,grpp->ener[egCOUL14]);
 +  epot[F_COUL_LR]  = sum_v(grpp->nener,grpp->ener[egCOULLR]);
 +  epot[F_LJ_LR]    = sum_v(grpp->nener,grpp->ener[egLJLR]);
 +  /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
 +  epot[F_GBPOL]   += sum_v(grpp->nener,grpp->ener[egGB]);
-     if (i != F_DISRESVIOL && i != F_ORIRESDEV && i != F_DIHRESVIOL)
-       epot[F_EPOT] += epot[i];
++
 +/* The lattice part of LR doesn't belong to any group
 + * and has been added earlier
 + */
 +  epot[F_BHAM]     = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
 +  epot[F_BHAM_LR]  = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
 +
 +  epot[F_EPOT] = 0;
 +  for(i=0; (i<F_EPOT); i++)
- void sum_dhdl(gmx_enerdata_t *enerd,double lambda,t_inputrec *ir)
++  {
++      if (i != F_DISRESVIOL && i != F_ORIRESDEV)
++      {
++          epot[F_EPOT] += epot[i];
++      }
++  }
 +}
 +
-     int i;
-     double dlam,dhdl_lin;
++void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
 +{
-     enerd->term[F_DVDL] = enerd->dvdl_lin + enerd->dvdl_nonlin;
-     if (debug)
++    int i,j,index;
++    double dlam;
 +
-         fprintf(debug,"dvdl: %f, non-linear %f + linear %f\n",
-                 enerd->term[F_DVDL],enerd->dvdl_nonlin,enerd->dvdl_lin);
++    enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
++    enerd->term[F_DVDL] = 0.0;
++    for (i=0;i<efptNR;i++)
 +    {
-      * For the constraint dvdl this is not exact, but we have no other option.
++        if (fepvals->separate_dvdl[i])
++        {
++            /* could this be done more readably/compactly? */
++            switch (i) {
++            case (efptCOUL):
++                index = F_DVDL_COUL;
++                break;
++            case (efptVDW):
++                index = F_DVDL_VDW;
++                break;
++            case (efptBONDED):
++                index = F_DVDL_BONDED;
++                break;
++            case (efptRESTRAINT):
++                index = F_DVDL_RESTRAINT;
++                break;
++            case (efptMASS):
++                index = F_DKDL;
++                break;
++            default:
++                index = F_DVDL;
++                break;
++            }
++            enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
++            if (debug)
++            {
++                fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
++                        efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
++            }
++        }
++        else
++        {
++            enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
++            if (debug)
++            {
++                fprintf(debug,"dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
++                        efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
++            }
++        }
 +    }
 +
 +    /* Notes on the foreign lambda free energy difference evaluation:
 +     * Adding the potential and ekin terms that depend linearly on lambda
 +     * as delta lam * dvdl to the energy differences is exact.
-     for(i=1; i<enerd->n_lambda; i++)
-     {
-         dlam = (ir->flambda[i-1] - lambda);
-         dhdl_lin =
-             enerd->dvdl_lin + enerd->term[F_DKDL] + enerd->term[F_DHDL_CON];
-         if (debug)
++     * For the constraints this is not exact, but we have no other option
++     * without literally changing the lengths and reevaluating the energies at each step.
++     * (try to remedy this post 4.6 - MRS)
 +     * For the non-bonded LR term we assume that the soft-core (if present)
 +     * no longer affects the energy beyond the short-range cut-off,
 +     * which is a very good approximation (except for exotic settings).
++     * (investigate how to overcome this post 4.6 - MRS)
 +     */
-             fprintf(debug,"enerdiff lam %g: non-linear %f linear %f*%f\n",
-                     ir->flambda[i-1],
-                     enerd->enerpart_lambda[i] - enerd->enerpart_lambda[0],
-                     dlam,dhdl_lin);
-         }
-         enerd->enerpart_lambda[i] += dlam*dhdl_lin;
++
++    for(i=0; i<fepvals->n_lambda; i++)
++    {                                         /* note we are iterating over fepvals here!
++                                                 For the current lam, dlam = 0 automatically,
++                                                 so we don't need to add anything to the
++                                                 enerd->enerpart_lambda[0] */
++
++        /* we don't need to worry about dvdl contributions to the current lambda, because
++           it's automatically zero */
++
++        /* first kinetic energy term */
++        dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
++
++        enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
++
++        for (j=0;j<efptNR;j++)
 +        {
-   gmx_bool bKeepLR;
-   int  i,j;
-   
-   /* First reset all energy components, except for the long range terms
-    * on the master at non neighbor search steps, since the long range
-    * terms have already been summed at the last neighbor search step.
-    */
-   bKeepLR = (fr->bTwinRange && !bNS);
-   for(i=0; (i<egNR); i++) {
-     if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
-       for(j=0; (j<enerd->grpp.nener); j++)
-       enerd->grpp.ener[i][j] = 0.0;
++            if (j==efptMASS) {continue;} /* no other mass term to worry about */
 +
++            dlam = (fepvals->all_lambda[j][i]-lambda[j]);
++            enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
++            if (debug)
++            {
++                fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
++                        fepvals->all_lambda[j][i],efpt_names[j],
++                        (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
++                        dlam,enerd->dvdl_lin[j]);
++            }
++        }
 +    }
 +}
 +
 +void reset_enerdata(t_grpopts *opts,
 +                    t_forcerec *fr,gmx_bool bNS,
 +                    gmx_enerdata_t *enerd,
 +                    gmx_bool bMaster)
 +{
-   }
-   enerd->dvdl_lin    = 0.0;
-   enerd->dvdl_nonlin = 0.0;
++    gmx_bool bKeepLR;
++    int  i,j;
++
++    /* First reset all energy components, except for the long range terms
++     * on the master at non neighbor search steps, since the long range
++     * terms have already been summed at the last neighbor search step.
++     */
++    bKeepLR = (fr->bTwinRange && !bNS);
++    for(i=0; (i<egNR); i++) {
++        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
++            for(j=0; (j<enerd->grpp.nener); j++)
++                enerd->grpp.ener[i][j] = 0.0;
++        }
++    }
++    for (i=0;i<efptNR;i++)
++    {
++        enerd->dvdl_lin[i]    = 0.0;
++        enerd->dvdl_nonlin[i] = 0.0;
 +    }
-   /* Normal potential energy components */
-   for(i=0; (i<=F_EPOT); i++) {
-     enerd->term[i] = 0.0;
-   }
-   /* Initialize the dVdlambda term with the long range contribution */
-   enerd->term[F_DVDL]     = 0.0;
-   enerd->term[F_DKDL]     = 0.0;
-   enerd->term[F_DHDL_CON] = 0.0;
-   if (enerd->n_lambda > 0)
-   {
-       for(i=0; i<enerd->n_lambda; i++)
-       {
-           enerd->enerpart_lambda[i] = 0.0;
-       }
-   }
 +
++    /* Normal potential energy components */
++    for(i=0; (i<=F_EPOT); i++) {
++        enerd->term[i] = 0.0;
++    }
++    /* Initialize the dVdlambda term with the long range contribution */
++    /* Initialize the dvdl term with the long range contribution */
++    enerd->term[F_DVDL]            = 0.0;
++    enerd->term[F_DVDL_COUL]       = 0.0;
++    enerd->term[F_DVDL_VDW]        = 0.0;
++    enerd->term[F_DVDL_BONDED]     = 0.0;
++    enerd->term[F_DVDL_RESTRAINT]  = 0.0;
++    enerd->term[F_DKDL]            = 0.0;
++    if (enerd->n_lambda > 0)
++    {
++        for(i=0; i<enerd->n_lambda; i++)
++        {
++            enerd->enerpart_lambda[i] = 0.0;
++        }
++    }
 +}
Simple merge
Simple merge
index c08eaef67a0ee0b7692a7f0fb2088a68c376b37b,0000000000000000000000000000000000000000..4a0aff42b83f75bedcb401da680c3f04e1dc15ea
mode 100644,000000..100644
--- /dev/null
@@@ -1,742 -1,0 +1,818 @@@
-         calc_dispcorr(fplog,ir,fr,0,top_global->natoms,box,state->lambda,
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "mtop_util.h"
 +#include "gmx_wallcycle.h"
 +#include "vcm.h"
 +#include "nrnb.h"
 +#include "macros.h"
 +
 +/* Is the signal in one simulation independent of other simulations? */
 +gmx_bool gs_simlocal[eglsNR] = { TRUE, FALSE, FALSE, TRUE };
 +
 +/* check which of the multisim simulations has the shortest number of
 +   steps and return that number of nsteps */
 +gmx_large_int_t get_multisim_nsteps(const t_commrec *cr,
 +                                    gmx_large_int_t nsteps)
 +{
 +    gmx_large_int_t steps_out;
 +
 +    if MASTER(cr)
 +    {
 +        gmx_large_int_t *buf;
 +        int s;
 +
 +        snew(buf,cr->ms->nsim);
 +
 +        buf[cr->ms->sim] = nsteps;
 +        gmx_sumli_sim(cr->ms->nsim, buf, cr->ms);
 +
 +        steps_out=-1;
 +        for(s=0; s<cr->ms->nsim; s++)
 +        {
 +            /* find the smallest positive number */
 +            if (buf[s]>= 0 && ((steps_out < 0) || (buf[s]<steps_out)) )
 +            {
 +                steps_out=buf[s];
 +            }
 +        }
 +        sfree(buf);
 +
 +        /* if we're the limiting simulation, don't do anything */
 +        if (steps_out>=0 && steps_out<nsteps) 
 +        {
 +            char strbuf[255];
 +            snprintf(strbuf, 255, "Will stop simulation %%d after %s steps (another simulation will end then).\n", gmx_large_int_pfmt);
 +            fprintf(stderr, strbuf, cr->ms->sim, steps_out);
 +        }
 +    }
 +    /* broadcast to non-masters */
 +    gmx_bcast(sizeof(gmx_large_int_t), &steps_out, cr);
 +    return steps_out;
 +}
 +
 +int multisim_min(const gmx_multisim_t *ms,int nmin,int n)
 +{
 +    int  *buf;
 +    gmx_bool bPos,bEqual;
 +    int  s,d;
 +
 +    snew(buf,ms->nsim);
 +    buf[ms->sim] = n;
 +    gmx_sumi_sim(ms->nsim,buf,ms);
 +    bPos   = TRUE;
 +    bEqual = TRUE;
 +    for(s=0; s<ms->nsim; s++)
 +    {
 +        bPos   = bPos   && (buf[s] > 0);
 +        bEqual = bEqual && (buf[s] == buf[0]);
 +    }
 +    if (bPos)
 +    {
 +        if (bEqual)
 +        {
 +            nmin = min(nmin,buf[0]);
 +        }
 +        else
 +        {
 +            /* Find the least common multiple */
 +            for(d=2; d<nmin; d++)
 +            {
 +                s = 0;
 +                while (s < ms->nsim && d % buf[s] == 0)
 +                {
 +                    s++;
 +                }
 +                if (s == ms->nsim)
 +                {
 +                    /* We found the LCM and it is less than nmin */
 +                    nmin = d;
 +                    break;
 +                }
 +            }
 +        }
 +    }
 +    sfree(buf);
 +
 +    return nmin;
 +}
 +
 +int multisim_nstsimsync(const t_commrec *cr,
 +                        const t_inputrec *ir,int repl_ex_nst)
 +{
 +    int nmin;
 +
 +    if (MASTER(cr))
 +    {
 +        nmin = INT_MAX;
 +        nmin = multisim_min(cr->ms,nmin,ir->nstlist);
 +        nmin = multisim_min(cr->ms,nmin,ir->nstcalcenergy);
 +        nmin = multisim_min(cr->ms,nmin,repl_ex_nst);
 +        if (nmin == INT_MAX)
 +        {
 +            gmx_fatal(FARGS,"Can not find an appropriate interval for inter-simulation communication, since nstlist, nstcalcenergy and -replex are all <= 0");
 +        }
 +        /* Avoid inter-simulation communication at every (second) step */
 +        if (nmin <= 2)
 +        {
 +            nmin = 10;
 +        }
 +    }
 +
 +    gmx_bcast(sizeof(int),&nmin,cr);
 +
 +    return nmin;
 +}
 +
 +void init_global_signals(globsig_t *gs,const t_commrec *cr,
 +                         const t_inputrec *ir,int repl_ex_nst)
 +{
 +    int i;
 +
 +    if (MULTISIM(cr))
 +    {
 +        gs->nstms = multisim_nstsimsync(cr,ir,repl_ex_nst);
 +        if (debug)
 +        {
 +            fprintf(debug,"Syncing simulations for checkpointing and termination every %d steps\n",gs->nstms);
 +        }
 +    }
 +    else
 +    {
 +        gs->nstms = 1;
 +    }
 +
 +    for(i=0; i<eglsNR; i++)
 +    {
 +        gs->sig[i] = 0;
 +        gs->set[i] = 0;
 +    }
 +}
 +
 +void copy_coupling_state(t_state *statea,t_state *stateb, 
 +                         gmx_ekindata_t *ekinda,gmx_ekindata_t *ekindb, t_grpopts* opts) 
 +{
 +    /* Copy sizes, x, v, box data and thermostat/barostat variables from statea into stateb; when ekinda is non-NULL also copy its kinetic-energy data into ekindb. */
 +    /* MRS note -- might be able to get rid of some of the arguments.  Look over it when it's all debugged */
 +    /* NOTE(review): only x and v are resized below; the nosehoover_* and nhpres_* arrays of stateb are written without resizing -- presumably pre-allocated by the caller, confirm. */
 +    int i,j,nc;
 +
 +    /* Make sure we have enough space for x and v */
 +    if (statea->nalloc > stateb->nalloc)
 +    {
 +        stateb->nalloc = statea->nalloc;
 +        srenew(stateb->x,stateb->nalloc);
 +        srenew(stateb->v,stateb->nalloc);
 +    }
 +
 +    stateb->natoms     = statea->natoms;
 +    stateb->ngtc       = statea->ngtc;
 +    stateb->nnhpres    = statea->nnhpres;
 +    stateb->veta       = statea->veta;
 +    if (ekinda) /* ekindb is dereferenced whenever ekinda is given, so it must be non-NULL too */
 +    {
 +        copy_mat(ekinda->ekin,ekindb->ekin);
 +        for (i=0; i<stateb->ngtc; i++) 
 +        {
 +            ekindb->tcstat[i].T = ekinda->tcstat[i].T;
 +            ekindb->tcstat[i].Th = ekinda->tcstat[i].Th;
 +            copy_mat(ekinda->tcstat[i].ekinh,ekindb->tcstat[i].ekinh);
 +            copy_mat(ekinda->tcstat[i].ekinf,ekindb->tcstat[i].ekinf);
 +            ekindb->tcstat[i].ekinscalef_nhc =  ekinda->tcstat[i].ekinscalef_nhc;
 +            ekindb->tcstat[i].ekinscaleh_nhc =  ekinda->tcstat[i].ekinscaleh_nhc;
 +            ekindb->tcstat[i].vscale_nhc =  ekinda->tcstat[i].vscale_nhc;
 +        }
 +    }
 +    copy_rvecn(statea->x,stateb->x,0,stateb->natoms);
 +    copy_rvecn(statea->v,stateb->v,0,stateb->natoms);
 +    copy_mat(statea->box,stateb->box);
 +    copy_mat(statea->box_rel,stateb->box_rel);
 +    copy_mat(statea->boxv,stateb->boxv);
 +    /* Thermostat variables: ngtc groups, each with opts->nhchainlength consecutive entries */
 +    for (i = 0; i<stateb->ngtc; i++) 
 +    { 
 +        nc = i*opts->nhchainlength; /* offset of group i in the flat arrays */
 +        for (j=0; j<opts->nhchainlength; j++) 
 +        {
 +            stateb->nosehoover_xi[nc+j]  = statea->nosehoover_xi[nc+j];
 +            stateb->nosehoover_vxi[nc+j] = statea->nosehoover_vxi[nc+j];
 +        }
 +    }
 +    if (stateb->nhpres_xi != NULL) /* only the destination pointer is tested; statea->nhpres_xi is read unchecked */
 +    {
 +        for (i = 0; i<stateb->nnhpres; i++) 
 +        {
 +            nc = i*opts->nhchainlength;
 +            for (j=0; j<opts->nhchainlength; j++) 
 +            {
 +                stateb->nhpres_xi[nc+j]  = statea->nhpres_xi[nc+j];
 +                stateb->nhpres_vxi[nc+j] = statea->nhpres_vxi[nc+j];
 +            }
 +        }
 +    }
 +}
 +
 +real compute_conserved_from_auxiliary(t_inputrec *ir, t_state *state, t_extmass *MassQ)
 +{ /* Return the thermostat-dependent energy term selected by ir->etc; 0 when the thermostat contributes nothing here. */
 +    real quantity = 0; /* result for etcNO, etcBERENDSEN and any unlisted value */
 +    switch (ir->etc) 
 +    {
 +    case etcNO:
 +        break;
 +    case etcBERENDSEN:
 +        break;
 +    case etcNOSEHOOVER:
 +        quantity = NPT_energy(ir,state,MassQ);                
 +        break;
 +    case etcVRESCALE:
 +        quantity = vrescale_energy(&(ir->opts),state->therm_integral);
 +        break;
 +    default:
 +        break;
 +    }
 +    return quantity;
 +}
 +
 +void compute_globals(FILE *fplog, gmx_global_stat_t gstat, t_commrec *cr, t_inputrec *ir, 
 +                     t_forcerec *fr, gmx_ekindata_t *ekind, 
 +                     t_state *state, t_state *state_global, t_mdatoms *mdatoms, 
 +                     t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle,
 +                     gmx_enerdata_t *enerd,tensor force_vir, tensor shake_vir, tensor total_vir, 
 +                     tensor pres, rvec mu_tot, gmx_constr_t constr, 
 +                     globsig_t *gs,gmx_bool bInterSimGS,
 +                     matrix box, gmx_mtop_t *top_global, real *pcurr, 
 +                     int natoms, gmx_bool *bSumEkinhOld, int flags)
 +{
 +    int  i,gsi;
 +    real gs_buf[eglsNR]; /* copy of gs->sig[] handed to global_stat/gmx_sum_sim; rounded back to int below */
 +    tensor corr_vir,corr_pres,shakeall_vir;
 +    gmx_bool bEner,bPres,bTemp, bVV;
 +    gmx_bool bRerunMD, bStopCM, bGStat, bIterate, 
 +        bFirstIterate,bReadEkin,bEkinAveVel,bScaleEkin, bConstrain;
 +    real ekin,temp,prescorr,enercorr,dvdlcorr; /* ekin and temp are not referenced below */
 +    /* Computes the global quantities selected by the CGLO_* bits in flags: kinetic energy and temperature, COM motion removal, the global/inter-simulation summation with signal exchange, dispersion correction, and pressure. */
 +    /* translate CGLO flags to gmx_booleans */
 +    bRerunMD = flags & CGLO_RERUNMD; /* assigned but not read in this function */
 +    bStopCM = flags & CGLO_STOPCM;
 +    bGStat = flags & CGLO_GSTAT;
 +
 +    bReadEkin = (flags & CGLO_READEKIN);
 +    bScaleEkin = (flags & CGLO_SCALEEKIN);
 +    bEner = flags & CGLO_ENERGY;
 +    bTemp = flags & CGLO_TEMPERATURE;
 +    bPres  = (flags & CGLO_PRESSURE);
 +    bConstrain = (flags & CGLO_CONSTRAINT);
 +    bIterate = (flags & CGLO_ITERATE);
 +    bFirstIterate = (flags & CGLO_FIRSTITERATE);
 +
 +    /* we calculate a full state kinetic energy either with full-step velocity verlet
 +       or half step where we need the pressure */
 +    
 +    bEkinAveVel = (ir->eI==eiVV || (ir->eI==eiVVAK && bPres) || bReadEkin);
 +    
 +    /* During initialization, this routine sums the shake virial for vv, and
 +       sums ekinh_old for leapfrog (or whenever ekinh_old is being calculated for other reasons) */
 +
 +    /* ########## Kinetic energy  ############## */
 +    
 +    if (bTemp) 
 +    {
 +        /* Non-equilibrium MD: this is parallelized, but only does communication
 +         * when there really is NEMD.
 +         */
 +        
 +        if (PAR(cr) && (ekind->bNEMD)) 
 +        {
 +            accumulate_u(cr,&(ir->opts),ekind);
 +        }
 +        debug_gmx();
 +        if (bReadEkin)
 +        {
 +            restore_ekinstate_from_state(cr,ekind,&state_global->ekinstate);
 +        }
 +        else 
 +        {
 +
 +            calc_ke_part(state,&(ir->opts),mdatoms,ekind,nrnb,bEkinAveVel,bIterate);
 +        }
 +        
 +        debug_gmx();
 +    }
 +
 +    /* Calculate center of mass velocity if necessary, also parallelized */
 +    if (bStopCM)
 +    {
 +        calc_vcm_grp(fplog,mdatoms->start,mdatoms->homenr,mdatoms,
 +                     state->x,state->v,vcm);
 +    }
 +
 +    if (bTemp || bStopCM || bPres || bEner || bConstrain)
 +    {
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,
 +             * so signal that we still have to do it.
 +             */
 +            *bSumEkinhOld = TRUE;
 +
 +        }
 +        else
 +        {
 +            if (gs != NULL)
 +            {
 +                for(i=0; i<eglsNR; i++)
 +                {
 +                    gs_buf[i] = gs->sig[i];
 +                }
 +            }
 +            if (PAR(cr)) 
 +            {
 +                wallcycle_start(wcycle,ewcMoveE);
 +                global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 +                            ir,ekind,constr,bStopCM ? vcm : NULL,
 +                            gs != NULL ? eglsNR : 0,gs_buf,
 +                            top_global,state,
 +                            *bSumEkinhOld,flags);
 +                wallcycle_stop(wcycle,ewcMoveE);
 +            }
 +            if (gs != NULL)
 +            {
 +                if (MULTISIM(cr) && bInterSimGS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        /* Communicate the signals between the simulations */
 +                        gmx_sum_sim(eglsNR,gs_buf,cr->ms);
 +                    }
 +                    /* Communicate the signals from the master to the others */
 +                    gmx_bcast(eglsNR*sizeof(gs_buf[0]),gs_buf,cr);
 +                }
 +                for(i=0; i<eglsNR; i++)
 +                {
 +                    if (bInterSimGS || gs_simlocal[i]) /* gs_simlocal is defined outside this function */
 +                    {
 +                        /* Set the communicated signal only when it is non-zero,
 +                         * since signals might not be processed at each MD step.
 +                         */
 +                        gsi = (gs_buf[i] >= 0 ?
 +                               (int)(gs_buf[i] + 0.5) :
 +                               (int)(gs_buf[i] - 0.5)); /* round the real-valued buffer entry to the nearest int */
 +                        if (gsi != 0)
 +                        {
 +                            gs->set[i] = gsi;
 +                        }
 +                        /* Turn off the local signal */
 +                        gs->sig[i] = 0;
 +                    }
 +                }
 +            }
 +            *bSumEkinhOld = FALSE;
 +        }
 +    }
 +    
 +    if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0))
 +    {
 +        correct_ekin(debug,
 +                     mdatoms->start,mdatoms->start+mdatoms->homenr,
 +                     state->v,vcm->group_p[0],
 +                     mdatoms->massT,mdatoms->tmass,ekind->ekin);
 +    }
 +    
 +    /* Do center of mass motion removal */
 +    if (bStopCM)
 +    {
 +        check_cm_grp(fplog,vcm,ir,1);
 +        do_stopcm_grp(fplog,mdatoms->start,mdatoms->homenr,mdatoms->cVCM,
 +                      state->x,state->v,vcm);
 +        inc_nrnb(nrnb,eNR_STOPCM,mdatoms->homenr);
 +    }
 +
 +    if (bEner)
 +    {
 +        /* Calculate the amplitude of the cosine velocity profile */
 +        ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass;
 +    }
 +
 +    if (bTemp) 
 +    {
 +        /* Sum the kinetic energies of the groups & calc temp */
 +        /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with IR_NPT_TROTTER */
 +        /* three main cases:  VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md).
 +           Leap with AveVel is not supported; it's not clear that it will actually work.
 +           bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy.
 +           If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy.
 +           bSaveEkinOld: If TRUE (in the case of iteration = bIterate is TRUE), we don't reset the ekinscale_nhc.
 +           If FALSE, we go ahead and erase over it.
 +        */ 
 +        enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,&(enerd->term[F_DKDL]),
 +                                       bEkinAveVel,bIterate,bScaleEkin);
 + 
 +        enerd->term[F_EKIN] = trace(ekind->ekin);
 +    }
 +    
 +    /* ##########  Long range energy information ###### */
 +    
 +    if (bEner || bPres || bConstrain) 
 +    {
-         enerd->term[F_DVDL] += dvdlcorr;
-         if (fr->efep != efepNO) {
-             enerd->dvdl_lin += dvdlcorr;
-         }
++        calc_dispcorr(fplog,ir,fr,0,top_global->natoms,box,state->lambda[efptVDW],
 +                      corr_pres,corr_vir,&prescorr,&enercorr,&dvdlcorr);
 +    }
 +    
 +    if (bEner && bFirstIterate) 
 +    {
 +        enerd->term[F_DISPCORR] = enercorr;
 +        enerd->term[F_EPOT] += enercorr;
-         
++        enerd->term[F_DVDL_VDW] += dvdlcorr;
 +    }
 +    
 +    /* ########## Now pressure ############## */
 +    if (bPres || bConstrain) 
 +    {
 +        
 +        m_add(force_vir,shake_vir,total_vir);
 +        
 +        /* Calculate pressure and apply LR correction if PPPM is used.
 +         * Use the box from last timestep since we already called update().
 +         */
 +        
 +        enerd->term[F_PRES] = calc_pres(fr->ePBC,ir->nwall,box,ekind->ekin,total_vir,pres);
 +        
 +        /* Calculate long range corrections to pressure and energy */
 +        /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT], 
 +           and computes enerd->term[F_DISPCORR].  Also modifies the 
 +           total_vir and pres tensors */
 +        
 +        m_add(total_vir,corr_vir,total_vir);
 +        m_add(pres,corr_pres,pres);
 +        enerd->term[F_PDISPCORR] = prescorr;
 +        enerd->term[F_PRES] += prescorr;
 +        *pcurr = enerd->term[F_PRES];
 +        /* calculate temperature using virial */
 +        enerd->term[F_VTEMP] = calc_temp(trace(total_vir),ir->opts.nrdf[0]);
-                             "nstdhdl",&ir->nstdhdl);
++
 +    }    
 +}
 +
 +void check_nst_param(FILE *fplog,t_commrec *cr,
 +                     const char *desc_nst,int nst,
 +                     const char *desc_p,int *p)
 +{
 +    char buf[STRLEN];
 +
 +    if (*p > 0 && *p % nst != 0)
 +    {
 +        /* Round up to the next multiple of nst */
 +        *p = ((*p)/nst + 1)*nst;
 +        sprintf(buf,"NOTE: %s changes %s to %d\n",desc_nst,desc_p,*p);
 +        md_print_warning(cr,fplog,buf);
 +    }
 +}
 +
++void set_current_lambdas(gmx_large_int_t step, t_lambda *fepvals, gmx_bool bRerunMD,
++                         t_trxframe *rerun_fr,t_state *state_global, t_state *state, double lam0[])
++/* Find the current lambdas, store them in state_global->lambda and copy them to state->lambda.
++   If rerunning, we either read in a state, or a lambda value, requiring different logic:
++   - frame carries a lambda value (bLambda): use it directly when lambda is static
++     (delta_lambda == 0), otherwise interpolate in the lambda table from the step number;
++   - frame carries a lambda state index (bFepState): look up that state in the lambda table.
++   When not rerunning, lambdas only change when delta_lambda != 0. */
++{
++    real frac;
++    int i,fep_state=0;
++    if (bRerunMD)
++    {
++        if (rerun_fr->bLambda)
++        {
++            /* Static lambda: there is nothing to interpolate, so take the frame's value.
++             * (This test used to be inverted, which sent the delta_lambda == 0 case into the
++             * interpolation below with frac always 0 and an unconditional read of all_lambda[i][1].) */
++            if (fepvals->delta_lambda==0)
++            {
++                state_global->lambda[efptFEP] = rerun_fr->lambda;
++                for (i=0;i<efptNR;i++)
++                {
++                    if (i!= efptFEP)
++                    {
++                        state->lambda[i] = state_global->lambda[i];
++                    }
++                }
++            }
++            else
++            {
++                /* find out between which two value of lambda we should be */
++                frac = (step*fepvals->delta_lambda);
++                fep_state = floor(frac*fepvals->n_lambda);
++                /* interpolate between this state and the next */
++                /* this assumes that the initial lambda corresponds to lambda==0, which is verified in grompp */
++                frac = (frac*fepvals->n_lambda)-fep_state;
++                for (i=0;i<efptNR;i++)
++                {
++                    state_global->lambda[i] = lam0[i] + (fepvals->all_lambda[i][fep_state]) +
++                        frac*(fepvals->all_lambda[i][fep_state+1]-fepvals->all_lambda[i][fep_state]);
++                }
++            }
++        }
++        else if (rerun_fr->bFepState)
++        {
++            state_global->fep_state = rerun_fr->fep_state;
++            for (i=0;i<efptNR;i++)
++            {
++                /* index with the state just read from the frame, not the local fep_state (still 0 here) */
++                state_global->lambda[i] = fepvals->all_lambda[i][state_global->fep_state];
++            }
++        }
++    }
++    else
++    {
++        if (fepvals->delta_lambda!=0)
++        {
++            /* find out between which two value of lambda we should be */
++            frac = (step*fepvals->delta_lambda);
++            if (fepvals->n_lambda > 0)
++            {
++                fep_state = floor(frac*fepvals->n_lambda);
++                /* interpolate between this state and the next */
++                /* this assumes that the initial lambda corresponds to lambda==0, which is verified in grompp */
++                frac = (frac*fepvals->n_lambda)-fep_state;
++                for (i=0;i<efptNR;i++)
++                {
++                    state_global->lambda[i] = lam0[i] + (fepvals->all_lambda[i][fep_state]) +
++                        frac*(fepvals->all_lambda[i][fep_state+1]-fepvals->all_lambda[i][fep_state]);
++                }
++            }
++            else
++            {
++                /* no lambda table: every component advances linearly from its initial value */
++                for (i=0;i<efptNR;i++)
++                {
++                    state_global->lambda[i] = lam0[i] + frac;
++                }
++            }
++        }
++    }
++    /* the local state always mirrors the global lambdas on exit */
++    for (i=0;i<efptNR;i++)
++    {
++        state->lambda[i] = state_global->lambda[i];
++    }
++}
++
 +void reset_all_counters(FILE *fplog,t_commrec *cr,
 +                        gmx_large_int_t step,
 +                        gmx_large_int_t *step_rel,t_inputrec *ir,
 +                        gmx_wallcycle_t wcycle,t_nrnb *nrnb,
 +                        gmx_runtime_t *runtime)
 +{
 +    char buf[STRLEN],sbuf[STEPSTRSIZE];
 +    /* Restart the timing bookkeeping at the given step and rebase the step counters in ir/step_rel accordingly. */
 +    /* Reset all the counters related to performance over the run */
 +    sprintf(buf,"Step %s: resetting all time and cycle counters\n",
 +            gmx_step_str(step,sbuf));
 +    md_print_warning(cr,fplog,buf);
 +    /* The run counter is stopped around the reset and started again further down */
 +    wallcycle_stop(wcycle,ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        reset_dd_statistics_counters(cr->dd);
 +    }
 +    init_nrnb(nrnb);
 +    ir->init_step += *step_rel; /* fold the steps done so far into the starting step ... */
 +    ir->nsteps    -= *step_rel; /* ... and remove them from the number of steps still to run */
 +    *step_rel = 0;
 +    wallcycle_start(wcycle,ewcRUN);
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
 +}
 +
 +void min_zero(int *n,int i)
 +{ /* Running minimum over positive values: *n == 0 means "nothing stored yet"; i <= 0 is ignored. */
 +    if (i > 0 && (*n == 0 || i < *n))
 +    {
 +        *n = i;
 +    }
 +}
 +
 +int lcd4(int i1,int i2,int i3,int i4)
 +{
 +    int nst;
 +    /* Return the largest integer that divides every positive input (a greatest common divisor, despite the name); inputs <= 0 are ignored. */
 +    nst = 0;
 +    min_zero(&nst,i1);
 +    min_zero(&nst,i2);
 +    min_zero(&nst,i3);
 +    min_zero(&nst,i4);
 +    if (nst == 0) /* no input was positive */
 +    {
 +        gmx_incons("All 4 inputs for determininig nstglobalcomm are <= 0");
 +    }
 +    /* Start from the smallest positive input and count down until the candidate divides all positive inputs; 1 always does */
 +    while (nst > 1 && ((i1 > 0 && i1 % nst != 0)  ||
 +                       (i2 > 0 && i2 % nst != 0)  ||
 +                       (i3 > 0 && i3 % nst != 0)  ||
 +                       (i4 > 0 && i4 % nst != 0)))
 +    {
 +        nst--;
 +    }
 +
 +    return nst;
 +}
 +
 +int check_nstglobalcomm(FILE *fplog,t_commrec *cr,
 +                        int nstglobalcomm,t_inputrec *ir)
 +{
 +    char buf[STRLEN];
 +    /* Validate or choose the global communication interval and return it. May modify nstcalcenergy, nsttcouple, nstpcouple, nstenergy, nstlog and nstcomm in ir. */
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        nstglobalcomm = 1;
 +    }
 +    /* -1 requests an automatic choice; any other value is checked against the settings in ir */
 +    if (nstglobalcomm == -1)
 +    {
 +        if (!(ir->nstcalcenergy > 0 ||
 +              ir->nstlist > 0 ||
 +              ir->etc != etcNO ||
 +              ir->epc != epcNO))
 +        {
 +            nstglobalcomm = 10; /* nothing constrains the interval: use 10, or nstenergy when that is smaller */
 +            if (ir->nstenergy > 0 && ir->nstenergy < nstglobalcomm)
 +            {
 +                nstglobalcomm = ir->nstenergy;
 +            }
 +        }
 +        else
 +        {
 +            /* Ensure that we do timely global communication for
 +             * (possibly) each of the four following options.
 +             */
 +            nstglobalcomm = lcd4(ir->nstcalcenergy,
 +                                 ir->nstlist,
 +                                 ir->etc != etcNO ? ir->nsttcouple : 0,
 +                                 ir->epc != epcNO ? ir->nstpcouple : 0);
 +        }
 +    }
 +    else
 +    {
 +        if (ir->nstlist > 0 &&
 +            nstglobalcomm > ir->nstlist && nstglobalcomm % ir->nstlist != 0)
 +        {
 +            nstglobalcomm = (nstglobalcomm / ir->nstlist)*ir->nstlist; /* round down to a multiple of nstlist */
 +            sprintf(buf,"WARNING: nstglobalcomm is larger than nstlist, but not a multiple, setting it to %d\n",nstglobalcomm);
 +            md_print_warning(cr,fplog,buf);
 +        }
 +        if (ir->nstcalcenergy > 0)
 +        {
 +            check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
 +                            "nstcalcenergy",&ir->nstcalcenergy);
 +        }
 +        if (ir->etc != etcNO && ir->nsttcouple > 0)
 +        {
 +            check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
 +                            "nsttcouple",&ir->nsttcouple);
 +        }
 +        if (ir->epc != epcNO && ir->nstpcouple > 0)
 +        {
 +            check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
 +                            "nstpcouple",&ir->nstpcouple);
 +        }
 +        /* nstenergy and nstlog are passed unconditionally; check_nst_param itself leaves values <= 0 untouched */
 +        check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
 +                        "nstenergy",&ir->nstenergy);
 +
 +        check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
 +                        "nstlog",&ir->nstlog);
 +    }
 +    /* With COM motion removal enabled, nstcomm is raised to at least the communication interval */
 +    if (ir->comm_mode != ecmNO && ir->nstcomm < nstglobalcomm)
 +    {
 +        sprintf(buf,"WARNING: Changing nstcomm from %d to %d\n",
 +                ir->nstcomm,nstglobalcomm);
 +        md_print_warning(cr,fplog,buf);
 +        ir->nstcomm = nstglobalcomm;
 +    }
 +
 +    return nstglobalcomm;
 +}
 +
 +void check_ir_old_tpx_versions(t_commrec *cr,FILE *fplog,
 +                               t_inputrec *ir,gmx_mtop_t *mtop)
 +{
 +    /* Check required for old tpx files: with twin-range settings, make the energy, coupling and output intervals consistent multiples of each other */
 +    if (IR_TWINRANGE(*ir) && ir->nstlist > 1 &&
 +        ir->nstcalcenergy % ir->nstlist != 0)
 +    {
 +        md_print_warning(cr,fplog,"Old tpr file with twin-range settings: modifying energy calculation and/or T/P-coupling frequencies");
 +        /* Constraints handled by SHAKE: warn, and refuse to continue when pressure coupling is on */
 +        if (gmx_mtop_ftype_count(mtop,F_CONSTR) +
 +            gmx_mtop_ftype_count(mtop,F_CONSTRNC) > 0 &&
 +            ir->eConstrAlg == econtSHAKE)
 +        {
 +            md_print_warning(cr,fplog,"With twin-range cut-off's and SHAKE the virial and pressure are incorrect");
 +            if (ir->epc != epcNO)
 +            {
 +                gmx_fatal(FARGS,"Can not do pressure coupling with twin-range cut-off's and SHAKE");
 +            }
 +        }
 +        check_nst_param(fplog,cr,"nstlist",ir->nstlist,
 +                        "nstcalcenergy",&ir->nstcalcenergy); /* nstcalcenergy is adjusted first; the calls below use the adjusted value */
 +        if (ir->epc != epcNO)
 +        {
 +            check_nst_param(fplog,cr,"nstlist",ir->nstlist,
 +                            "nstpcouple",&ir->nstpcouple);
 +        }
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "nstenergy",&ir->nstenergy);
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "nstlog",&ir->nstlog);
 +        if (ir->efep != efepNO)
 +        {
 +            check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
++                            "nstdhdl",&ir->fepvals->nstdhdl);
 +        }
 +    }
 +}
 +
 +void rerun_parallel_comm(t_commrec *cr,t_trxframe *fr,
 +                         gmx_bool *bNotLastFrame)
 +{
 +    gmx_bool bAlloc;
 +    rvec *xp,*vp;
 +    /* Distribute the rerun frame header (and, with particle decomposition, x and v) and tell every rank whether this is a valid frame. */
 +    bAlloc = (fr->natoms == 0); /* evaluated before the broadcast below overwrites fr->natoms */
 +    /* The master encodes "no more frames" as natoms = -1, so the flag travels with the struct */
 +    if (MASTER(cr) && !*bNotLastFrame)
 +    {
 +        fr->natoms = -1;
 +    }
 +    xp = fr->x; /* the raw struct broadcast also overwrites the pointer members; x and v are saved here and restored after it */
 +    vp = fr->v; /* NOTE(review): other pointer members of t_trxframe, if any, are not restored -- confirm none are used afterwards */
 +    gmx_bcast(sizeof(*fr),fr,cr);
 +    fr->x = xp;
 +    fr->v = vp;
 +
 +    *bNotLastFrame = (fr->natoms >= 0);
 +
 +    if (*bNotLastFrame && PARTDECOMP(cr))
 +    {
 +        /* x and v are the only variable size quantities stored in trr
 +         * that are required for rerun (f is not needed).
 +         */
 +        if (bAlloc)
 +        {
 +            snew(fr->x,fr->natoms);
 +            snew(fr->v,fr->natoms);
 +        }
 +        if (fr->bX)
 +        {
 +            gmx_bcast(fr->natoms*sizeof(fr->x[0]),fr->x[0],cr);
 +        }
 +        if (fr->bV)
 +        {
 +            gmx_bcast(fr->natoms*sizeof(fr->v[0]),fr->v[0],cr);
 +        }
 +    }
 +}
 +
 +void md_print_warning(const t_commrec *cr,FILE *fplog,const char *buf)
 +{ /* Print buf, surrounded by newlines, to stderr (master only) and to fplog when it is non-NULL. */
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n%s\n",buf);
 +    }
 +    if (fplog) /* the log copy is written on any rank that has a log file */
 +    {
 +        fprintf(fplog,"\n%s\n",buf);
 +    }
 +}
index e23ec7771f224756d612e88d80fa939d0287d911,0000000000000000000000000000000000000000..825b47caa1f481ae92732896d51d561edca7d20f
mode 100644,000000..100644
--- /dev/null
@@@ -1,1276 -1,0 +1,1436 @@@
-  * 
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
-  * 
++ *
 + *                This source code is part of
-  * 
++ *
 + *                 G   R   O   M   A   C   S
-  * 
++ *
 + *          GROningen MAchine for Chemical Simulations
-  * 
++ *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
-  * 
++ *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
-  * 
++ *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
-  * 
++ *
 + * For more info, check our website at http://www.gromacs.org
++ *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <float.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "mdebin.h"
 +#include "smalloc.h"
 +#include "physics.h"
 +#include "enxio.h"
 +#include "vec.h"
 +#include "disre.h"
 +#include "main.h"
 +#include "network.h"
 +#include "names.h"
 +#include "orires.h"
 +#include "constr.h"
 +#include "mtop_util.h"
 +#include "xvgr.h"
 +#include "gmxfio.h"
 +#include "macros.h"
- static const char *tricl_boxs_nm[] = { 
++#include "mdrun.h"
 +#include "mdebin_bar.h"
 +
 +
 +static const char *conrmsd_nm[] = { "Constr. rmsd", "Constr.2 rmsd" }; /* labels registered for md->iconrmsd in init_mdebin */
 +
 +static const char *boxs_nm[] = { "Box-X", "Box-Y", "Box-Z" }; /* box labels used when md->bTricl is false */
 +
-     "Box-YX", "Box-ZX", "Box-ZY" 
++static const char *tricl_boxs_nm[] = {
 +    "Box-XX", "Box-YY", "Box-ZZ",
++    "Box-YX", "Box-ZX", "Box-ZY"
 +}; /* box labels used when md->bTricl is true */
 +
 +static const char *vol_nm[] = { "Volume" };
 +
 +static const char *dens_nm[] = {"Density" };
 +
 +static const char *pv_nm[] = {"pV" };
 +
 +static const char *enthalpy_nm[] = {"Enthalpy" };
 +
 +static const char *boxvel_nm[] = {
 +    "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
 +    "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
 +};
 +/* Entry counts passed to get_ebin_space for the two box label tables (asize is defined elsewhere; presumably an array-length macro) */
 +#define NBOXS asize(boxs_nm)
 +#define NTRICLBOXS asize(tricl_boxs_nm)
 +
-         else if ((i == F_DVDL) || (i == F_DKDL))
 +t_mdebin *init_mdebin(ener_file_t fp_ene,
 +                      const gmx_mtop_t *mtop,
 +                      const t_inputrec *ir,
 +                      FILE *fp_dhdl)
 +{
 +    const char *ener_nm[F_NRE];
 +    static const char *vir_nm[] = {
 +        "Vir-XX", "Vir-XY", "Vir-XZ",
 +        "Vir-YX", "Vir-YY", "Vir-YZ",
 +        "Vir-ZX", "Vir-ZY", "Vir-ZZ"
 +    };
 +    static const char *sv_nm[] = {
 +        "ShakeVir-XX", "ShakeVir-XY", "ShakeVir-XZ",
 +        "ShakeVir-YX", "ShakeVir-YY", "ShakeVir-YZ",
 +        "ShakeVir-ZX", "ShakeVir-ZY", "ShakeVir-ZZ"
 +    };
 +    static const char *fv_nm[] = {
 +        "ForceVir-XX", "ForceVir-XY", "ForceVir-XZ",
 +        "ForceVir-YX", "ForceVir-YY", "ForceVir-YZ",
 +        "ForceVir-ZX", "ForceVir-ZY", "ForceVir-ZZ"
 +    };
 +    static const char *pres_nm[] = {
 +        "Pres-XX","Pres-XY","Pres-XZ",
 +        "Pres-YX","Pres-YY","Pres-YZ",
 +        "Pres-ZX","Pres-ZY","Pres-ZZ"
 +    };
 +    static const char *surft_nm[] = {
 +        "#Surf*SurfTen"
 +    };
 +    static const char *mu_nm[] = {
 +        "Mu-X", "Mu-Y", "Mu-Z"
 +    };
 +    static const char *vcos_nm[] = {
 +        "2CosZ*Vel-X"
 +    };
 +    static const char *visc_nm[] = {
 +        "1/Viscosity"
 +    };
 +    static const char *baro_nm[] = {
 +        "Barostat"
 +    };
 +
 +    char     **grpnms;
 +    const gmx_groups_t *groups;
 +    char     **gnm;
 +    char     buf[256];
 +    const char     *bufi;
 +    t_mdebin *md;
 +    int      i,j,ni,nj,n,nh,k,kk,ncon,nset;
 +    gmx_bool     bBHAM,bNoseHoover,b14;
 +
 +    snew(md,1);
 +
 +    md->bVir=TRUE;
 +    md->bPress=TRUE;
 +    md->bSurft=TRUE;
 +    md->bMu=TRUE;
 +
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        md->delta_t = ir->delta_t;
 +    }
 +    else
 +    {
 +        md->delta_t = 0;
 +    }
 +
 +    groups = &mtop->groups;
 +
 +    bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 +    b14   = (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
 +             gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0);
 +
 +    ncon = gmx_mtop_ftype_count(mtop,F_CONSTR);
 +    nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
 +    md->bConstr    = (ncon > 0 || nset > 0);
 +    md->bConstrVir = FALSE;
 +    if (md->bConstr) {
 +        if (ncon > 0 && ir->eConstrAlg == econtLINCS) {
 +            if (ir->eI == eiSD2)
 +                md->nCrmsd = 2;
 +            else
 +                md->nCrmsd = 1;
 +        }
 +        md->bConstrVir = (getenv("GMX_CONSTRAINTVIR") != NULL);
 +    } else {
 +        md->nCrmsd = 0;
 +    }
 +
 +    /* Energy monitoring */
 +    for(i=0;i<egNR;i++)
 +    {
 +        md->bEInd[i]=FALSE;
 +    }
 +
 +#ifndef GMX_OPENMM
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        md->bEner[i] = FALSE;
 +        if (i == F_LJ)
 +            md->bEner[i] = !bBHAM;
 +        else if (i == F_BHAM)
 +            md->bEner[i] = bBHAM;
 +        else if (i == F_EQM)
 +            md->bEner[i] = ir->bQMMM;
 +        else if (i == F_COUL_LR)
 +            md->bEner[i] = (ir->rcoulomb > ir->rlist);
 +        else if (i == F_LJ_LR)
 +            md->bEner[i] = (!bBHAM && ir->rvdw > ir->rlist);
 +        else if (i == F_BHAM_LR)
 +            md->bEner[i] = (bBHAM && ir->rvdw > ir->rlist);
 +        else if (i == F_RF_EXCL)
 +            md->bEner[i] = (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC);
 +        else if (i == F_COUL_RECIP)
 +            md->bEner[i] = EEL_FULL(ir->coulombtype);
 +        else if (i == F_LJ14)
 +            md->bEner[i] = b14;
 +        else if (i == F_COUL14)
 +            md->bEner[i] = b14;
 +        else if (i == F_LJC14_Q || i == F_LJC_PAIRS_NB)
 +            md->bEner[i] = FALSE;
-         else if (i == F_DHDL_CON)
-             md->bEner[i] = (ir->efep != efepNO && md->bConstr);
++        else if ((i == F_DVDL_COUL && ir->fepvals->separate_dvdl[efptCOUL]) ||
++                 (i == F_DVDL_VDW  && ir->fepvals->separate_dvdl[efptVDW]) ||
++                 (i == F_DVDL_BONDED && ir->fepvals->separate_dvdl[efptBONDED]) ||
++                 (i == F_DVDL_RESTRAINT && ir->fepvals->separate_dvdl[efptRESTRAINT]) ||
++                 (i == F_DKDL && ir->fepvals->separate_dvdl[efptMASS]) ||
++                 (i == F_DVDL && ir->fepvals->separate_dvdl[efptFEP]))
 +            md->bEner[i] = (ir->efep != efepNO);
-         else if (i==F_VTEMP) 
 +        else if ((interaction_function[i].flags & IF_VSITE) ||
 +                 (i == F_CONSTR) || (i == F_CONSTRNC) || (i == F_SETTLE))
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_COUL_SR) || (i == F_EPOT) || (i == F_PRES)  || (i==F_EQM))
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_GBPOL) && ir->implicit_solvent==eisGBSA)
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_NPSOLVATION) && ir->implicit_solvent==eisGBSA && (ir->sa_algorithm != esaNO))
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_GB12) || (i == F_GB13) || (i == F_GB14))
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_ETOT) || (i == F_EKIN) || (i == F_TEMP))
 +            md->bEner[i] = EI_DYNAMICS(ir->eI);
-     md->bMTTK = IR_NPT_TROTTER(ir);
++        else if (i==F_VTEMP)
 +            md->bEner[i] =  (EI_DYNAMICS(ir->eI) && getenv("GMX_VIRIAL_TEMPERATURE"));
 +        else if (i == F_DISPCORR || i == F_PDISPCORR)
 +            md->bEner[i] = (ir->eDispCorr != edispcNO);
 +        else if (i == F_DISRESVIOL)
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_DISRES) > 0);
 +        else if (i == F_ORIRESDEV)
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0);
 +        else if (i == F_CONNBONDS)
 +            md->bEner[i] = FALSE;
 +        else if (i == F_COM_PULL)
 +            md->bEner[i] = (ir->ePull == epullUMBRELLA || ir->ePull == epullCONST_F || ir->bRot);
 +        else if (i == F_ECONSERVED)
 +            md->bEner[i] = ((ir->etc == etcNOSEHOOVER || ir->etc == etcVRESCALE) &&
 +                            (ir->epc == epcNO || ir->epc==epcMTTK));
 +        else
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,i) > 0);
 +    }
 +#else
 +    /* OpenMM always produces only the following 4 energy terms */
 +    md->bEner[F_EPOT] = TRUE;
 +    md->bEner[F_EKIN] = TRUE;
 +    md->bEner[F_ETOT] = TRUE;
 +    md->bEner[F_TEMP] = TRUE;
 +#endif
 +
 +    /* for adress simulations, most energy terms are not meaningfull, and thus disabled*/
 +    if (ir->bAdress && !debug) {
 +        for (i = 0; i < F_NRE; i++) {
 +            md->bEner[i] = FALSE;
 +            if(i == F_EKIN){ md->bEner[i] = TRUE;}
 +            if(i == F_TEMP){ md->bEner[i] = TRUE;}
 +        }
 +        md->bVir=FALSE;
 +        md->bPress=FALSE;
 +        md->bSurft=FALSE;
 +        md->bMu=FALSE;
 +    }
 +
 +    md->f_nre=0;
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        if (md->bEner[i])
 +        {
 +            /* FIXME: The constness should not be cast away */
 +            /*ener_nm[f_nre]=(char *)interaction_function[i].longname;*/
 +            ener_nm[md->f_nre]=interaction_function[i].longname;
 +            md->f_nre++;
 +        }
 +    }
 +
 +    md->epc = ir->epc;
 +    md->bDiagPres = !TRICLINIC(ir->ref_p);
 +    md->ref_p = (ir->ref_p[XX][XX]+ir->ref_p[YY][YY]+ir->ref_p[ZZ][ZZ])/DIM;
 +    md->bTricl = TRICLINIC(ir->compress) || TRICLINIC(ir->deform);
 +    md->bDynBox = DYNAMIC_BOX(*ir);
 +    md->etc = ir->etc;
 +    md->bNHC_trotter = IR_NVT_TROTTER(ir);
-         md->ib    = get_ebin_space(md->ebin, 
-                                    md->bTricl ? NTRICLBOXS : NBOXS, 
++    md->bPrintNHChains = ir-> bPrintNHChains;
++    md->bMTTK = (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir));
 +
 +    md->ebin  = mk_ebin();
 +    /* Pass NULL for unit to let get_ebin_space determine the units
 +     * for interaction_function[i].longname
 +     */
 +    md->ie    = get_ebin_space(md->ebin,md->f_nre,ener_nm,NULL);
 +    if (md->nCrmsd)
 +    {
 +        /* This should be called directly after the call for md->ie,
 +         * such that md->iconrmsd follows directly in the list.
 +         */
 +        md->iconrmsd = get_ebin_space(md->ebin,md->nCrmsd,conrmsd_nm,"");
 +    }
 +    if (md->bDynBox)
 +    {
-     md->nNHC = ir->opts.nhchainlength; /* shorthand for number of NH chains */ 
++        md->ib    = get_ebin_space(md->ebin,
++                                   md->bTricl ? NTRICLBOXS : NBOXS,
 +                                   md->bTricl ? tricl_boxs_nm : boxs_nm,
 +                                   unit_length);
 +        md->ivol  = get_ebin_space(md->ebin, 1, vol_nm,  unit_volume);
 +        md->idens = get_ebin_space(md->ebin, 1, dens_nm, unit_density_SI);
 +        if (md->bDiagPres)
 +        {
 +            md->ipv   = get_ebin_space(md->ebin, 1, pv_nm,   unit_energy);
 +            md->ienthalpy = get_ebin_space(md->ebin, 1, enthalpy_nm,   unit_energy);
 +        }
 +    }
 +    if (md->bConstrVir)
 +    {
 +        md->isvir = get_ebin_space(md->ebin,asize(sv_nm),sv_nm,unit_energy);
 +        md->ifvir = get_ebin_space(md->ebin,asize(fv_nm),fv_nm,unit_energy);
 +    }
 +    if (md->bVir)
 +        md->ivir   = get_ebin_space(md->ebin,asize(vir_nm),vir_nm,unit_energy);
 +    if (md->bPress)
 +        md->ipres  = get_ebin_space(md->ebin,asize(pres_nm),pres_nm,unit_pres_bar);
 +    if (md->bSurft)
 +        md->isurft = get_ebin_space(md->ebin,asize(surft_nm),surft_nm,
 +                                unit_surft_bar);
 +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
 +    {
 +        md->ipc = get_ebin_space(md->ebin,md->bTricl ? 6 : 3,
 +                                 boxvel_nm,unit_vel);
 +    }
 +    if (md->bMu)
 +        md->imu    = get_ebin_space(md->ebin,asize(mu_nm),mu_nm,unit_dipole_D);
 +    if (ir->cos_accel != 0)
 +    {
 +        md->ivcos = get_ebin_space(md->ebin,asize(vcos_nm),vcos_nm,unit_vel);
 +        md->ivisc = get_ebin_space(md->ebin,asize(visc_nm),visc_nm,
 +                                   unit_invvisc_SI);
 +    }
 +
 +    /* Energy monitoring */
 +    for(i=0;i<egNR;i++)
 +    {
 +        md->bEInd[i] = FALSE;
 +    }
 +    md->bEInd[egCOULSR] = TRUE;
 +    md->bEInd[egLJSR  ] = TRUE;
 +
 +    if (ir->rcoulomb > ir->rlist)
 +    {
 +        md->bEInd[egCOULLR] = TRUE;
 +    }
 +    if (!bBHAM)
 +    {
 +        if (ir->rvdw > ir->rlist)
 +        {
 +            md->bEInd[egLJLR]   = TRUE;
 +        }
 +    }
 +    else
 +    {
 +        md->bEInd[egLJSR]   = FALSE;
 +        md->bEInd[egBHAMSR] = TRUE;
 +        if (ir->rvdw > ir->rlist)
 +        {
 +            md->bEInd[egBHAMLR]   = TRUE;
 +        }
 +    }
 +    if (b14)
 +    {
 +        md->bEInd[egLJ14] = TRUE;
 +        md->bEInd[egCOUL14] = TRUE;
 +    }
 +    md->nEc=0;
 +    for(i=0; (i<egNR); i++)
 +    {
 +        if (md->bEInd[i])
 +        {
 +            md->nEc++;
 +        }
 +    }
 +
 +    n=groups->grps[egcENER].nr;
 +    /* for AdResS simulations, most energy terms are not meaningful, and thus disabled */
 +    if (!ir->bAdress){
 +        /*standard simulation*/
 +        md->nEg=n;
 +        md->nE=(n*(n+1))/2;
 +    }
 +    else if (!debug) {
 +        /*AdResS simulation*/
 +       md->nU=0;
 +       md->nEg=0;
 +       md->nE=0;
 +       md->nEc=0;
 +       md->isvir=FALSE;
 +    }
 +    snew(md->igrp,md->nE);
 +    if (md->nE > 1)
 +    {
 +        n=0;
 +        snew(gnm,md->nEc);
 +        for(k=0; (k<md->nEc); k++)
 +        {
 +            snew(gnm[k],STRLEN);
 +        }
 +        for(i=0; (i<groups->grps[egcENER].nr); i++)
 +        {
 +            ni=groups->grps[egcENER].nm_ind[i];
 +            for(j=i; (j<groups->grps[egcENER].nr); j++)
 +            {
 +                nj=groups->grps[egcENER].nm_ind[j];
 +                for(k=kk=0; (k<egNR); k++)
 +                {
 +                    if (md->bEInd[k])
 +                    {
 +                        sprintf(gnm[kk],"%s:%s-%s",egrp_nm[k],
 +                                *(groups->grpname[ni]),*(groups->grpname[nj]));
 +                        kk++;
 +                    }
 +                }
 +                md->igrp[n]=get_ebin_space(md->ebin,md->nEc,
 +                                           (const char **)gnm,unit_energy);
 +                n++;
 +            }
 +        }
 +        for(k=0; (k<md->nEc); k++)
 +        {
 +            sfree(gnm[k]);
 +        }
 +        sfree(gnm);
 +
 +        if (n != md->nE)
 +        {
 +            gmx_incons("Number of energy terms wrong");
 +        }
 +    }
 +
 +    md->nTC=groups->grps[egcTC].nr;
-         md->nTCP = 1;  /* assume only one possible coupling system for barostat 
++    md->nNHC = ir->opts.nhchainlength; /* shorthand for number of NH chains */
 +    if (md->bMTTK)
 +    {
-     } 
-     else 
++        md->nTCP = 1;  /* assume only one possible coupling system for barostat
 +                          for now */
-     if (md->etc == etcNOSEHOOVER) {
-         if (md->bNHC_trotter) { 
++    }
++    else
 +    {
 +        md->nTCP = 0;
 +    }
-         else 
++    if (md->etc == etcNOSEHOOVER)
++    {
++        if (md->bNHC_trotter)
++        {
 +            md->mde_n = 2*md->nNHC*md->nTC;
 +        }
-     } else { 
++        else
 +        {
 +            md->mde_n = 2*md->nTC;
 +        }
 +        if (md->epc == epcMTTK)
 +        {
 +            md->mdeb_n = 2*md->nNHC*md->nTCP;
 +        }
-     bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); /* whether to print Nose-Hoover chains */
++    } else {
 +        md->mde_n = md->nTC;
 +        md->mdeb_n = 0;
 +    }
 +
 +    snew(md->tmp_r,md->mde_n);
 +    snew(md->tmp_v,md->mde_n);
 +    snew(md->grpnms,md->mde_n);
 +    grpnms = md->grpnms;
 +
 +    for(i=0; (i<md->nTC); i++)
 +    {
 +        ni=groups->grps[egcTC].nm_ind[i];
 +        sprintf(buf,"T-%s",*(groups->grpname[ni]));
 +        grpnms[i]=strdup(buf);
 +    }
 +    md->itemp=get_ebin_space(md->ebin,md->nTC,(const char **)grpnms,
 +                             unit_temp_K);
 +
-         if (bNoseHoover) 
 +    if (md->etc == etcNOSEHOOVER)
 +    {
-             if (md->bNHC_trotter) 
++        if (md->bPrintNHChains)
 +        {
-                 for(i=0; (i<md->nTC); i++) 
++            if (md->bNHC_trotter)
 +            {
-                     for(j=0; (j<md->nNHC); j++) 
++                for(i=0; (i<md->nTC); i++)
 +                {
 +                    ni=groups->grps[egcTC].nm_ind[i];
 +                    bufi = *(groups->grpname[ni]);
-                 if (md->bMTTK) 
++                    for(j=0; (j<md->nNHC); j++)
 +                    {
 +                        sprintf(buf,"Xi-%d-%s",j,bufi);
 +                        grpnms[2*(i*md->nNHC+j)]=strdup(buf);
 +                        sprintf(buf,"vXi-%d-%s",j,bufi);
 +                        grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
 +                    }
 +                }
 +                md->itc=get_ebin_space(md->ebin,md->mde_n,
 +                                       (const char **)grpnms,unit_invtime);
-                     for(i=0; (i<md->nTCP); i++) 
++                if (md->bMTTK)
 +                {
-                         for(j=0; (j<md->nNHC); j++) 
++                    for(i=0; (i<md->nTCP); i++)
 +                    {
 +                        bufi = baro_nm[0];  /* All barostat DOF's together for now. */
-             } 
++                        for(j=0; (j<md->nNHC); j++)
 +                        {
 +                            sprintf(buf,"Xi-%d-%s",j,bufi);
 +                            grpnms[2*(i*md->nNHC+j)]=strdup(buf);
 +                            sprintf(buf,"vXi-%d-%s",j,bufi);
 +                            grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
 +                        }
 +                    }
 +                    md->itcb=get_ebin_space(md->ebin,md->mdeb_n,
 +                                            (const char **)grpnms,unit_invtime);
 +                }
-                 for(i=0; (i<md->nTC); i++) 
++            }
 +            else
 +            {
-     else if (md->etc == etcBERENDSEN || md->etc == etcYES || 
++                for(i=0; (i<md->nTC); i++)
 +                {
 +                    ni=groups->grps[egcTC].nm_ind[i];
 +                    bufi = *(groups->grpname[ni]);
 +                    sprintf(buf,"Xi-%s",bufi);
 +                    grpnms[2*i]=strdup(buf);
 +                    sprintf(buf,"vXi-%s",bufi);
 +                    grpnms[2*i+1]=strdup(buf);
 +                }
 +                md->itc=get_ebin_space(md->ebin,md->mde_n,
 +                                       (const char **)grpnms,unit_invtime);
 +            }
 +        }
 +    }
-     md->dhc=NULL; 
-     if (ir->separate_dhdl_file == sepdhdlfileNO )
++    else if (md->etc == etcBERENDSEN || md->etc == etcYES ||
 +             md->etc == etcVRESCALE)
 +    {
 +        for(i=0; (i<md->nTC); i++)
 +        {
 +            ni=groups->grps[egcTC].nm_ind[i];
 +            sprintf(buf,"Lamb-%s",*(groups->grpname[ni]));
 +            grpnms[i]=strdup(buf);
 +        }
 +        md->itc=get_ebin_space(md->ebin,md->mde_n,(const char **)grpnms,"");
 +    }
 +
 +    sfree(grpnms);
 +
 +
 +    md->nU=groups->grps[egcACC].nr;
 +    if (md->nU > 1)
 +    {
 +        snew(grpnms,3*md->nU);
 +        for(i=0; (i<md->nU); i++)
 +        {
 +            ni=groups->grps[egcACC].nm_ind[i];
 +            sprintf(buf,"Ux-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+XX]=strdup(buf);
 +            sprintf(buf,"Uy-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+YY]=strdup(buf);
 +            sprintf(buf,"Uz-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+ZZ]=strdup(buf);
 +        }
 +        md->iu=get_ebin_space(md->ebin,3*md->nU,(const char **)grpnms,unit_vel);
 +        sfree(grpnms);
 +    }
 +
 +    if ( fp_ene )
 +    {
 +        do_enxnms(fp_ene,&md->ebin->nener,&md->ebin->enm);
 +    }
 +
 +    md->print_grpnms=NULL;
 +
 +    /* check whether we're going to write dh histograms */
-         int i;
++    md->dhc=NULL;
++    if (ir->fepvals->separate_dhdl_file == esepdhdlfileNO )
 +    {
-     md->dhdl_derivatives = (ir->dhdl_derivatives==dhdlderivativesYES);
 +        snew(md->dhc, 1);
 +
 +        mde_delta_h_coll_init(md->dhc, ir);
 +        md->fp_dhdl = NULL;
 +    }
 +    else
 +    {
 +        md->fp_dhdl = fp_dhdl;
 +    }
- FILE *open_dhdl(const char *filename,const t_inputrec *ir,
-                 const output_env_t oenv)
++    if (ir->bSimTemp) {
++        int i;
++        snew(md->temperatures,ir->fepvals->n_lambda);
++        for (i=0;i<ir->fepvals->n_lambda;i++)
++        {
++            md->temperatures[i] = ir->simtempvals->temperatures[i];
++        }
++    }
 +    return md;
 +}
 +
-     const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
++extern FILE *open_dhdl(const char *filename,const t_inputrec *ir,
++                       const output_env_t oenv)
 +{
 +    FILE *fp;
-     sprintf(label_x,"%s (%s)","Time",unit_time);
-     if (ir->n_flambda == 0)
++    const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda",
++        *lambdastate="\\lambda state",*remain="remaining";
 +    char title[STRLEN],label_x[STRLEN],label_y[STRLEN];
++    int  i,np,nps,nsets,nsets_de,nsetsbegin;
++    t_lambda *fep;
 +    char **setname;
 +    char buf[STRLEN];
++    int bufplace=0;
++
++    int nsets_dhdl = 0;
++    int s = 0;
++    int nsetsextend;
++
++    /* for simplicity */
++    fep = ir->fepvals;
 +
-         sprintf(title,"%s, %s",dhdl,deltag);
-         sprintf(label_y,"(%s)",unit_energy);
++    if (fep->n_lambda == 0)
 +    {
 +        sprintf(title,"%s",dhdl);
++        sprintf(label_x,"Time (ps)");
 +        sprintf(label_y,"%s (%s %s)",
 +                dhdl,unit_energy,"[\\lambda]\\S-1\\N");
 +    }
 +    else
 +    {
-     if (ir->delta_lambda == 0)
++        sprintf(title,"%s and %s",dhdl,deltag);
++        sprintf(label_x,"Time (ps)");
++        sprintf(label_y,"%s and %s (%s %s)",
++                dhdl,deltag,unit_energy,"[\\8l\\4]\\S-1\\N");
 +    }
 +    fp = gmx_fio_fopen(filename,"w+");
 +    xvgr_header(fp,title,label_x,label_y,exvggtXNY,oenv);
 +
-         sprintf(buf,"T = %g (K), %s = %g",
-                 ir->opts.ref_t[0],lambda,ir->init_lambda);
++    if (!(ir->bSimTemp))
 +    {
-     else
++        bufplace = sprintf(buf,"T = %g (K) ",
++                ir->opts.ref_t[0]);
 +    }
-         sprintf(buf,"T = %g (K)",
-                 ir->opts.ref_t[0]);
++    if (ir->efep != efepSLOWGROWTH)
 +    {
-     if (ir->n_flambda > 0)
++        if (fep->n_lambda == 0)
++        {
++            sprintf(&(buf[bufplace]),"%s = %g",
++                    lambda,fep->init_lambda);
++        }
++        else
++        {
++            sprintf(&(buf[bufplace]),"%s = %d",
++                    lambdastate,fep->init_fep_state);
++        }
 +    }
 +    xvgr_subtitle(fp,buf,oenv);
 +
-         int nsets,s,nsi=0;
++    for (i=0;i<efptNR;i++)
++    {
++        if (fep->separate_dvdl[i]) {nsets_dhdl++;}
++    }
++
++    /* count the number of delta_g states */
++    nsets_de = fep->n_lambda;
++
++    nsets = nsets_dhdl + nsets_de; /* dhdl + fep differences */
++
++    if (fep->n_lambda>0 && ir->bExpanded)
++    {
++        nsets += 1;   /*add fep state for expanded ensemble */
++    }
++
++    if (fep->bPrintEnergy)
++    {
++        nsets += 1;  /* add energy to the dhdl as well */
++    }
++
++    nsetsextend = nsets;
++    if ((ir->epc!=epcNO) && (fep->n_lambda>0))
++    {
++        nsetsextend += 1; /* for PV term, other terms possible if required for the reduced potential (only needed with foreign lambda) */
++    }
++    snew(setname,nsetsextend);
++
++    if (ir->bExpanded)
++    {
++        /* state for the fep_vals, if we have alchemical sampling */
++        sprintf(buf,"%s","Thermodynamic state");
++        setname[s] = strdup(buf);
++        s+=1;
++    }
++
++    if (fep->bPrintEnergy)
++    {
++        sprintf(buf,"%s (%s)","Energy",unit_energy);
++        setname[s] = strdup(buf);
++        s+=1;
++    }
++
++    for (i=0;i<efptNR;i++)
++    {
++        if (fep->separate_dvdl[i]) {
++            sprintf(buf,"%s (%s)",dhdl,efpt_names[i]);
++            setname[s] = strdup(buf);
++            s+=1;
++        }
++    }
++
++    if (fep->n_lambda > 0)
 +    {
-          * from this xvg legend.  */
-         nsets = ( (ir->dhdl_derivatives==dhdlderivativesYES) ? 1 : 0) + 
-                   ir->n_flambda;
-         snew(setname,nsets);
-         if (ir->dhdl_derivatives == dhdlderivativesYES)
 +        /* g_bar has to determine the lambda values used in this simulation
-             sprintf(buf,"%s %s %g",dhdl,lambda,ir->init_lambda);
-             setname[nsi++] = gmx_strdup(buf);
++         * from this xvg legend.
++         */
++
++        if (ir->bExpanded) {
++            nsetsbegin = 1;  /* for including the expanded ensemble */
++        } else {
++            nsetsbegin = 0;
++        }
++
++        if (fep->bPrintEnergy)
 +        {
-         for(s=0; s<ir->n_flambda; s++)
++            nsetsbegin += 1;
 +        }
-             sprintf(buf,"%s %s %g",deltag,lambda,ir->flambda[s]);
-             setname[nsi++] = gmx_strdup(buf);
++        nsetsbegin += nsets_dhdl;
++
++        for(s=nsetsbegin; s<nsets; s++)
 +        {
-         xvgr_legend(fp,nsets,(const char**)setname,oenv);
++            nps = sprintf(buf,"%s %s (",deltag,lambda);
++            for (i=0;i<efptNR;i++)
++            {
++                if (fep->separate_dvdl[i])
++                {
++                    np = sprintf(&buf[nps],"%g,",fep->all_lambda[i][s-(nsetsbegin)]);
++                    nps += np;
++                }
++            }
++            if (ir->bSimTemp)
++            {
++                /* print the temperature for this state if doing simulated tempering */
++                sprintf(&buf[nps],"T = %g (%s))",ir->simtempvals->temperatures[s-(nsetsbegin)],unit_temp_K);
++            }
++            else
++            {
++                sprintf(&buf[nps-1],")");  /* -1 to overwrite the last comma */
++            }
++            setname[s] = strdup(buf);
++        }
++        if (ir->epc!=epcNO) {
++            np = sprintf(buf,"pV (%s)",unit_energy);
++            setname[nsetsextend-1] = strdup(buf);  /* the first entry after nsets */
 +        }
-         for(s=0; s<nsets; s++)
 +
-     if (j != md->f_nre) 
++        xvgr_legend(fp,nsetsextend,(const char **)setname,oenv);
++
++        for(s=0; s<nsetsextend; s++)
 +        {
 +            sfree(setname[s]);
 +        }
 +        sfree(setname);
 +    }
 +
 +    return fp;
 +}
 +
 +static void copy_energy(t_mdebin *md, real e[],real ecpy[])
 +{
 +    int i,j;
 +
 +    for(i=j=0; (i<F_NRE); i++)
 +        if (md->bEner[i])
 +            ecpy[j++] = e[i];
- void upd_mdebin(t_mdebin *md, gmx_bool write_dhdl,
++    if (j != md->f_nre)
 +        gmx_incons("Number of energy terms wrong");
 +}
 +
-     gmx_bool   bNoseHoover;
++void upd_mdebin(t_mdebin *md,
++                gmx_bool bDoDHDL,
 +                gmx_bool bSum,
 +                double time,
 +                real tmass,
 +                gmx_enerdata_t *enerd,
 +                t_state *state,
++                t_lambda *fep,
++                t_expanded *expand,
 +                matrix  box,
 +                tensor svir,
 +                tensor fvir,
 +                tensor vir,
 +                tensor pres,
 +                gmx_ekindata_t *ekind,
 +                rvec mu_tot,
 +                gmx_constr_t constr)
 +{
 +    int    i,j,k,kk,m,n,gid;
 +    real   crmsd[2],tmp6[6];
 +    real   bs[NTRICLBOXS],vol,dens,pv,enthalpy;
 +    real   eee[egNR];
 +    real   ecopy[F_NRE];
++    double store_dhdl[efptNR];
++    double *dE=NULL;
++    real   store_energy=0;
 +    real   tmp;
 +
 +    /* Do NOT use the box in the state variable, but the separate box provided
 +     * as an argument. This is because we sometimes need to write the box from
 +     * the last timestep to match the trajectory frames.
 +     */
 +    copy_energy(md, enerd->term,ecopy);
 +    add_ebin(md->ebin,md->ie,md->f_nre,ecopy,bSum);
 +    if (md->nCrmsd)
 +    {
 +        crmsd[0] = constr_rmsd(constr,FALSE);
 +        if (md->nCrmsd > 1)
 +        {
 +            crmsd[1] = constr_rmsd(constr,TRUE);
 +        }
 +        add_ebin(md->ebin,md->iconrmsd,md->nCrmsd,crmsd,FALSE);
 +    }
 +    if (md->bDynBox)
 +    {
 +        int nboxs;
 +        if(md->bTricl)
 +        {
 +            bs[0] = box[XX][XX];
 +            bs[1] = box[YY][YY];
 +            bs[2] = box[ZZ][ZZ];
 +            bs[3] = box[YY][XX];
 +            bs[4] = box[ZZ][XX];
 +            bs[5] = box[ZZ][YY];
 +            nboxs=NTRICLBOXS;
 +        }
 +        else
 +        {
 +            bs[0] = box[XX][XX];
 +            bs[1] = box[YY][YY];
 +            bs[2] = box[ZZ][ZZ];
 +            nboxs=NBOXS;
 +        }
 +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
-                not the instantaneous pressure */  
 +        add_ebin(md->ebin,md->ib   ,nboxs,bs   ,bSum);
 +        add_ebin(md->ebin,md->ivol ,1    ,&vol ,bSum);
 +        add_ebin(md->ebin,md->idens,1    ,&dens,bSum);
 +
 +        if (md->bDiagPres)
 +        {
 +            /* This is pV (in kJ/mol).  The pressure is the reference pressure,
-         add_ebin(md->ebin,md->ivisc,1,&tmp,bSum);    
++               not the instantaneous pressure */
 +            pv = vol*md->ref_p/PRESFAC;
 +
 +            add_ebin(md->ebin,md->ipv  ,1    ,&pv  ,bSum);
 +            enthalpy = pv + enerd->term[F_ETOT];
 +            add_ebin(md->ebin,md->ienthalpy  ,1    ,&enthalpy  ,bSum);
 +        }
 +    }
 +    if (md->bConstrVir)
 +    {
 +        add_ebin(md->ebin,md->isvir,9,svir[0],bSum);
 +        add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum);
 +    }
 +    if (md->bVir)
 +        add_ebin(md->ebin,md->ivir,9,vir[0],bSum);
 +    if (md->bPress)
 +        add_ebin(md->ebin,md->ipres,9,pres[0],bSum);
 +    if (md->bSurft){
 +        tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
 +        add_ebin(md->ebin,md->isurft,1,&tmp,bSum);
 +    }
 +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
 +    {
 +        tmp6[0] = state->boxv[XX][XX];
 +        tmp6[1] = state->boxv[YY][YY];
 +        tmp6[2] = state->boxv[ZZ][ZZ];
 +        tmp6[3] = state->boxv[YY][XX];
 +        tmp6[4] = state->boxv[ZZ][XX];
 +        tmp6[5] = state->boxv[ZZ][YY];
 +        add_ebin(md->ebin,md->ipc,md->bTricl ? 6 : 3,tmp6,bSum);
 +    }
 +    if(md->bMu)
 +        add_ebin(md->ebin,md->imu,3,mu_tot,bSum);
 +    if (ekind && ekind->cosacc.cos_accel != 0)
 +    {
 +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
 +        add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum);
 +        /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
 +        tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
 +                 *dens*vol*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
-         /* whether to print Nose-Hoover chains: */
-         bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); 
++        add_ebin(md->ebin,md->ivisc,1,&tmp,bSum);
 +    }
 +    if (md->nE > 1)
 +    {
 +        n=0;
 +        for(i=0; (i<md->nEg); i++)
 +        {
 +            for(j=i; (j<md->nEg); j++)
 +            {
 +                gid=GID(i,j,md->nEg);
 +                for(k=kk=0; (k<egNR); k++)
 +                {
 +                    if (md->bEInd[k])
 +                    {
 +                        eee[kk++] = enerd->grpp.ener[k][gid];
 +                    }
 +                }
 +                add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum);
 +                n++;
 +            }
 +        }
 +    }
 +
 +    if (ekind)
 +    {
 +        for(i=0; (i<md->nTC); i++)
 +        {
 +            md->tmp_r[i] = ekind->tcstat[i].T;
 +        }
 +        add_ebin(md->ebin,md->itemp,md->nTC,md->tmp_r,bSum);
 +
-             if (bNoseHoover) 
 +        if (md->etc == etcNOSEHOOVER)
 +        {
-                     for(i=0; (i<md->nTC); i++) 
++            /* whether to print Nose-Hoover chains: */
++            if (md->bPrintNHChains)
 +            {
 +                if (md->bNHC_trotter)
 +                {
-                         for (j=0;j<md->nNHC;j++) 
++                    for(i=0; (i<md->nTC); i++)
 +                    {
-                     add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);      
++                        for (j=0;j<md->nNHC;j++)
 +                        {
 +                            k = i*md->nNHC+j;
 +                            md->tmp_r[2*k] = state->nosehoover_xi[k];
 +                            md->tmp_r[2*k+1] = state->nosehoover_vxi[k];
 +                        }
 +                    }
-                         for(i=0; (i<md->nTCP); i++) 
++                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
 +
 +                    if (md->bMTTK) {
-                             for (j=0;j<md->nNHC;j++) 
++                        for(i=0; (i<md->nTCP); i++)
 +                        {
-                         add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum);      
++                            for (j=0;j<md->nNHC;j++)
 +                            {
 +                                k = i*md->nNHC+j;
 +                                md->tmp_r[2*k] = state->nhpres_xi[k];
 +                                md->tmp_r[2*k+1] = state->nhpres_vxi[k];
 +                            }
 +                        }
-                 } 
-                 else 
++                        add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum);
 +                    }
-         else if (md->etc == etcBERENDSEN || md->etc == etcYES || 
++                }
++                else
 +                {
 +                    for(i=0; (i<md->nTC); i++)
 +                    {
 +                        md->tmp_r[2*i] = state->nosehoover_xi[i];
 +                        md->tmp_r[2*i+1] = state->nosehoover_vxi[i];
 +                    }
 +                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
 +                }
 +            }
 +        }
-     if (write_dhdl)
++        else if (md->etc == etcBERENDSEN || md->etc == etcYES ||
 +                 md->etc == etcVRESCALE)
 +        {
 +            for(i=0; (i<md->nTC); i++)
 +            {
 +                md->tmp_r[i] = ekind->tcstat[i].lambda;
 +            }
 +            add_ebin(md->ebin,md->itc,md->nTC,md->tmp_r,bSum);
 +        }
 +    }
 +
 +    if (ekind && md->nU > 1)
 +    {
 +        for(i=0; (i<md->nU); i++)
 +        {
 +            copy_rvec(ekind->grpstat[i].u,md->tmp_v[i]);
 +        }
 +        add_ebin(md->ebin,md->iu,3*md->nU,md->tmp_v[0],bSum);
 +    }
 +
 +    ebin_increase_count(md->ebin,bSum);
 +
 +    /* BAR + thermodynamic integration values */
-         if (md->fp_dhdl)
-         {
-             fprintf(md->fp_dhdl,"%.4f", time);
-             if (md->dhdl_derivatives)
++    if ((md->fp_dhdl || md->dhc) && bDoDHDL && (enerd->n_lambda > 0))
 +    {
-                 fprintf(md->fp_dhdl," %g", enerd->term[F_DVDL]+ 
-                                            enerd->term[F_DKDL]+
-                                            enerd->term[F_DHDL_CON]);
++        snew(dE,enerd->n_lambda-1);
++        for(i=0; i<enerd->n_lambda-1; i++) {
++            dE[i] = enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0];  /* zero for simulated tempering */
++            if (md->temperatures!=NULL)
 +            {
-             for(i=1; i<enerd->n_lambda; i++)
++                /* MRS: is this right, given the way we have defined the exchange probabilities? */
++                /* is this even useful to have at all? */
++                dE[i] += (md->temperatures[i]/md->temperatures[state->fep_state]-1.0)*enerd->term[F_EKIN];
 +            }
-                 fprintf(md->fp_dhdl," %g",
-                         enerd->enerpart_lambda[i]-enerd->enerpart_lambda[0]);
++        }
++    }
++
++    if (md->fp_dhdl && bDoDHDL)
++    {
++        fprintf(md->fp_dhdl,"%.4f",time);
++        /* the current free energy state */
++
++        /* print the current state if we are doing expanded ensemble */
++        if (expand->elmcmove > elmcmoveNO) {
++            fprintf(md->fp_dhdl," %4d",state->fep_state);
++        }
++        /* total energy (needed if the temperature changes) */
++        if (fep->bPrintEnergy)
++        {
++            store_energy = enerd->term[F_ETOT];
++            fprintf(md->fp_dhdl," %#.8g",store_energy);
++        }
++
++        for (i=0;i<efptNR;i++)
++        {
++            if (fep->separate_dvdl[i])
 +            {
-             fprintf(md->fp_dhdl,"\n");
++                fprintf(md->fp_dhdl," %#.8g",enerd->term[F_DVDL+i]); /* assumes F_DVDL is first */
 +            }
-         /* and the binary BAR output */
-         if (md->dhc)
 +        }
-             mde_delta_h_coll_add_dh(md->dhc, 
-                                     enerd->term[F_DVDL]+ enerd->term[F_DKDL]+
-                                     enerd->term[F_DHDL_CON],
-                                     enerd->enerpart_lambda, time, 
-                                     state->lambda);
++        for(i=1; i<enerd->n_lambda; i++)
++        {
++            fprintf(md->fp_dhdl," %#.8g",dE[i-1]);
++
++        }
++        if ((md->epc!=epcNO)  && (enerd->n_lambda > 0))
 +        {
-     ebin_increase_count(md->ebin,FALSE); 
++            fprintf(md->fp_dhdl," %#.8g",pv);   /* PV term only needed when there are alternate state lambda */
 +        }
++        fprintf(md->fp_dhdl,"\n");
++        /* and the binary free energy output */
++    }
++    if (md->dhc && bDoDHDL)
++    {
++        int idhdl = 0;
++        for (i=0;i<efptNR;i++)
++        {
++            if (fep->separate_dvdl[i])
++            {
++                store_dhdl[idhdl] = enerd->term[F_DVDL+i]; /* assumes F_DVDL is first */
++                idhdl+=1;
++            }
++        }
++        /* store_dh is dE */
++        mde_delta_h_coll_add_dh(md->dhc,
++                                (double)state->fep_state,
++                                store_energy,
++                                pv,
++                                (expand->elamstats>elamstatsNO),
++                                (fep->bPrintEnergy),
++                                (md->epc!=epcNO),
++                                idhdl,
++                                fep->n_lambda,
++                                store_dhdl,
++                                dE,
++                                time);
++    }
++    if ((md->fp_dhdl || md->dhc) && bDoDHDL && (enerd->n_lambda >0))
++    {
++        sfree(dE);
 +    }
 +}
 +
++
 +void upd_mdebin_step(t_mdebin *md)
 +{
- void print_ebin_header(FILE *log,gmx_large_int_t steps,double time,real lamb)
++    ebin_increase_count(md->ebin,FALSE);
 +}
 +
 +static void npr(FILE *log,int n,char c)
 +{
 +    for(; (n>0); n--) fprintf(log,"%c",c);
 +}
 +
 +static void pprint(FILE *log,const char *s,t_mdebin *md)
 +{
 +    char CHAR='#';
 +    int  slen;
 +    char buf1[22],buf2[22];
 +
 +    slen = strlen(s);
 +    fprintf(log,"\t<======  ");
 +    npr(log,slen,CHAR);
 +    fprintf(log,"  ==>\n");
 +    fprintf(log,"\t<====  %s  ====>\n",s);
 +    fprintf(log,"\t<==  ");
 +    npr(log,slen,CHAR);
 +    fprintf(log,"  ======>\n\n");
 +
 +    fprintf(log,"\tStatistics over %s steps using %s frames\n",
 +            gmx_step_str(md->ebin->nsteps_sim,buf1),
 +            gmx_step_str(md->ebin->nsum_sim,buf2));
 +    fprintf(log,"\n");
 +}
 +
-             "Step","Time","Lambda",gmx_step_str(steps,buf),time,lamb);
++void print_ebin_header(FILE *log,gmx_large_int_t steps,double time,real lambda)
 +{
 +    char buf[22];
 +
 +    fprintf(log,"   %12s   %12s   %12s\n"
 +            "   %12s   %12.5f   %12.5f\n\n",
-     double      enxlambda_data[2]; 
++            "Step","Time","Lambda",gmx_step_str(steps,buf),time,lambda);
 +}
 +/* Write one energy frame and/or print the textual energy report.
 + *
 + * mode selects the first stage:
 + *   eprNORMAL       - fill a t_enxframe from md->ebin plus the optional
 + *                     orientation-restraint, distance-restraint and
 + *                     delta-H blocks and pass it to do_enx(fp_ene,...).
 + *                     Nothing is written when there are no energy terms,
 + *                     no distance-restraint pairs and both nr[enxOR] and
 + *                     nr[enxORI] are zero.
 + *   eprAVER/eprRMS  - only print a banner through pprint() (if log is set).
 + *   any other value - gmx_fatal().
 + *
 + * bEne, bDR and bOR switch the energy terms, the distance-restraint block
 + * and the orientation-restraint blocks on or off for the frame.
 + * When log is non-NULL the energy tables are printed to it afterwards for
 + * every mode; bCompact suppresses everything after the main energy table.
 + * fcd is dereferenced unconditionally in eprNORMAL mode.
 + */
 +void print_ebin(ener_file_t fp_ene,gmx_bool bEne,gmx_bool bDR,gmx_bool bOR,
 +                FILE *log,
 +                gmx_large_int_t step,double time,
 +                int mode,gmx_bool bCompact,
 +                t_mdebin *md,t_fcdata *fcd,
 +                gmx_groups_t *groups,t_grpopts *opts)
 +{
 +    /*static char **grpnms=NULL;*/
 +    char        buf[246];
 +    int         i,j,n,ni,nj,ndr,nor,b;
 +    int         ndisre=0;
 +    real        *disre_rm3tav, *disre_rt;
 +
 +    /* these are for the old-style blocks (1 subblock, only reals), because
 +       there can be only one per ID for these */
 +    int         nr[enxNR];
 +    int         id[enxNR];
 +    real        *block[enxNR];
 +
 +    /* temporary arrays for the lambda values to write out
 +       (not referenced anywhere else in this function) */
-                 nr[enxORI]    = (fcd->orires.oinsl != fcd->orires.otav) ? 
++    double      enxlambda_data[2];
 +
 +    t_enxframe  fr;
 +
 +    switch (mode)
 +    {
 +        case eprNORMAL:
 +            init_enxframe(&fr);
 +            fr.t            = time;
 +            fr.step         = step;
 +            fr.nsteps       = md->ebin->nsteps;
 +            fr.dt           = md->delta_t;
 +            fr.nsum         = md->ebin->nsum;
 +            fr.nre          = (bEne) ? md->ebin->nener : 0;
 +            fr.ener         = md->ebin->e;
 +            ndisre          = bDR ? fcd->disres.npair : 0;
 +            disre_rm3tav    = fcd->disres.rm3tav;
 +            disre_rt        = fcd->disres.rt;
 +            /* Optional additional old-style (real-only) blocks. */
 +            for(i=0; i<enxNR; i++)
 +            {
 +                nr[i] = 0;
 +            }
 +            if (fcd->orires.nr > 0 && bOR)
 +            {
 +                diagonalize_orires_tensors(&(fcd->orires));
 +                nr[enxOR]     = fcd->orires.nr;
 +                block[enxOR]  = fcd->orires.otav;
 +                id[enxOR]     = enxOR;
-             }        
++                nr[enxORI]    = (fcd->orires.oinsl != fcd->orires.otav) ?
 +                          fcd->orires.nr : 0;
 +                block[enxORI] = fcd->orires.oinsl;
 +                id[enxORI]    = enxORI;
 +                nr[enxORT]    = fcd->orires.nex*12;
 +                block[enxORT] = fcd->orires.eig;
 +                id[enxORT]    = enxORT;
-                     fr.block[b].id=id[b]; 
++            }
 +
 +            /* whether we are going to write anything out: */
 +            if (fr.nre || ndisre || nr[enxOR] || nr[enxORI])
 +            {
 +
 +                /* the old-style blocks go first; fr.nblock becomes the
 +                 * highest used index + 1, so every slot below it is filled.
 +                 * NOTE(review): id[] and block[] are only assigned for enxOR,
 +                 * enxORI and enxORT above -- confirm that no other index
 +                 * below the highest used one is reached by the fill loop. */
 +                fr.nblock = 0;
 +                for(i=0; i<enxNR; i++)
 +                {
 +                    if (nr[i] > 0)
 +                    {
 +                        fr.nblock = i + 1;
 +                    }
 +                }
 +                add_blocks_enxframe(&fr, fr.nblock);
 +                for(b=0;b<fr.nblock;b++)
 +                {
 +                    add_subblocks_enxblock(&(fr.block[b]), 1);
-                 /* we can now free & reset the data in the blocks */
-                 if (md->dhc)
-                     mde_delta_h_coll_reset(md->dhc);
++                    fr.block[b].id=id[b];
 +                    fr.block[b].sub[0].nr = nr[b];
 +#ifndef GMX_DOUBLE
 +                    fr.block[b].sub[0].type = xdr_datatype_float;
 +                    fr.block[b].sub[0].fval = block[b];
 +#else
 +                    fr.block[b].sub[0].type = xdr_datatype_double;
 +                    fr.block[b].sub[0].dval = block[b];
 +#endif
 +                }
 +
 +                /* check for disre block & fill it: one extra block with two
 +                 * subblocks of ndisre values each (rt, then rm3tav). */
 +                if (ndisre>0)
 +                {
 +                    int db = fr.nblock;
 +                    fr.nblock+=1;
 +                    add_blocks_enxframe(&fr, fr.nblock);
 +
 +                    add_subblocks_enxblock(&(fr.block[db]), 2);
 +                    fr.block[db].id=enxDISRE;
 +                    fr.block[db].sub[0].nr=ndisre;
 +                    fr.block[db].sub[1].nr=ndisre;
 +#ifndef GMX_DOUBLE
 +                    fr.block[db].sub[0].type=xdr_datatype_float;
 +                    fr.block[db].sub[1].type=xdr_datatype_float;
 +                    fr.block[db].sub[0].fval=disre_rt;
 +                    fr.block[db].sub[1].fval=disre_rm3tav;
 +#else
 +                    fr.block[db].sub[0].type=xdr_datatype_double;
 +                    fr.block[db].sub[1].type=xdr_datatype_double;
 +                    fr.block[db].sub[0].dval=disre_rt;
 +                    fr.block[db].sub[1].dval=disre_rm3tav;
 +#endif
 +                }
 +                /* here we can put new-style blocks */
 +
 +                /* Free energy perturbation blocks */
 +                if (md->dhc)
 +                {
 +                    mde_delta_h_coll_handle_block(md->dhc, &fr, fr.nblock);
 +                }
 +
++                /* we can now free & reset the data in the blocks.
++                 * NOTE(review): this reset runs before do_enx() writes the
++                 * frame -- confirm the data handed to fr above stays valid
++                 * across mde_delta_h_coll_reset(). */
++                if (md->dhc)
++                {
++                    mde_delta_h_coll_reset(md->dhc);
++                }
++
 +                /* do the actual I/O */
 +                do_enx(fp_ene,&fr);
 +                gmx_fio_check_file_position(enx_file_pointer(fp_ene));
 +                if (fr.nre)
 +                {
 +                    /* We have stored the sums, so reset the sum history */
 +                    reset_ebin_sums(md->ebin);
 +                }
-         pr_ebin(log,md->ebin,md->ie,md->f_nre+md->nCrmsd,5,mode,TRUE);  
 +            }
 +            free_enxframe(&fr);
 +            break;
 +        case eprAVER:
 +            if (log)
 +            {
 +                pprint(log,"A V E R A G E S",md);
 +            }
 +            break;
 +        case eprRMS:
 +            if (log)
 +            {
 +                pprint(log,"R M S - F L U C T U A T I O N S",md);
 +            }
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Invalid print mode (%d)",mode);
 +    }
 +
 +    if (log)
 +    {
 +        for(i=0;i<opts->ngtc;i++)
 +        {
 +            if(opts->annealing[i]!=eannNO)
 +            {
 +                fprintf(log,"Current ref_t for group %s: %8.1f\n",
 +                        *(groups->grpname[groups->grps[egcTC].nm_ind[i]]),
 +                        opts->ref_t[i]);
 +            }
 +        }
 +        if (mode==eprNORMAL && fcd->orires.nr>0)
 +        {
 +            print_orires_log(log,&(fcd->orires));
 +        }
 +        fprintf(log,"   Energies (%s)\n",unit_energy);
-                         mode,TRUE);      
++        pr_ebin(log,md->ebin,md->ie,md->f_nre+md->nCrmsd,5,mode,TRUE);
 +        fprintf(log,"\n");
 +
 +        if (!bCompact)
 +        {
 +            if (md->bDynBox)
 +            {
 +                pr_ebin(log,md->ebin,md->ib, md->bTricl ? NTRICLBOXS : NBOXS,5,
-                 pr_ebin(log,md->ebin,md->isvir,9,3,mode,FALSE);  
++                        mode,TRUE);
 +                fprintf(log,"\n");
 +            }
 +            if (md->bConstrVir)
 +            {
 +                fprintf(log,"   Constraint Virial (%s)\n",unit_energy);
-                 pr_ebin(log,md->ebin,md->ifvir,9,3,mode,FALSE);  
++                pr_ebin(log,md->ebin,md->isvir,9,3,mode,FALSE);
 +                fprintf(log,"\n");
 +                fprintf(log,"   Force Virial (%s)\n",unit_energy);
-             pr_ebin(log,md->ebin,md->ivir,9,3,mode,FALSE);   
++                pr_ebin(log,md->ebin,md->ifvir,9,3,mode,FALSE);
 +                fprintf(log,"\n");
 +            }
 +            fprintf(log,"   Total Virial (%s)\n",unit_energy);
-             pr_ebin(log,md->ebin,md->ipres,9,3,mode,FALSE);  
++            pr_ebin(log,md->ebin,md->ivir,9,3,mode,FALSE);
 +            fprintf(log,"\n");
 +            fprintf(log,"   Pressure (%s)\n",unit_pres_bar);
-             pr_ebin(log,md->ebin,md->imu,3,3,mode,FALSE);    
++            pr_ebin(log,md->ebin,md->ipres,9,3,mode,FALSE);
 +            fprintf(log,"\n");
 +            fprintf(log,"   Total Dipole (%s)\n",unit_dipole_D);
-     {         
++            pr_ebin(log,md->ebin,md->imu,3,3,mode,FALSE);
 +            fprintf(log,"\n");
 +
 +            if (md->nE > 1)
 +            {
 +                if (md->print_grpnms==NULL) /* build the "A-B" pair labels once and cache them in md */
 +                {
 +                    snew(md->print_grpnms,md->nE);
 +                    n=0;
 +                    for(i=0; (i<md->nEg); i++)
 +                    {
 +                        ni=groups->grps[egcENER].nm_ind[i];
 +                        for(j=i; (j<md->nEg); j++)
 +                        {
 +                            nj=groups->grps[egcENER].nm_ind[j];
 +                            sprintf(buf,"%s-%s",*(groups->grpname[ni]),
 +                                    *(groups->grpname[nj])); /* NOTE(review): unbounded sprintf into buf[246] */
 +                            md->print_grpnms[n++]=strdup(buf);
 +                        }
 +                    }
 +                }
 +                sprintf(buf,"Epot (%s)",unit_energy);
 +                fprintf(log,"%15s   ",buf);
 +                for(i=0; (i<egNR); i++)
 +                {
 +                    if (md->bEInd[i])
 +                    {
 +                        fprintf(log,"%12s   ",egrp_nm[i]);
 +                    }
 +                }
 +                fprintf(log,"\n");
 +                for(i=0; (i<md->nE); i++)
 +                {
 +                    fprintf(log,"%15s",md->print_grpnms[i]);
 +                    pr_ebin(log,md->ebin,md->igrp[i],md->nEc,md->nEc,mode,
 +                            FALSE);
 +                }
 +                fprintf(log,"\n");
 +            }
 +            if (md->nTC > 1)
 +            {
 +                pr_ebin(log,md->ebin,md->itemp,md->nTC,4,mode,TRUE);
 +                fprintf(log,"\n");
 +            }
 +            if (md->nU > 1)
 +            {
 +                fprintf(log,"%15s   %12s   %12s   %12s\n",
 +                        "Group","Ux","Uy","Uz");
 +                for(i=0; (i<md->nU); i++)
 +                {
 +                    ni=groups->grps[egcACC].nm_ind[i];
 +                    fprintf(log,"%15s",*groups->grpname[ni]);
 +                    pr_ebin(log,md->ebin,md->iu+3*i,3,3,mode,FALSE);
 +                }
 +                fprintf(log,"\n");
 +            }
 +        }
 +    }
 +
 +}
 +/* Copy the accumulation state of mdebin->ebin into enerhist.
 + * The four counters and nener are always copied.  ener_ave/ener_sum are
 + * copied only when ebin->nsum > 0 and ener_sum_sim only when
 + * ebin->nsum_sim > 0; each array is allocated with nener entries the
 + * first time it is needed.  When mdebin->dhc is set,
 + * mde_delta_h_coll_update_energyhistory() is called as well.
 + */
 +void update_energyhistory(energyhistory_t * enerhist,t_mdebin * mdebin)
 +{
 +    int i;
 +
 +    enerhist->nsteps     = mdebin->ebin->nsteps;
 +    enerhist->nsum       = mdebin->ebin->nsum;
 +    enerhist->nsteps_sim = mdebin->ebin->nsteps_sim;
 +    enerhist->nsum_sim   = mdebin->ebin->nsum_sim;
 +    enerhist->nener      = mdebin->ebin->nener;
 +
 +    if (mdebin->ebin->nsum > 0)
 +    {
 +        /* Check if we need to allocate first (ener_ave and ener_sum are
 +         * allocated together, so testing ener_ave covers both) */
 +        if(enerhist->ener_ave == NULL)
 +        {
 +            snew(enerhist->ener_ave,enerhist->nener);
 +            snew(enerhist->ener_sum,enerhist->nener);
 +        }
 +
 +        for(i=0;i<enerhist->nener;i++)
 +        {
 +            enerhist->ener_ave[i] = mdebin->ebin->e[i].eav;
 +            enerhist->ener_sum[i] = mdebin->ebin->e[i].esum;
 +        }
 +    }
 +
 +    if (mdebin->ebin->nsum_sim > 0)
 +    {
 +        /* Check if we need to allocate first */
 +        if(enerhist->ener_sum_sim == NULL)
 +        {
 +            snew(enerhist->ener_sum_sim,enerhist->nener);
 +        }
 +
 +        for(i=0;i<enerhist->nener;i++)
 +        {
 +            enerhist->ener_sum_sim[i] = mdebin->ebin->e_sim[i].esum;
 +        }
 +    }
 +    if (mdebin->dhc)
 +    {
 +        mde_delta_h_coll_update_energyhistory(mdebin->dhc, enerhist);
 +    }
 +}
 +/* Load the accumulation state stored in enerhist back into mdebin->ebin.
 + * Aborts with gmx_fatal() when enerhist holds sums (nsum or nsum_sim > 0)
 + * but its nener differs from ebin->nener.  For every energy term eav and
 + * esum are set to 0 when enerhist->nsum is not positive, and the e_sim sum
 + * is set to 0 when enerhist->nsum_sim is not positive.  When mdebin->dhc
 + * is set, mde_delta_h_coll_restore_energyhistory() is called as well.
 + */
 +void restore_energyhistory_from_state(t_mdebin * mdebin,
 +                                      energyhistory_t * enerhist)
 +{
 +    int i;
 +
 +    if ((enerhist->nsum > 0 || enerhist->nsum_sim > 0) &&
 +        mdebin->ebin->nener != enerhist->nener)
 +    {
 +        gmx_fatal(FARGS,"Mismatch between number of energies in run input (%d) and checkpoint file (%d).",
 +                  mdebin->ebin->nener,enerhist->nener);
 +    }
 +
 +    mdebin->ebin->nsteps     = enerhist->nsteps;
 +    mdebin->ebin->nsum       = enerhist->nsum;
 +    mdebin->ebin->nsteps_sim = enerhist->nsteps_sim;
 +    mdebin->ebin->nsum_sim   = enerhist->nsum_sim;
 +
 +    for(i=0; i<mdebin->ebin->nener; i++)
 +    {
 +        mdebin->ebin->e[i].eav  =
 +                  (enerhist->nsum > 0 ? enerhist->ener_ave[i] : 0);
 +        mdebin->ebin->e[i].esum =
 +                  (enerhist->nsum > 0 ? enerhist->ener_sum[i] : 0);
 +        mdebin->ebin->e_sim[i].esum =
 +                  (enerhist->nsum_sim > 0 ? enerhist->ener_sum_sim[i] : 0);
 +    }
 +    if (mdebin->dhc)
++    {
 +        mde_delta_h_coll_restore_energyhistory(mdebin->dhc, enerhist);
 +    }
 +}
Simple merge
Simple merge
index 2c3507e817c8d08073605399dc62b9bfa42fccb3,0000000000000000000000000000000000000000..33d154272f924832391aec14875b8b33de736a2e
mode 100644,000000..100644
--- /dev/null
@@@ -1,2469 -1,0 +1,2474 @@@
-  * 
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
-  * 
++ *
 + *                This source code is part of
-  * 
++ *
 + *                 G   R   O   M   A   C   S
-  * 
++ *
 + *          GROningen MAchine for Chemical Simulations
-  * 
++ *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
-  * 
++ *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
-  * 
++ *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
-  * 
++ *
 + * For more info, check our website at http://www.gromacs.org
-   
++ *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "network.h"
 +#include "confio.h"
 +#include "copyrite.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "random.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "update.h"
 +#include "constr.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "tgroup.h"
 +#include "mdebin.h"
 +#include "vsite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "trnio.h"
 +#include "mdatoms.h"
 +#include "ns.h"
 +#include "gmx_wallcycle.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "pme.h"
 +
 +#include "gromacs/linearalgebra/mtxio.h"
 +#include "gromacs/linearalgebra/sparsematrix.h"
 +/* Per-configuration state handled by the energy-minimization helpers. */
 +typedef struct {
 +  t_state s;      /* simulation state; x, box and lambda are read from it below */
 +  rvec    *f;     /* per-atom vectors scanned by get_state_f_norm_max() */
 +  real    epot;   /* presumably the potential energy of this state -- not set in the code visible here */
 +  real    fnorm;  /* set by get_state_f_norm_max(): sqrt of the summed squared norms of f */
 +  real    fmax;   /* set by get_state_f_norm_max(): largest per-atom norm in f */
 +  int     a_fmax; /* set by get_state_f_norm_max(): index of the atom with that largest norm */
 +} em_state_t;
 +/* Allocate one em_state_t with snew() and give its state an efptNR-entry
 + * lambda array.  Returns the new object.
 + */
 +static em_state_t *init_em_state()
 +{
 +  em_state_t *ems;
-           alg,ftol,gmx_step_str(count,buf)); 
++
 +  snew(ems,1);
 +
++  /* does this need to be here?  Should the array be declared differently (statically) in the state definition? */
++  snew(ems->s.lambda,efptNR);
++
 +  return ems;
 +}
 +/* Start the run-time measurement, report "Started <name>" through
 + * print_date_and_time() and start the ewcRUN wall-cycle counter.
 + */
 +static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                           gmx_wallcycle_t wcycle,
 +                           const char *name)
 +{
 +    char buf[STRLEN];
 +
 +    runtime_start(runtime);
 +
 +    sprintf(buf,"Started %s",name);
 +    print_date_and_time(fplog,cr->nodeid,buf,NULL);
 +
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                        gmx_wallcycle_t wcycle)
 +{   /* Stop the ewcRUN wall-cycle counter, then end the run-time
 +     * measurement.  fplog and cr are not used in this function.
 +     */
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    runtime_end(runtime);
 +}
 +/* Print a short header to out: the minimizer name followed by the force
 + * tolerance ftol and the number of steps nsteps.
 + */
 +static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 +{
 +    fprintf(out,"\n");
 +    fprintf(out,"%s:\n",minimizer);
 +    fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 +    fprintf(out,"   Number of steps    = %12d\n",nsteps);
 +}
 +/* Explain on fp why the minimization ended without reaching Fmax < ftol.
 + * With bLastStep set the step limit was hit.  Otherwise the message says
 + * the run converged to machine precision, adds a hint about double
 + * precision when real is narrower than double, and adds a hint about
 + * constraint accuracy when bConstrain is set.
 + */
 +static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 +{
 +    if (bLastStep)
 +    {
 +        fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 +    }
 +    else
 +    {
 +        fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 +                "Converged to machine precision,\n"
 +                "but not to the requested precision Fmax < %g\n",
 +                ftol);
 +        if (sizeof(real)<sizeof(double)) /* true only when real is narrower than double */
 +        {
 +            fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 +        }
 +        if (bConstrain)
 +        {
 +            fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 +                    "off constraints alltogether (set constraints = none in mdp file)\n");
 +        }
 +    }
 +}
 +
 +
 +/* Report the outcome of a minimization by algorithm alg on fp.
 + * One of three messages is chosen: converged to Fmax < ftol (bDone),
 + * converged to machine precision only (count < nsteps), or not converged
 + * within count steps.  Afterwards epot, fmax and fnorm are printed, with
 + * more digits when GMX_DOUBLE is defined; the atom index nfmax is printed
 + * as nfmax+1.
 + */
 +static void print_converged(FILE *fp,const char *alg,real ftol,
 +                          gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 +                          real epot,real fmax, int nfmax, real fnorm)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  if (bDone)
 +    fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
-   else 
++          alg,ftol,gmx_step_str(count,buf));
 +  else if(count<nsteps)
 +    fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 +               "but did not reach the requested Fmax < %g.\n",
 +          alg,gmx_step_str(count,buf),ftol);
-   fprintf(fp,"Potential Energy  = %21.14e\n",epot); 
-   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1); 
-   fprintf(fp,"Norm of force     = %21.14e\n",fnorm); 
++  else
 +    fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 +          alg,ftol,gmx_step_str(count,buf));
 +
 +#ifdef GMX_DOUBLE
-   fprintf(fp,"Potential Energy  = %14.7e\n",epot); 
-   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1); 
-   fprintf(fp,"Norm of force     = %14.7e\n",fnorm); 
++  fprintf(fp,"Potential Energy  = %21.14e\n",epot);
++  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
++  fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 +#else
-     
++  fprintf(fp,"Potential Energy  = %14.7e\n",epot);
++  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
++  fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 +#endif
 +}
 +/* Compute the norm of the force array f and its largest per-atom entry.
 + * Atoms mdatoms->start .. start+homenr-1 are scanned.  When
 + * mdatoms->cFREEZE is set, dimensions flagged in opts->nFreeze for the
 + * atom's freeze group are left out of that atom's squared norm.
 + * Results are stored through fnorm, fmax and a_fmax; each of the three
 + * pointers may be NULL to skip that output.
 + */
 +static void get_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 +                         real *fnorm,real *fmax,int *a_fmax)
 +{
 +  double fnorm2,*sum;
 +  real fmax2,fmax2_0,fam;
 +  int  la_max,a_max,start,end,i,m,gf;
 +
 +  /* This routine finds the largest force and returns it.
 +   * On parallel machines the global max is taken.
 +   * fnorm2 accumulates the sum of squared per-atom norms, fmax2 the
 +   * largest of them and la_max the local index of that atom.
 +   */
 +  fnorm2 = 0;
 +  fmax2 = 0;
 +  la_max = -1; /* stays -1 when no atom has a squared norm above zero */
 +  gf = 0;
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +  if (mdatoms->cFREEZE) {
 +    for(i=start; i<end; i++) {
 +      gf = mdatoms->cFREEZE[i];
 +      fam = 0;
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m])
 +        fam += sqr(f[i][m]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  } else {
 +    for(i=start; i<end; i++) {
 +      fam = norm2(f[i]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  }
 +
 +  if (la_max >= 0 && DOMAINDECOMP(cr)) {
 +    a_max = cr->dd->gatindex[la_max]; /* translate the local index through the gatindex table */
 +  } else {
 +    a_max = la_max;
 +  }
 +  if (PAR(cr)) {
 +    /* Layout of sum: slots 2*k and 2*k+1 hold fmax2 and a_max of node k,
 +     * the last slot holds fnorm2.  Each node fills only its own pair;
 +     * this presumably relies on snew() zeroing the array and gmx_sumd()
 +     * summing element-wise over all nodes -- TODO confirm.
 +     */
 +    snew(sum,2*cr->nnodes+1);
 +    sum[2*cr->nodeid]   = fmax2;
 +    sum[2*cr->nodeid+1] = a_max;
 +    sum[2*cr->nnodes]   = fnorm2;
 +    gmx_sumd(2*cr->nnodes+1,sum,cr);
 +    fnorm2 = sum[2*cr->nnodes];
 +    /* Determine the global maximum */
 +    for(i=0; i<cr->nnodes; i++) {
 +      if (sum[2*i] > fmax2) {
 +      fmax2 = sum[2*i];
 +      a_max = (int)(sum[2*i+1] + 0.5); /* the index travelled as a double; round it back */
 +      }
 +    }
 +    sfree(sum);
 +  }
 +
 +  if (fnorm)
 +    *fnorm = sqrt(fnorm2);
 +  if (fmax)
 +    *fmax  = sqrt(fmax2);
 +  if (a_fmax)
 +    *a_fmax = a_max;
 +}
 +/* Run get_f_norm_max() on ems->f and store the results in ems->fnorm,
 + * ems->fmax and ems->a_fmax.
 + */
 +static void get_state_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,
 +                         em_state_t *ems)
 +{
 +  get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 +}
 +/* Prepare the data structures used by the energy-minimization code.
 + * In order: initialize the lambda values and the nrnb counters; build the
 + * local topology and state (through domain decomposition, or by copying
 + * the global state); fill mdatoms and the vsite topology in the
 + * non-domain-decomposition path; set up constraints and, unless
 + * ir->bContinuation is set, constrain the starting coordinates; create
 + * the global-stat data when running in parallel; open the output files;
 + * allocate *enerd; and, when mdebin is not NULL, create the energy bin.
 + * SHAKE combined with F_CONSTR constraints is rejected with gmx_fatal().
 + * Outputs are returned through top, f, f_global, enerd, graph, gstat,
 + * outf and mdebin.
 + */
 +void init_em(FILE *fplog,const char *title,
 +             t_commrec *cr,t_inputrec *ir,
 +             t_state *state_global,gmx_mtop_t *top_global,
 +             em_state_t *ems,gmx_localtop_t **top,
 +             rvec **f,rvec **f_global,
 +             t_nrnb *nrnb,rvec mu_tot,
 +             t_forcerec *fr,gmx_enerdata_t **enerd,
 +             t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin)
 +{
 +    int  start,homenr,i;
 +    real dvdlambda;
-     
++
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Initiating %s\n",title);
 +    }
-     
-     /* Initiate some variables */
-     if (ir->efep != efepNO)
-     {
-         state_global->lambda = ir->init_lambda;
-     }
-     else 
-     {
-         state_global->lambda = 0.0;
-     }
-     
++
 +    state_global->ngtc = 0;
-     
++
++    /* Initialize lambda variables */
++    initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
++
 +    init_nrnb(nrnb);
-         
++
 +    if (DOMAINDECOMP(cr))
 +    {
 +        *top = dd_init_local_top(top_global);
-         
++
 +        dd_init_local_state(cr->dd,state_global,&ems->s);
 +
 +        *f = NULL;
-         
++
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            &ems->s,&ems->f,mdatoms,*top,
 +                            fr,vsite,NULL,constr,
 +                            nrnb,NULL,FALSE);
 +        dd_store_state(cr->dd,&ems->s);
-         
++
 +        if (ir->nstfout)
 +        {
 +            snew(*f_global,top_global->natoms);
 +        }
 +        else
 +        {
 +            *f_global = NULL;
 +        }
 +        *graph = NULL;
 +    }
 +    else
 +    {
 +        snew(*f,top_global->natoms);
 +
 +        /* Just copy the state.  This is a struct copy: only x gets its own
 +         * storage just below, every other pointer member still refers to
 +         * the arrays of state_global.
 +         * NOTE(review): this also overwrites the lambda pointer that
 +         * init_em_state() allocated -- confirm that allocation is not
 +         * leaked. */
 +        ems->s = *state_global;
 +        snew(ems->s.x,ems->s.nalloc);
 +        snew(ems->f,ems->s.nalloc);
 +        for(i=0; i<state_global->natoms; i++)
 +        {
 +            copy_rvec(state_global->x[i],ems->s.x[i]);
 +        }
 +        copy_mat(state_global->box,ems->s.box);
-             
++
 +        if (PAR(cr) && ir->eI != eiNM)
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            *top = split_system(fplog,top_global,ir,cr);
-         
++
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +        }
 +        else
 +        {
 +            *top = gmx_mtop_generate_local_top(top_global,ir);
 +        }
 +        *f_global = *f;
-         update_mdatoms(mdatoms,state_global->lambda);
-     
++
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +        {
 +            *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +        else
 +        {
 +            *graph = NULL;
 +        }
 +
 +        if (PARTDECOMP(cr))
 +        {
 +            pd_at_range(cr,&start,&homenr);
 +            homenr -= start; /* turn the value from pd_at_range() into a count relative to start */
 +        }
 +        else
 +        {
 +            start  = 0;
 +            homenr = top_global->natoms;
 +        }
 +        atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
-     
++        update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
++
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite,*top,mdatoms,cr);
 +        }
 +    }
-         
++
 +    if (constr)
 +    {
 +        if (ir->eConstrAlg == econtSHAKE &&
 +            gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 +        {
 +            gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 +                      econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        }
-                       ems->s.lambda,&dvdlambda,
++
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr,*top,ir,mdatoms,cr);
 +        }
 +
 +        if (!ir->bContinuation)
 +        {
 +            /* Constrain the starting coordinates.
 +             * NOTE(review): lambda[efptFEP] is passed here while
 +             * do_em_step() passes lambda[efptBONDED] to constrain() --
 +             * confirm which component is intended. */
 +            dvdlambda=0;
 +            constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 +                      ir,NULL,cr,-1,0,mdatoms,
 +                      ems->s.x,ems->s.x,NULL,ems->s.box,
-     
++                      ems->s.lambda[efptFEP],&dvdlambda,
 +                      NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +        }
 +    }
-     
++
 +    if (PAR(cr))
 +    {
 +        *gstat = global_stat_init(ir);
 +    }
-     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
++
 +    *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 +
 +    snew(*enerd,1);
-         *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL); 
++    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
++                  *enerd);
 +
 +    if (mdebin != NULL)
 +    {
 +        /* Init bin for energy stuff */
-     
++        *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 +    }
 +
 +    clear_rvec(mu_tot);
 +    calc_shifts(ems->s.box,fr->shift_vec);
 +}
 +/* Shut down a minimization run: call gmx_pme_finish() when this node does
 + * not carry DUTY_PME, close the output files with done_mdoutf() and stop
 + * the timers through em_time_end().
 + */
 +static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 +                      gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 +{
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Tell the PME only node to finish */
 +    gmx_pme_finish(cr);
 +  }
 +
 +  done_mdoutf(outf);
 +
 +  em_time_end(fplog,cr,runtime,wcycle);
 +}
 +/* Exchange the contents of *ems1 and *ems2 by struct assignment; pointer
 + * members are swapped as pointers, the arrays behind them are not copied.
 + */
 +static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 +{
 +  em_state_t tmp;
 +
 +  tmp   = *ems1;
 +  *ems1 = *ems2;
 +  *ems2 = tmp;
 +}
 +/* Copy the first state->natoms coordinate vectors from ems->s.x into
 + * state->x.
 + */
 +static void copy_em_coords(em_state_t *ems,t_state *state)
 +{
 +    int i;
 +
 +    for(i=0; (i<state->natoms); i++)
 +    {
 +        copy_rvec(ems->s.x[i],state->x[i]);
 +    }
 +}
 +/* Write trajectory output for the minimizer state and, optionally, a
 + * final configuration file.
 + * bX and bF select coordinate and force output for write_traj(); the step
 + * number is also passed as the time.  When confout is not NULL the master
 + * node writes state_global->x and the box to that file with
 + * write_sto_conf_mtop().
 + */
 +static void write_em_traj(FILE *fplog,t_commrec *cr,
 +                          gmx_mdoutf_t *outf,
 +                          gmx_bool bX,gmx_bool bF,const char *confout,
 +                          gmx_mtop_t *top_global,
 +                          t_inputrec *ir,gmx_large_int_t step,
 +                          em_state_t *state,
 +                          t_state *state_global,rvec *f_global)
 +{
 +    int mdof_flags;
 +
 +    if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 +    {
 +        copy_em_coords(state,state_global);
 +        f_global = state->f; /* only the local parameter is redirected; the caller's pointer is unchanged */
 +    }
-     
++
 +    mdof_flags = 0;
 +    if (bX) { mdof_flags |= MDOF_X; }
 +    if (bF) { mdof_flags |= MDOF_F; }
 +    write_traj(fplog,cr,outf,mdof_flags,
 +               top_global,step,(double)step,
 +               &state->s,state_global,state->f,f_global,NULL,NULL);
-   
++
 +    if (confout != NULL && MASTER(cr))
 +    {
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 +        {
 +            /* Make molecules whole only for confout writing */
 +            do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 +                        state_global->x);
 +        }
 +
 +        write_sto_conf_mtop(confout,
 +                            *top_global->name,top_global,
 +                            state_global->x,NULL,ir->ePBC,state_global->box);
 +    }
 +}
 +/* Build the trial state ems2 from ems1 by moving along f with step a:
 + * x2 = x1 + a*f for every non-frozen dimension, x2 = x1 for frozen ones.
 + * Before that ems2's arrays are resized to ems1's nalloc when they differ,
 + * and flags, natoms, the lambda values and the box are copied.  The cg_p
 + * vector is copied when the estCGP flag is set, and the charge-group
 + * bookkeeping is copied under domain decomposition.  When constr is set
 + * the new coordinates are constrained, timed under ewcCONSTR.
 + * Under domain decomposition a mismatch between ems1's ddp_count and
 + * cr->dd->ddp_count triggers gmx_incons().
 + */
 +static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 +                     em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 +                     gmx_constr_t constr,gmx_localtop_t *top,
 +                     t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                     gmx_large_int_t count)
 +
 +{
 +  t_state *s1,*s2;
 +  int  start,end,gf,i,m;
 +  rvec *x1,*x2;
 +  real dvdlambda;
 +
 +  s1 = &ems1->s;
 +  s2 = &ems2->s;
 +
 +  if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 +    gmx_incons("state mismatch in do_em_step");
 +
 +  s2->flags = s1->flags;
 +
 +  if (s2->nalloc != s1->nalloc) {
 +    s2->nalloc = s1->nalloc;
 +    srenew(s2->x,s1->nalloc);
 +    srenew(ems2->f,  s1->nalloc);
 +    if (s2->flags & (1<<estCGP))
 +      srenew(s2->cg_p,  s1->nalloc);
 +  }
-   s2->lambda = s1->lambda;
++
 +  s2->natoms = s1->natoms;
-     constrain(NULL,TRUE,TRUE,constr,&top->idef,       
++  /* Copy free energy state -> is this necessary? */
++  for (i=0;i<efptNR;i++)
++  {
++      s2->lambda[i] = s1->lambda[i];
++  }
 +  copy_mat(s1->box,s2->box);
 +
 +  start = md->start;
 +  end   = md->start + md->homenr;
 +
 +  x1 = s1->x;
 +  x2 = s2->x;
 +  gf = 0; /* freeze group 0 is used for every atom when md->cFREEZE is NULL */
 +  for(i=start; i<end; i++) {
 +    if (md->cFREEZE)
 +      gf = md->cFREEZE[i];
 +    for(m=0; m<DIM; m++) {
 +      if (ir->opts.nFreeze[gf][m])
 +      x2[i][m] = x1[i][m];
 +      else
 +      x2[i][m] = x1[i][m] + a*f[i][m];
 +    }
 +  }
 +
 +  if (s2->flags & (1<<estCGP)) {
 +    /* Copy the CG p vector (x1 and x2 are reused as cursors for cg_p) */
 +    x1 = s1->cg_p;
 +    x2 = s2->cg_p;
 +    for(i=start; i<end; i++)
 +      copy_rvec(x1[i],x2[i]);
 +  }
 +
 +  if (DOMAINDECOMP(cr)) {
 +    s2->ddp_count = s1->ddp_count;
 +    if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 +      s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 +      srenew(s2->cg_gl,s2->cg_gl_nalloc);
 +    }
 +    s2->ncg_gl = s1->ncg_gl;
 +    for(i=0; i<s2->ncg_gl; i++)
 +      s2->cg_gl[i] = s1->cg_gl[i];
 +    s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 +  }
 +
 +  if (constr) {
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdlambda = 0; /* the value written by constrain() is not used afterwards */
-               s1->x,s2->x,NULL,s2->box,s2->lambda,
++    constrain(NULL,TRUE,TRUE,constr,&top->idef,
 +              ir,NULL,cr,count,0,md,
-     
++              s1->x,s2->x,NULL,s2->box,s2->lambda[efptBONDED],
 +              &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  }
 +}
 +/* Repartition the domain decomposition for the minimizer state ems:
 + * call dd_partition_system() on ems->s and ems->f (no global state is
 + * passed), store the state with dd_store_state(), and time the whole
 + * operation under ewcDOMDEC.
 + */
 +static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 +                                   gmx_mtop_t *top_global,t_inputrec *ir,
 +                                   em_state_t *ems,gmx_localtop_t *top,
 +                                   t_mdatoms *mdatoms,t_forcerec *fr,
 +                                   gmx_vsite_t *vsite,gmx_constr_t constr,
 +                                   t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 +{
 +    /* Repartition the domain decomposition */
 +    wallcycle_start(wcycle,ewcDOMDEC);
 +    dd_partition_system(fplog,step,cr,FALSE,1,
 +                        NULL,top_global,ir,
 +                        &ems->s,&ems->f,
 +                        mdatoms,top,fr,vsite,NULL,constr,
 +                        nrnb,wcycle,FALSE);
 +    dd_store_state(cr->dd,&ems->s);
 +    wallcycle_stop(wcycle,ewcDOMDEC);
 +}
-   real dvdl,prescorr,enercorr,dvdlcorr;
++
 +static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 +                            t_state *state_global,gmx_mtop_t *top_global,
 +                            em_state_t *ems,gmx_localtop_t *top,
 +                            t_inputrec *inputrec,
 +                            t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                            gmx_global_stat_t gstat,
 +                            gmx_vsite_t *vsite,gmx_constr_t constr,
 +                            t_fcdata *fcd,
 +                            t_graph *graph,t_mdatoms *mdatoms,
 +                            t_forcerec *fr,rvec mu_tot,
 +                            gmx_enerdata_t *enerd,tensor vir,tensor pres,
 +                            gmx_large_int_t count,gmx_bool bFirst)
 +{
 +  real t;
 +  gmx_bool bNS;
 +  int  nabnsb;
 +  tensor force_vir,shake_vir,ekin;
-   
++  real dvdlambda,prescorr,enercorr,dvdlcorr;
 +  real terminate=0;
-       
++
 +  /* Set the time to the initial time, the time does not change during EM */
 +  t = inputrec->init_t;
 +
 +  if (bFirst ||
 +      (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 +    /* This the first state or an old state used before the last ns */
 +    bNS = TRUE;
 +  } else {
 +    bNS = FALSE;
 +    if (inputrec->nstlist > 0) {
 +      bNS = TRUE;
 +    } else if (inputrec->nstlist == -1) {
 +      nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 +      if (PAR(cr))
 +      gmx_sumi(1,&nabnsb,cr);
 +      bNS = (nabnsb > 0);
 +    }
 +  }
 +
 +  if (vsite)
 +    construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 +
 +  if (DOMAINDECOMP(cr)) {
 +    if (bNS) {
 +      /* Repartition the domain decomposition */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                           ems,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +  }
-       
-   /* Clear the unused shake virial and pressure */
-   clear_mat(shake_vir);
-   clear_mat(pres);
++
 +    /* Calc force & energy on new trial position  */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole in congrad.c
 +     */
 +    do_force(fplog,cr,inputrec,
 +             count,nrnb,wcycle,top,top_global,&top_global->groups,
 +             ems->s.box,ems->s.x,&ems->s.hist,
 +             ems->f,force_vir,mdatoms,enerd,fcd,
 +             ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 +             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 +             (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
-                     CGLO_ENERGY | 
-                     CGLO_PRESSURE | 
-                     CGLO_CONSTRAINT | 
++
++    /* Clear the unused shake virial and pressure */
++    clear_mat(shake_vir);
++    clear_mat(pres);
 +
 +    /* Communicate stuff when parallel */
 +    if (PAR(cr) && inputrec->eI != eiNM)
 +    {
 +        wallcycle_start(wcycle,ewcMoveE);
 +
 +        global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 +                    inputrec,NULL,NULL,NULL,1,&terminate,
 +                    top_global,&ems->s,FALSE,
-     calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
++                    CGLO_ENERGY |
++                    CGLO_PRESSURE |
++                    CGLO_CONSTRAINT |
 +                    CGLO_FIRSTITERATE);
 +
 +        wallcycle_stop(wcycle,ewcMoveE);
 +    }
 +
 +    /* Calculate long range corrections to pressure and energy */
-   
++    calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
 +                  pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 +    enerd->term[F_DISPCORR] = enercorr;
 +    enerd->term[F_EPOT] += enercorr;
 +    enerd->term[F_PRES] += prescorr;
 +    enerd->term[F_DVDL] += dvdlcorr;
 +
 +  ems->epot = enerd->term[F_EPOT];
-     dvdl = 0;
++
 +  if (constr) {
 +    /* Project out the constraint components of the force */
 +    wallcycle_start(wcycle,ewcCONSTR);
-               ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
++    dvdlambda = 0;
 +    constrain(NULL,FALSE,FALSE,constr,&top->idef,
 +              inputrec,NULL,cr,count,0,mdatoms,
-       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
-     enerd->term[F_DHDL_CON] += dvdl;
++              ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda[efptBONDED],&dvdlambda,
 +              NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 +    if (fr->bSepDVDL && fplog)
-   sum_dhdl(enerd,ems->s.lambda,inputrec);
++      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
++    enerd->term[F_DVDL_BONDED] += dvdlambda;
 +    m_add(force_vir,shake_vir,vir);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  } else {
 +    copy_mat(force_vir,vir);
 +  }
 +
 +  clear_mat(ekin);
 +  enerd->term[F_PRES] =
 +    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
 +
-   
++  sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
 +
 +    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 +    {
 +        get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 +    }
 +}
 +
 +static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                            gmx_mtop_t *mtop,
 +                            em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb,*fmg;
 +  t_block *cgs_gl;
 +  int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 +  double partsum;
 +  unsigned char *grpnrFREEZE;
 +
 +  if (debug)
 +    fprintf(debug,"Doing reorder_partsum\n");
 +
 +  fm = s_min->f;
 +  fb = s_b->f;
 +
 +  cgs_gl = dd_charge_groups_global(cr->dd);
 +  index = cgs_gl->index;
 +
 +  /* Collect fm in a global vector fmg.
 +   * This conflicts with the spirit of domain decomposition,
 +   * but to fully optimize this a much more complicated algorithm is required.
 +   */
 +  snew(fmg,mtop->natoms);
-   
++
 +  ncg   = s_min->s.ncg_gl;
 +  cg_gl = s_min->s.cg_gl;
 +  i = 0;
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      copy_rvec(fm[i],fmg[a]);
 +      i++;
 +    }
 +  }
 +  gmx_sum(mtop->natoms*3,fmg[0],cr);
 +
 +  /* Now we will determine the part of the sum for the cgs in state s_b */
 +  ncg   = s_b->s.ncg_gl;
 +  cg_gl = s_b->s.cg_gl;
 +  partsum = 0;
 +  i = 0;
 +  gf = 0;
 +  grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      if (mdatoms->cFREEZE && grpnrFREEZE) {
 +      gf = grpnrFREEZE[i];
 +      }
 +      for(m=0; m<DIM; m++) {
 +      if (!opts->nFreeze[gf][m]) {
 +        partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 +      }
 +      }
 +      i++;
 +    }
 +  }
-   
++
 +  sfree(fmg);
 +
 +  return partsum;
 +}
 +
 +static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                  gmx_mtop_t *mtop,
 +                  em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb;
 +  double sum;
 +  int  gf,i,m;
 +
 +  /* This is just the classical Polak-Ribiere calculation of beta;
 +   * it looks a bit complicated since we take freeze groups into account,
 +   * and might have to sum it in parallel runs.
 +   */
-       } 
++
 +  if (!DOMAINDECOMP(cr) ||
 +      (s_min->s.ddp_count == cr->dd->ddp_count &&
 +       s_b->s.ddp_count   == cr->dd->ddp_count)) {
 +    fm = s_min->f;
 +    fb = s_b->f;
 +    sum = 0;
 +    gf = 0;
 +    /* This part of code can be incorrect with DD,
 +     * since the atom ordering in s_b and s_min might differ.
 +     */
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m]) {
 +        sum += (fb[i][m] - fm[i][m])*fb[i][m];
-              int repl_ex_nst,int repl_ex_seed,
++      }
 +    }
 +  } else {
 +    /* We need to reorder cgs while summing */
 +    sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 +  }
 +  if (PAR(cr))
 +    gmx_sumd(1,&sum,cr);
 +
 +  return sum/sqr(s_min->fnorm);
 +}
 +
 +double do_cg(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
-   real   stepsize;    
++             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +             gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +  const char *CG="Polak-Ribiere Conjugate Gradients";
 +
 +  em_state_t *s_min,*s_a,*s_b,*s_c;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global,*p,*sf,*sfm;
 +  double gpa,gpb,gpc,tmp,sum[2],minstep;
 +  real   fnormn;
-   real   terminate=0;  
++  real   stepsize;
 +  real   a,b,c,beta=0.0;
 +  real   epot_repl=0;
 +  real   pnorm;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,foundlower;
 +  rvec   mu_tot;
 +  gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 +  tensor vir,pres;
 +  int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 +  gmx_mdoutf_t *outf;
 +  int    i,m,gf,step,nminstep;
-   
++  real   terminate=0;
 +
 +  step=0;
 +
 +  s_min = init_em_state();
 +  s_a   = init_em_state();
 +  s_b   = init_em_state();
 +  s_c   = init_em_state();
 +
 +  /* Init em and store the local state in s_min */
 +  init_em(fplog,CG,cr,inputrec,
 +          state_global,top_global,s_min,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
-   
++
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,CG);
-              mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
-              NULL,NULL,vir,pres,NULL,mu_tot,constr);
-     
-     print_ebin_header(fplog,step,step,s_min->s.lambda);
++
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  if (MASTER(cr))
 +    sp_header(stderr,CG,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,CG,inputrec->em_tol,number_steps);
 +
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole in congrad.c
 +   */
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state_global,top_global,s_min,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-  
++               mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
++               NULL,NULL,vir,pres,NULL,mu_tot,constr);
++
++    print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +
 +  /* Estimate/guess the initial stepsize */
 +  stepsize = inputrec->em_stepsize/s_min->fnorm;
-   }  
-   /* Start the loop over CG steps.            
++
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(fplog,"\n");
-     
-     /* start taking steps in a new direction 
-      * First time we enter the routine, beta=0, and the direction is 
++  }
++  /* Start the loop over CG steps.
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
-       if (mdatoms->cFREEZE) 
++
++    /* start taking steps in a new direction
++     * First time we enter the routine, beta=0, and the direction is
 +     * simply the negative gradient.
 +     */
 +
 +    /* Calculate the new direction in p, and the gradient in this direction, gpa */
 +    p  = s_min->s.cg_p;
 +    sf = s_min->f;
 +    gpa = 0;
 +    gf = 0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
-     
++      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++) {
 +      if (!inputrec->opts.nFreeze[gf][m]) {
 +        p[i][m] = sf[i][m] + beta*p[i][m];
 +        gpa -= p[i][m]*sf[i][m];
 +        /* f is negative gradient, thus the sign */
 +      } else {
 +          p[i][m] = 0;
 +      }
 +      }
 +    }
-     
++
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpa,cr);
 +
 +    /* Calculate the norm of the search vector */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
-     if(stepsize<=0)     
++
 +    /* Just in case stepsize reaches zero due to numerical precision... */
-     
-     /* 
++    if(stepsize<=0)
 +      stepsize = inputrec->em_stepsize/pnorm;
-     
++
++    /*
 +     * Double check the value of the derivative in the search direction.
 +     * If it is positive it must be due to the old information in the
 +     * CG formula, so just remove that and start over with beta=0.
 +     * This corresponds to a steepest descent step.
 +     */
 +    if(gpa>0) {
 +      beta = 0;
 +      step--; /* Don't count this step since we are restarting */
 +      continue; /* Go back to the beginning of the big for-loop */
 +    }
 +
 +    /* Calculate minimum allowed stepsize, before the average (norm)
 +     * relative change in coordinate is smaller than precision
 +     */
 +    minstep=0;
 +    for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++) {
 +      tmp = fabs(s_min->s.x[i][m]);
 +      if(tmp < 1.0)
 +        tmp = 1.0;
 +      tmp = p[i][m]/tmp;
 +      minstep += tmp*tmp;
 +      }
 +    }
 +    /* Add up from all CPUs */
 +    if(PAR(cr))
 +      gmx_sumd(1,&minstep,cr);
 +
 +    minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
 +
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
-     
++
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
-     
++
 +    write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                  top_global,inputrec,step,
 +                  s_min,state_global,f_global);
-      * 
++
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we don't really need to find the exact minimum -
 +     * it is much better to start a new CG step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * then continue straight to the next CG step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
-      * This leads to lower final energies in the tests I've done. / Erik 
++     *
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
-     
++     * This leads to lower final energies in the tests I've done. / Erik
 +     */
 +    s_a->epot = s_min->epot;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
-     
++
 +    if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
 +      em_dd_partition_system(fplog,step,cr,top_global,inputrec,
 +                           s_min,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +
 +    /* Take a trial step (new coords in s_c) */
 +    do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
 +             constr,top,nrnb,wcycle,-1);
-     
++
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_c,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,-1,FALSE);
-       for(m=0; m<DIM; m++) 
++
 +    /* Calc derivative along line */
 +    p  = s_c->s.cg_p;
 +    sf = s_c->f;
 +    gpc=0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
-     }    
++      for(m=0; m<DIM; m++)
 +        gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +
 +    /* This is the max amount of increase in energy we tolerate */
 +    tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
 +
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
-     
++    }
++
 +
 +
 +
-         b = 0.5*(a+c);                
-       
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really pathological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +    if (!foundlower) {
 +      nminstep=0;
 +
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
-       
++        b = 0.5*(a+c);
++
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
-       
++
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +        /* Reload the old state */
 +        em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
 +                               s_min,top,mdatoms,fr,vsite,constr,
 +                               nrnb,wcycle);
 +      }
 +
 +      /* Take a trial step to this new point - new coords in s_b */
 +      do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
 +                 constr,top,nrnb,wcycle,-1);
-       
++
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state_global,top_global,s_b,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,-1,FALSE);
-       
++
 +      /* p does not change within a step, but since the domain decomposition
 +       * might change, we have to use cg_p of s_b here.
 +       */
 +      p  = s_b->s.cg_p;
 +      sf = s_b->f;
 +      gpb=0;
 +      for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +        for(m=0; m<DIM; m++)
 +            gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
 +      }
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
-       
++
 +      if (debug)
 +        fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
 +                s_a->epot,s_b->epot,s_c->epot,gpb);
 +
 +      epot_repl = s_b->epot;
-       
-       /* 
++
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if (gpb > 0) {
 +        /* Replace c endpoint with b */
 +        swap_em_state(s_b,s_c);
 +        c = b;
 +        gpc = gpb;
 +      } else {
 +        /* Replace a endpoint with b */
 +        swap_em_state(s_b,s_a);
 +        a = b;
 +        gpa = gpb;
 +      }
-              (nminstep < 20));     
-       
++
++      /*
 +       * Stop search as soon as we find a value smaller than the endpoints.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++;
 +      } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
-       
++             (nminstep < 20));
++
 +      if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
 +        nminstep >= 20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If beta==0 this was steepest descent, and then we give up.
 +       * If not, set beta=0 and restart with steepest descent before quitting.
 +         */
 +      if (beta == 0.0) {
 +        /* Converged */
 +        converged = TRUE;
 +        break;
 +      } else {
 +        /* Reset memory before giving up */
 +        beta = 0.0;
 +        continue;
 +      }
 +      }
-       
++
 +      /* Select min energy state of A & C, put the best in B.
 +       */
 +      if (s_c->epot < s_a->epot) {
 +      if (debug)
 +        fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
 +                s_c->epot,s_a->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +      } else {
 +      if (debug)
 +        fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
 +                s_a->epot,s_c->epot);
 +      swap_em_state(s_b,s_a);
 +      gpb = gpa;
 +      b = a;
 +      }
-     
++
 +    } else {
 +      if (debug)
 +      fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
 +              s_c->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +    }
-     if (nstcg && ((step % nstcg)==0)) 
++
 +    /* new search direction */
 +    /* beta = 0 means forget all memory and restart with steepest descents. */
-     
-     
++    if (nstcg && ((step % nstcg)==0))
 +      beta = 0.0;
 +    else {
 +      /* s_min->fnorm cannot be zero, because then we would have converged
 +       * and broken out.
 +       */
 +
 +      /* Polak-Ribiere update.
 +       * Change to fnorm2/fnorm2_old for Fletcher-Reeves
 +       */
 +      beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
 +    }
 +    /* Limit beta to prevent oscillations */
 +    if (fabs(beta) > 5.0)
 +      beta = 0.0;
-     
++
++
 +    /* update positions */
 +    swap_em_state(s_min,s_b);
 +    gpa = gpb;
-                mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
-                NULL,NULL,vir,pres,NULL,mu_tot,constr);
++
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
 +              s_min->fmax,s_min->a_fmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-       print_ebin_header(fplog,step,step,s_min->s.lambda);
++                 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
++                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
++
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
-     
++          print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
-      */       
++
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
-     
++     */
 +    converged = converged || (s_min->fmax < inputrec->em_tol);
-   
-   if (converged)      
++
 +  } /* End of the loop */
-   
++
++  if (converged)
 +    step--; /* we never took that last step in this case */
-         converged = FALSE; 
++
 +    if (s_min->fmax > inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
-   
++        converged = FALSE;
 +    }
-       print_ebin_header(fplog,step,step,s_min->s.lambda);
++
 +  if (MASTER(cr)) {
 +    /* If we printed energy and/or logfile last step (which was the last step)
 +     * we don't have to do it again, but otherwise print the final values.
 +     */
 +    if(!do_log) {
 +      /* Write final value to log since we didn't do anything the last step */
-   
++      print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +    }
 +    if (!do_ene || !do_log) {
 +      /* Write final energy file entries */
 +      print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +               !do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +  }
 +
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
-    */  
++
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
-   
++   */
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
-   
++
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                s_min,state_global,f_global);
-   
++
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
-     
++
 +  if (MASTER(cr)) {
 +    print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
-   
++
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
-   
++
 +  finish_em(fplog,cr,outf,runtime,wcycle);
-                 int repl_ex_nst,int repl_ex_seed,
++
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_lbfgs(FILE *fplog,t_commrec *cr,
 +                int nfile,const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
-   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;   
++                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                gmx_membed_t membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{
 +  static const char *LBFGS="Low-Memory BFGS Minimizer";
 +  em_state_t ems;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global;
 +  int    ncorr,nmaxcorr,point,cp,neval,nminstep;
 +  double stepsize,gpa,gpb,gpc,tmp,minstep;
-   
++  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
 +  real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
 +  real   a,b,c,maxdelta,delta;
 +  real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
 +  real   dgdx,dgdg,sq,yr,beta;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,first;
 +  rvec   mu_tot;
 +  real   fnorm,fmax;
 +  gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
 +  tensor vir,pres;
 +  int    start,end,number_steps;
 +  gmx_mdoutf_t *outf;
 +  int    i,k,m,n,nfmax,gf,step;
 +  /* not used */
 +  real   terminate;
 +
 +  if (PAR(cr))
 +    gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
-   
++
 +  n = 3*state->natoms;
 +  nmaxcorr = inputrec->nbfgscorr;
-   snew(p,n); 
-   snew(lastx,n); 
-   snew(lastf,n); 
++
 +  /* Allocate memory */
 +  /* Use pointers to real so we don't have to loop over both atoms and
 +   * dimensions all the time...
 +   * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
 +   * that point to the same memory.
 +   */
 +  snew(xa,n);
 +  snew(xb,n);
 +  snew(xc,n);
 +  snew(fa,n);
 +  snew(fb,n);
 +  snew(fc,n);
 +  snew(frozen,n);
 +
-   
++  snew(p,n);
++  snew(lastx,n);
++  snew(lastf,n);
 +  snew(rho,nmaxcorr);
 +  snew(alpha,nmaxcorr);
-   
++
 +  snew(dx,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dx[i],n);
-   neval = 0; 
++
 +  snew(dg,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dg[i],n);
 +
 +  step = 0;
-     
++  neval = 0;
 +
 +  /* Init em */
 +  init_em(fplog,LBFGS,cr,inputrec,
 +          state,top_global,&ems,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +  /* Do_lbfgs is not completely updated like do_steep and do_cg,
 +   * so we free some memory again.
 +   */
 +  sfree(ems.s.x);
 +  sfree(ems.f);
 +
 +  xx = (real *)state->x;
 +  ff = (real *)f;
 +
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
-   
++
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,LBFGS);
-   
++
 +  do_log = do_ene = do_x = do_f = TRUE;
-      for(m=0; m<DIM; m++) 
-        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];  
++
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
 +  gf = 0;
 +  for(i=start; i<end; i++) {
 +    if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
-   
++     for(m=0; m<DIM; m++)
++       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
 +  }
 +  if (MASTER(cr))
 +    sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
-   
++
 +  if (vsite)
 +    construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,state->box);
-       
++
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole
 +   */
 +  neval++;
 +  ems.s.x = state->x;
 +  ems.f = f;
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state,top_global,&ems,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
-              mdatoms->tmass,enerd,state,state->box,
-              NULL,NULL,vir,pres,NULL,mu_tot,constr);
-     
-     print_ebin_header(fplog,step,step,state->lambda);
++
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-   
++               mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
++               NULL,NULL,vir,pres,NULL,mu_tot,constr);
++
++    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
-   
++
 +  /* This is the starting energy */
 +  Epot = enerd->term[F_EPOT];
-   
++
 +  fnorm = ems.fnorm;
 +  fmax  = ems.fmax;
 +  nfmax = ems.a_fmax;
-    * since it will be multiplied by the non-normalized search direction 
++
 +  /* Set the initial step.
-   
++   * since it will be multiplied by the non-normalized search direction
 +   * vector (force vector the first time), we scale it by the
 +   * norm of the force.
 +   */
-   }   
-   
++
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(fplog,"\n");
-   
-   /* Start the loop over BFGS steps.          
++  }
++
 +  point=0;
 +  for(i=0;i<n;i++)
 +    if(!frozen[i])
 +      dx[point][i] = ff[i];  /* Initial search direction */
 +    else
 +      dx[point][i] = 0;
 +
 +  stepsize = 1.0/fnorm;
 +  converged = FALSE;
-   
++
++  /* Start the loop over BFGS steps.
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
-     
++
 +  ncorr=0;
 +
 +  /* Set the gradient from the force */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
-     
++
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
-     
++
 +    write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
 +               top_global,step,(real)step,state,state,f,f,NULL,NULL);
 +
 +    /* Do the linesearching in the direction dx[point][0..(n-1)] */
-     
++
 +    /* pointer to current direction - point=0 first time here */
 +    s=dx[point];
-     for(gpa=0,i=0;i<n;i++) 
++
 +    /* calculate line gradient */
-     /* Calculate minimum allowed stepsize, before the average (norm) 
-      * relative change in coordinate is smaller than precision 
++    for(gpa=0,i=0;i<n;i++)
 +      gpa-=s[i]*ff[i];
 +
-     
++    /* Calculate minimum allowed stepsize, before the average (norm)
++     * relative change in coordinate is smaller than precision
 +     */
 +    for(minstep=0,i=0;i<n;i++) {
 +      tmp=fabs(xx[i]);
 +      if(tmp<1.0)
 +      tmp=1.0;
 +      tmp = s[i]/tmp;
 +      minstep += tmp*tmp;
 +    }
 +    minstep = GMX_REAL_EPS/sqrt(minstep/n);
-     
++
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
-     
++
 +    /* Store old forces and coordinates */
 +    for(i=0;i<n;i++) {
 +      lastx[i]=xx[i];
 +      lastf[i]=ff[i];
 +    }
 +    Epot0=Epot;
-     
++
 +    first=TRUE;
-     
++
 +    for(i=0;i<n;i++)
 +      xa[i]=xx[i];
-      * 
++
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we don't really need to find the exact minimum -
 +     * it is much better to start a new BFGS step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * then continue straight to the next BFGS step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
-      * This leads to lower final energies in the tests I've done. / Erik 
++     *
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
-     /* Check stepsize first. We do not allow displacements 
++     * This leads to lower final energies in the tests I've done. / Erik
 +     */
 +    foundlower=FALSE;
 +    EpotA = Epot0;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +
-     
++    /* Check stepsize first. We do not allow displacements
 +     * larger than emstep.
 +     */
 +    do {
 +      c = a + stepsize;
 +      maxdelta=0;
 +      for(i=0;i<n;i++) {
 +      delta=c*s[i];
 +      if(delta>maxdelta)
 +        maxdelta=delta;
 +      }
 +      if(maxdelta>inputrec->em_stepsize)
 +      stepsize*=0.1;
 +    } while(maxdelta>inputrec->em_stepsize);
 +
 +    /* Take a trial step */
 +    for (i=0; i<n; i++)
 +      xc[i] = lastx[i] + c*s[i];
-     
++
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    ems.s.x = (rvec *)xc;
 +    ems.f   = (rvec *)fc;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state,top_global,&ems,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,step,FALSE);
 +    EpotC = ems.epot;
-     
++
 +    /* Calc derivative along line */
 +    for(gpc=0,i=0; i<n; i++) {
 +      gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
-     
++
 +     /* This is the max amount of increase in energy we tolerate */
 +   tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
-     }    
-     
++
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
-      
++    }
++
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really pathological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +
 +    if(!foundlower) {
-       
++
 +      nminstep=0;
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
-         b = 0.5*(a+c);                
-       
++
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
-       
++        b = 0.5*(a+c);
++
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
-       for (i=0; i<n; i++) 
++
 +      /* Take a trial step */
-       
++      for (i=0; i<n; i++)
 +        xb[i] = lastx[i] + b*s[i];
-       
++
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      ems.s.x = (rvec *)xb;
 +      ems.f   = (rvec *)fb;
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state,top_global,&ems,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,step,FALSE);
 +      EpotB = ems.epot;
-       
-       for(gpb=0,i=0; i<n; i++) 
++
 +      fnorm = ems.fnorm;
-       
++
++      for(gpb=0,i=0; i<n; i++)
 +        gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
-       
++
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
-         xtmp = xb; 
++
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if(gpb>0) {
 +        /* Replace c endpoint with b */
 +        EpotC = EpotB;
 +        c = b;
 +        gpc = gpb;
 +        /* swap coord pointers b/c */
-         xb = xc; 
++        xtmp = xb;
 +        ftmp = fb;
-         xtmp = xb; 
++        xb = xc;
 +        fb = fc;
 +        xc = xtmp;
 +        fc = ftmp;
 +      } else {
 +        /* Replace a endpoint with b */
 +        EpotA = EpotB;
 +        a = b;
 +        gpa = gpb;
 +        /* swap coord pointers a/b */
-         xb = xa; 
++        xtmp = xb;
 +        ftmp = fb;
-         xa = xtmp; 
++        xb = xa;
 +        fb = fa;
-       
-       /* 
++        xa = xtmp;
 +        fa = ftmp;
 +      }
-       nminstep++; 
++
++      /*
 +       * Stop search as soon as we find a value smaller than the endpoints,
 +       * or if the tolerance is below machine precision.
 +       * Never run more than 20 steps, no matter what.
 +       */
-       
++      nminstep++;
 +      } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
 +
 +      if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If ncorr==0 this was steepest descent, and then we give up.
 +       * If not, reset memory to restart as steepest descent before quitting.
 +         */
 +      if(ncorr==0) {
 +      /* Converged */
 +        converged=TRUE;
 +        break;
 +      } else {
 +        /* Reset memory */
 +        ncorr=0;
 +        /* Search in gradient direction */
 +        for(i=0;i<n;i++)
 +          dx[point][i]=ff[i];
 +        /* Reset stepsize */
 +        stepsize = 1.0/fnorm;
 +        continue;
 +      }
 +      }
-       
++
 +      /* Select min energy state of A & C, put the best in xx/ff/Epot
 +       */
 +      if(EpotC<EpotA) {
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xc[i];
 +        ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +      } else {
 +      Epot = EpotA;
 +      /* Use state A */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xa[i];
 +        ff[i]=fa[i];
 +      }
 +      stepsize=a;
 +      }
-     /* Update the memory information, and calculate a new 
-      * approximation of the inverse hessian 
++
 +    } else {
 +      /* found lower */
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +      xx[i]=xc[i];
 +      ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +    }
 +
-     
-     /* Have new data in Epot, xx, ff */       
++    /* Update the memory information, and calculate a new
++     * approximation of the inverse hessian
 +     */
-     
++
++    /* Have new data in Epot, xx, ff */
 +    if(ncorr<nmaxcorr)
 +      ncorr++;
 +
 +    for(i=0;i<n;i++) {
 +      dg[point][i]=lastf[i]-ff[i];
 +      dx[point][i]*=stepsize;
 +    }
-     dgdx=0;   
++
 +    dgdg=0;
-     
++    dgdx=0;
 +    for(i=0;i<n;i++) {
 +      dgdg+=dg[point][i]*dg[point][i];
 +      dgdx+=dg[point][i]*dx[point][i];
 +    }
-     
++
 +    diag=dgdx/dgdg;
-     
++
 +    rho[point]=1.0/dgdx;
 +    point++;
-     
++
 +    if(point>=nmaxcorr)
 +      point=0;
-     
++
 +    /* Update */
 +    for(i=0;i<n;i++)
 +      p[i]=ff[i];
-     
++
 +    cp=point;
-       if(cp<0) 
++
 +    /* Recursive update. First go back over the memory points */
 +    for(k=0;k<ncorr;k++) {
 +      cp--;
-       
++      if(cp<0)
 +      cp=ncorr-1;
-       
++
 +      sq=0;
 +      for(i=0;i<n;i++)
 +      sq+=dx[cp][i]*p[i];
-       
++
 +      alpha[cp]=rho[cp]*sq;
-       p[i] -= alpha[cp]*dg[cp][i];            
++
 +      for(i=0;i<n;i++)
-     
++      p[i] -= alpha[cp]*dg[cp][i];
 +    }
-     
++
 +    for(i=0;i<n;i++)
 +      p[i] *= diag;
-       
-       beta = rho[cp]*yr;          
++
 +    /* And then go forward again */
 +    for(k=0;k<ncorr;k++) {
 +      yr = 0;
 +      for(i=0;i<n;i++)
 +      yr += p[i]*dg[cp][i];
-       
++
++      beta = rho[cp]*yr;
 +      beta = alpha[cp]-beta;
-       
-       cp++;   
++
 +      for(i=0;i<n;i++)
 +      p[i] += beta*dx[cp][i];
-     
++
++      cp++;
 +      if(cp>=ncorr)
 +      cp=0;
 +    }
-     
++
 +    for(i=0;i<n;i++)
 +      if(!frozen[i])
 +      dx[point][i] = p[i];
 +      else
 +      dx[point][i] = 0;
 +
 +    stepsize=1.0;
-     
++
 +    /* Test whether the convergence criterion is met */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
-                mdatoms->tmass,enerd,state,state->box,
-                NULL,NULL,vir,pres,NULL,mu_tot,constr);
++
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-       print_ebin_header(fplog,step,step,state->lambda);
++                 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
++                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
-     
++          print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
-     
++
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */
-     
++
 +    converged = converged || (fmax < inputrec->em_tol);
-   
-   if(converged)       
++
 +  } /* End of the loop */
-   
++
++  if(converged)
 +    step--; /* we never took that last step in this case */
-         converged = FALSE; 
++
 +    if(fmax>inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
-   
++        converged = FALSE;
 +    }
-     print_ebin_header(fplog,step,step,state->lambda);
++
 +  /* If we printed energy and/or logfile last step (which was the last step)
 +   * we don't have to do it again, but otherwise print the final values.
 +   */
 +  if(!do_log) /* Write final value to log since we didn't do anythin last step */
-   
++    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +  if(!do_ene || !do_log) /* Write final energy file entries */
 +    print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +             !do_log ? fplog : NULL,step,step,eprNORMAL,
 +             TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
-   
++
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
-    */  
++
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
-   
++   */
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = !do_per_step(step,inputrec->nstfout);
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                &ems,state,f);
-     
++
 +  if (MASTER(cr)) {
 +    print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +    print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
-   
++
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
-                 int repl_ex_nst,int repl_ex_seed,
++
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_steep(FILE *fplog,t_commrec *cr,
 +                int nfile, const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state_global,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
- { 
++                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                gmx_membed_t membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
-   t_mdebin   *mdebin; 
-   gmx_bool   bDone,bAbort,do_x,do_f; 
-   tensor vir,pres; 
++{ /* Steepest Descents energy minimization.
++   *
++   * Each pass of the loop below evaluates the energy of a trial state
++   * (s_try). The trial is accepted (swapped into s_min) on the first pass
++   * or when its epot is lower than s_min->epot; on acceptance the maximum
++   * displacement ustep is multiplied by 1.2 (not on the first pass), on
++   * rejection it is halved. The loop ends when an accepted state has fmax
++   * below inputrec->em_tol (bDone), or when count reaches nsteps or ustep
++   * drops below 1e-6 (1e-12 with GMX_DOUBLE) (bAbort).
++   *
++   * Side effects visible here: inputrec->nsteps and runtime->nsteps_done
++   * are both set to the final count. The function always returns 0.
++   *
++   * oenv, bCompact, nstglobalcomm, stepout, ed, the three repl_ex_
++   * arguments, membed, cpt_period, max_hours, deviceOptions and Flags
++   * are not referenced anywhere in this body.
++   */
 +  const char *SD="Steepest Descents";
 +  em_state_t *s_min,*s_try;
 +  rvec       *f_global;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  real   stepsize,constepsize;
 +  real   ustep,dvdlambda,fnormn;
 +  gmx_mdoutf_t *outf;
-   int    count=0; 
-   int    steps_accepted=0; 
++  t_mdebin   *mdebin;
++  gmx_bool   bDone,bAbort,do_x,do_f;
++  tensor vir,pres;
 +  rvec   mu_tot;
 +  int    nsteps;
-       
++  int    count=0;
++  int    steps_accepted=0;
 +  /* not used: terminate is set to 0 here and never read in this function */
 +  real   terminate=0;
 +
 +  s_min = init_em_state();
 +  s_try = init_em_state();
 +
 +  /* Init em and store the local state in s_try */
 +  init_em(fplog,SD,cr,inputrec,
 +          state_global,top_global,s_try,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
-     
-   /* Set variables for stepsize (in nm). This is the largest  
-    * step that we are going to make in any direction. 
++
 +  /* Print to log file  */
 +  print_em_start(fplog,cr,runtime,wcycle,SD);
-   ustep = inputrec->em_stepsize; 
++
++  /* Set variables for stepsize (in nm). This is the largest
++   * step that we are going to make in any direction.
 +   */
-   
++  ustep = inputrec->em_stepsize;
 +  stepsize = 0;
-   nsteps = inputrec->nsteps; 
-   
-   if (MASTER(cr)) 
++
 +  /* Max number of steps  */
-     
++  nsteps = inputrec->nsteps;
++
++  if (MASTER(cr))
 +    /* Print to the screen  */
 +    sp_header(stderr,SD,inputrec->em_tol,nsteps);
 +  if (fplog)
 +    sp_header(fplog,SD,inputrec->em_tol,nsteps);
-    * count is the counter for the number of steps 
++
 +  /**** HERE STARTS THE LOOP ****
-     
++   * count is the counter for the number of steps
 +   * bDone will be TRUE when the minimization has converged
 +   * bAbort will be TRUE when nsteps steps have been performed or when
 +   * the stepsize becomes smaller than is reasonable for machine precision
 +   */
 +  count  = 0;
 +  bDone  = FALSE;
 +  bAbort = FALSE;
 +  while( !bDone && !bAbort ) {
 +    bAbort = (nsteps >= 0) && (count == nsteps); /* can only fire when nsteps is non-negative */
-     
++
 +    /* set new coordinates, except for first step */
 +    if (count > 0) {
 +      do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
 +               constr,top,nrnb,wcycle,count);
 +    }
-        
++
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_try,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,count,count==0);
-       print_ebin_header(fplog,count,count,s_try->s.lambda);
++
 +    if (MASTER(cr))
-     
++      print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
 +
 +    if (count == 0)
 +      s_min->epot = s_try->epot + 1; /* so the first trial compares as lower than s_min in the tests below */
-       
++
 +    /* Print it if necessary  */
 +    if (MASTER(cr)) {
 +      if (bVerbose) {
 +      fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
 +              count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
 +              (s_try->epot < s_min->epot) ? '\n' : '\r');
 +      }
-                  mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
-                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
++
 +      if (s_try->epot < s_min->epot) {
 +      /* Store the new (lower) energies  */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)count,
-     } 
-     
-     /* Now if the new energy is smaller than the previous...  
++                 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
++                   s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      print_ebin(outf->fp_ene,TRUE,
 +                 do_per_step(steps_accepted,inputrec->nstdisreout),
 +                 do_per_step(steps_accepted,inputrec->nstorireout),
 +                 fplog,count,count,eprNORMAL,TRUE,
 +                 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +      fflush(fplog);
 +      }
-      * or if we did random steps! 
++    }
++
++    /* Now if the new energy is smaller than the previous...
 +     * or if this is the first step!
-     
++     * or if we did random steps! (NOTE(review): the test below has no such case)
 +     */
-       steps_accepted++; 
++
 +    if ( (count==0) || (s_try->epot < s_min->epot) ) {
-       
++      steps_accepted++;
 +
 +      /* Test whether the convergence criterion is met...  */
 +      bDone = (s_try->fmax < inputrec->em_tol);
-       /* The 'Min' array always holds the coords and forces of the minimal 
++
 +      /* Copy the arrays for force, positions and energy  */
-     } 
++      /* The 'Min' array always holds the coords and forces of the minimal
 +       sampled energy  */
 +      swap_em_state(s_min,s_try);
 +      if (count > 0)
 +      ustep *= 1.2;
 +
 +      /* Write to trn, if necessary */
 +      do_x = do_per_step(steps_accepted,inputrec->nstxout);
 +      do_f = do_per_step(steps_accepted,inputrec->nstfout);
 +      write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                    top_global,inputrec,count,
 +                    s_min,state_global,f_global);
-     
++    }
 +    else {
 +      /* If energy is not smaller make the step smaller...  */
 +      ustep *= 0.5;
 +
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +      /* Reload the old state */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                             s_min,top,mdatoms,fr,vsite,constr,
 +                             nrnb,wcycle);
 +      }
 +    }
-     
++
 +    /* Determine new step  */
 +    stepsize = ustep/s_min->fmax;
-     
++
 +    /* Check if stepsize is too small, with 1 nm as a characteristic length */
 +#ifdef GMX_DOUBLE
 +        if (count == nsteps || ustep < 1e-12)
 +#else
 +        if (count == nsteps || ustep < 1e-6)
 +#endif
 +        {
 +            if (MASTER(cr))
 +            {
 +                warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
 +                warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
 +            }
 +            bAbort=TRUE;
 +        }
-   
++
 +    count++;
 +  } /* End of the loop  */
-   if (MASTER(cr)) 
-     fprintf(stderr,"\nwriting lowest energy coordinates.\n"); 
++
 +    /* Report and write out the lowest-energy state found (s_min) */
-   
++  if (MASTER(cr))
++    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +  write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
 +              top_global,inputrec,count,
 +              s_min,state_global,f_global); /* NOTE(review): nstfout is passed where the in-loop call passes the boolean do_f */
 +
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
-   
++
 +  /* To print the actual number of steps we needed somewhere
 +   * (this overwrites the step limit that nsteps was read from above) */
 +  inputrec->nsteps=count;
 +
 +  runtime->nsteps_done = count;
-              int repl_ex_nst,int repl_ex_seed,
++
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_nm(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
-     real       t,lambda;
++             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +             gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    const char *NM = "Normal Mode Analysis";
 +    gmx_mdoutf_t *outf;
 +    int        natoms,atom,d;
 +    int        nnodes,node;
 +    rvec       *f_global;
 +    gmx_localtop_t *top;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f;
 +    gmx_global_stat_t gstat;
 +    t_graph    *graph;
-       
++    real       t,t0,lambda,lam0;
 +    gmx_bool       bNS;
 +    tensor     vir,pres;
 +    rvec       mu_tot;
 +    rvec       *fneg,*dfdx;
 +    gmx_bool       bSparse; /* use sparse matrix storage format */
 +    size_t     sz;
 +    gmx_sparsematrix_t * sparse_matrix = NULL;
 +    real *     full_matrix             = NULL;
 +    em_state_t *   state_work;
-     
++
 +    /* added with respect to mdrun */
 +    int        i,j,k,row,col;
 +    real       der_range=10.0*sqrt(GMX_REAL_EPS);
 +    real       x_min;
 +    real       fnorm,fmax;
-     
++
 +    if (constr != NULL)
 +    {
 +        gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
 +    }
 +
 +    state_work = init_em_state();
-     
++
 +    /* Init em and store the local state in state_work */
 +    init_em(fplog,NM,cr,inputrec,
 +            state_global,top_global,state_work,&top,
 +            &f,&f_global,
 +            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +            nfile,fnm,&outf,NULL);
-     
++
 +    natoms = top_global->natoms;
 +    snew(fneg,natoms);
 +    snew(dfdx,natoms);
-     
++
 +#ifndef GMX_DOUBLE
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,
 +                "NOTE: This version of Gromacs has been compiled in single precision,\n"
 +                "      which MIGHT not be accurate enough for normal mode analysis.\n"
 +                "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
 +                "      are fairly modest even if you recompile in double precision.\n\n");
 +    }
 +#endif
-       * will be when we use a cutoff.    
++
 +    /* Check if we can/should use sparse storage format.
 +     *
 +     * Sparse format is only useful when the Hessian itself is sparse, which it
-     
++      * will be when we use a cutoff.
 +      * For small systems (n<1000) it is easier to always use full matrix format, though.
 +      */
 +    if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
 +    {
 +        fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
 +        bSparse = FALSE;
 +    }
 +    else if(top_global->natoms < 1000)
 +    {
 +        fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
 +        bSparse = FALSE;
 +    }
 +    else
 +    {
 +        fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
 +        bSparse = TRUE;
 +    }
-     
++
 +    sz = DIM*top_global->natoms;
-     
++
 +    fprintf(stderr,"Allocating Hessian memory...\n\n");
 +
 +    if(bSparse)
 +    {
 +        sparse_matrix=gmx_sparsematrix_init(sz);
 +        sparse_matrix->compressed_symmetric = TRUE;
 +    }
 +    else
 +    {
 +        snew(full_matrix,sz*sz);
 +    }
-     t      = inputrec->init_t;
-     lambda = inputrec->init_lambda;
-     
++
 +    /* Initial values */
-     
++    t0           = inputrec->init_t;
++    lam0         = inputrec->fepvals->init_lambda;
++    t            = t0;
++    lambda       = lam0;
++
 +    init_nrnb(nrnb);
-     
++
 +    where();
-     if (MASTER(cr)) 
++
 +    /* Write start time and temperature */
 +    print_em_start(fplog,cr,runtime,wcycle,NM);
 +
 +    /* fudge nr of steps to nr of atoms */
 +    inputrec->nsteps = natoms*2;
 +
-    
++    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
 +                *(top_global->name),(int)inputrec->nsteps);
 +    }
 +
 +    nnodes = cr->nnodes;
-         if (state_work->fmax > 1.0e-3) 
++
 +    /* Make evaluate_energy do a single node force calculation */
 +    cr->nnodes = 1;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                    state_global,top_global,state_work,top,
 +                    inputrec,nrnb,wcycle,gstat,
 +                    vsite,constr,fcd,graph,mdatoms,fr,
 +                    mu_tot,enerd,vir,pres,-1,TRUE);
 +    cr->nnodes = nnodes;
 +
 +    /* if forces are not small, warn user */
 +    get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
-     
++        if (state_work->fmax > 1.0e-3)
 +        {
 +            fprintf(stderr,"Maximum force probably not small enough to");
 +            fprintf(stderr," ensure that you are in an \nenergy well. ");
 +            fprintf(stderr,"Be aware that negative eigenvalues may occur");
 +            fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
 +        }
 +    }
-      *      Loop over all pairs in matrix 
-      * 
-      *      do_force called twice. Once with positive and 
-      *      once with negative displacement 
++
 +    /***********************************************************
 +     *
-     for(atom=cr->nodeid; atom<natoms; atom+=nnodes) 
++     *      Loop over all pairs in matrix
++     *
++     *      do_force called twice. Once with positive and
++     *      once with negative displacement
 +     *
 +     ************************************************************/
 +
 +    /* Steps are divided one by one over the nodes */
-         
-         for (d=0; d<DIM; d++) 
++    for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
 +    {
-           
++
++        for (d=0; d<DIM; d++)
 +        {
 +            x_min = state_work->s.x[atom][d];
 +
 +            state_work->s.x[atom][d] = x_min - der_range;
-                       
++
 +            /* Make evaluate_energy do a single node force calculation */
 +            cr->nnodes = 1;
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2,FALSE);
-             
++
 +            for(i=0; i<natoms; i++)
 +            {
 +                copy_rvec(state_work->f[i], fneg[i]);
 +            }
-             
++
 +            state_work->s.x[atom][d] = x_min + der_range;
-             for(j=0; j<natoms; j++) 
++
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2+1,FALSE);
 +            cr->nnodes = nnodes;
 +
 +            /* x is restored to original */
 +            state_work->s.x[atom][d] = x_min;
 +
-                 for (k=0; (k<DIM); k++) 
++            for(j=0; j<natoms; j++)
 +            {
-                     for(j=0; j<natoms; j++) 
++                for (k=0; (k<DIM); k++)
 +                {
 +                    dfdx[j][k] =
 +                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                }
 +            }
 +
 +            if (!MASTER(cr))
 +            {
 +#ifdef GMX_MPI
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +                MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
 +                         cr->mpi_comm_mygroup);
 +#endif
 +            }
 +            else
 +            {
 +                for(node=0; (node<nnodes && atom+node<natoms); node++)
 +                {
 +                    if (node > 0)
 +                    {
 +#ifdef GMX_MPI
 +                        MPI_Status stat;
 +                        MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
 +                                 cr->mpi_comm_mygroup,&stat);
 +#undef mpi_type
 +#endif
 +                    }
 +
 +                    row = (atom + node)*DIM + d;
 +
-                         for(k=0; k<DIM; k++) 
++                    for(j=0; j<natoms; j++)
 +                    {
-                             
++                        for(k=0; k<DIM; k++)
 +                        {
 +                            col = j*DIM + k;
-             
++
 +                            if (bSparse)
 +                            {
 +                                if (col >= row && dfdx[j][k] != 0.0)
 +                                {
 +                                    gmx_sparsematrix_increment_value(sparse_matrix,
 +                                                                     row,col,dfdx[j][k]);
 +                                }
 +                            }
 +                            else
 +                            {
 +                                full_matrix[row*sz+col] = dfdx[j][k];
 +                            }
 +                        }
 +                    }
 +                }
 +            }
-                 fflush(fplog);            
++
 +            if (bVerbose && fplog)
 +            {
-         if (MASTER(cr) && bVerbose) 
++                fflush(fplog);
 +            }
 +        }
 +        /* write progress */
-                     min(atom+nnodes,natoms),natoms); 
++        if (MASTER(cr) && bVerbose)
 +        {
 +            fprintf(stderr,"\rFinished step %d out of %d",
-     
-     if (MASTER(cr)) 
++                    min(atom+nnodes,natoms),natoms);
 +            fflush(stderr);
 +        }
 +    }
-     
++
++    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n\nWriting Hessian...\n");
 +        gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
 +    }
 +
 +    finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +    runtime->nsteps_done = natoms*2;
++
 +    return 0;
 +}
++
index f2e93c578977a4b66c35876b41bab91d7d7ce379,0000000000000000000000000000000000000000..15b7eb57ea4165130fa2a7488a2c7aa493788949
mode 100644,000000..100644
--- /dev/null
@@@ -1,2802 -1,0 +1,2802 @@@
-        icoul=4;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_SHM_FDECOMP
 +#include <pthread.h> 
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "maths.h"
 +#include "vec.h"
 +#include "network.h"
 +#include "nsgrid.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "ns.h"
 +#include "pbc.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "nrnb.h"
 +#include "txtdump.h"
 +#include "mtop_util.h"
 +
 +#include "domdec.h"
 +#include "adress.h"
 +
 +
 +/* 
 + *    E X C L U S I O N   H A N D L I N G
 + */
 +
 +#ifdef DEBUG
 +static void SETEXCL_(t_excl e[],atom_id i,atom_id j)
 +{   e[j] = e[j] | (1<<i); }
 +static void RMEXCL_(t_excl e[],atom_id i,atom_id j) 
 +{ e[j]=e[j] & ~(1<<i); }
 +static gmx_bool ISEXCL_(t_excl e[],atom_id i,atom_id j) 
 +{ return (gmx_bool)(e[j] & (1<<i)); }
 +static gmx_bool NOTEXCL_(t_excl e[],atom_id i,atom_id j)
 +{  return !(ISEXCL(e,i,j)); }
 +#else
 +#define SETEXCL(e,i,j) (e)[((atom_id) (j))] |= (1<<((atom_id) (i)))
 +#define RMEXCL(e,i,j)  (e)[((atom_id) (j))] &= (~(1<<((atom_id) (i))))
 +#define ISEXCL(e,i,j)  (gmx_bool) ((e)[((atom_id) (j))] & (1<<((atom_id) (i))))
 +#define NOTEXCL(e,i,j) !(ISEXCL(e,i,j))
 +#endif
 +
 +/************************************************
 + *
 + *  U T I L I T I E S    F O R    N S
 + *
 + ************************************************/
 +
 +static void reallocate_nblist(t_nblist *nl)
 +{
 +    if (gmx_debug_at)
 +    {
 +        fprintf(debug,"reallocating neigborlist il_code=%d, maxnri=%d\n",
 +                nl->il_code,nl->maxnri); 
 +    }
 +    srenew(nl->iinr,   nl->maxnri);
 +    if (nl->enlist == enlistCG_CG)
 +    {
 +        srenew(nl->iinr_end,nl->maxnri);
 +    }
 +    srenew(nl->gid,    nl->maxnri);
 +    srenew(nl->shift,  nl->maxnri);
 +    srenew(nl->jindex, nl->maxnri+1);
 +}
 +
 +/* ivdw/icoul are used to determine the type of interaction, so we
 + * can set an innerloop index here. The obvious choice for this would have
 + * been the vdwtype/coultype values in the forcerecord, but unfortunately 
 + * those types are braindead - for instance both Buckingham and normal 
 + * Lennard-Jones use the same value (evdwCUT), and a separate boolean variable
 + * to determine which interaction is used. There is further no special value
 + * for 'no interaction'. For backward compatibility with old TPR files we won't
 + * change this in the 3.x series, so when calling this routine you should use:
 + *
 + * icoul=0 no coulomb interaction
 + * icoul=1 cutoff standard coulomb
 + * icoul=2 reaction-field coulomb
 + * icoul=3 tabulated coulomb
 + *
 + * ivdw=0 no vdw interaction
 + * ivdw=1 standard L-J interaction
 + * ivdw=2 Buckingham
 + * ivdw=3 tabulated vdw.
 + *
 + * Kind of ugly, but it works.
 + */
 +static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
 +                        int maxsr,int maxlr,
 +                        int ivdw, int icoul, 
 +                        gmx_bool bfree, int enlist)
 +{
 +    t_nblist *nl;
 +    int      homenr;
 +    int      i,nn;
 +    
 +    int inloop[20] =
 +    { 
 +        eNR_NBKERNEL_NONE,
 +        eNR_NBKERNEL010,
 +        eNR_NBKERNEL020,
 +        eNR_NBKERNEL030,
 +        eNR_NBKERNEL100,
 +        eNR_NBKERNEL110,
 +        eNR_NBKERNEL120,
 +        eNR_NBKERNEL130,
 +        eNR_NBKERNEL200,
 +        eNR_NBKERNEL210,
 +        eNR_NBKERNEL220,
 +        eNR_NBKERNEL230,
 +        eNR_NBKERNEL300,
 +        eNR_NBKERNEL310,
 +        eNR_NBKERNEL320,
 +        eNR_NBKERNEL330,
 +        eNR_NBKERNEL400,
 +        eNR_NBKERNEL410,
 +        eNR_NBKERNEL_NONE,
 +        eNR_NBKERNEL430
 +    };
 +  
 +    for(i=0; (i<2); i++)
 +    {
 +        nl     = (i == 0) ? nl_sr : nl_lr;
 +        homenr = (i == 0) ? maxsr : maxlr;
 +
 +        if (nl == NULL)
 +        {
 +            continue;
 +        }
 +        
 +        /* Set coul/vdw in neighborlist, and for the normal loops we determine
 +         * an index of which one to call.
 +         */
 +        nl->ivdw  = ivdw;
 +        nl->icoul = icoul;
 +        nl->free_energy = bfree;
 +    
 +        if (bfree)
 +        {
 +            nl->enlist  = enlistATOM_ATOM;
 +            nl->il_code = eNR_NBKERNEL_FREE_ENERGY;
 +        }
 +        else
 +        {
 +            nl->enlist = enlist;
 +
 +            nn = inloop[4*icoul + ivdw];
 +            
 +            /* solvent loops follow directly after the corresponding
 +            * ordinary loops, in the order:
 +            *
 +            * SPC, SPC-SPC, TIP4p, TIP4p-TIP4p
 +            *   
 +            */
 +            switch (enlist) {
 +            case enlistATOM_ATOM:
 +            case enlistCG_CG:
 +                break;
 +            case enlistSPC_ATOM:     nn += 1; break;
 +            case enlistSPC_SPC:      nn += 2; break;
 +            case enlistTIP4P_ATOM:   nn += 3; break;
 +            case enlistTIP4P_TIP4P:  nn += 4; break;
 +            }
 +            
 +            nl->il_code = nn;
 +        }
 +
 +        if (debug)
 +            fprintf(debug,"Initiating neighbourlist type %d for %s interactions,\nwith %d SR, %d LR atoms.\n",
 +                    nl->il_code,ENLISTTYPE(enlist),maxsr,maxlr);
 +        
 +        /* maxnri is influenced by the number of shifts (maximum is 8)
 +         * and the number of energy groups.
 +         * If it is not enough, nl memory will be reallocated during the run.
 +         * 4 seems to be a reasonable factor, which only causes reallocation
 +         * during runs with many tiny energy groups.
 +         */
 +        nl->maxnri      = homenr*4;
 +        nl->maxnrj      = 0;
 +        nl->maxlen      = 0;
 +        nl->nri         = -1;
 +        nl->nrj         = 0;
 +        nl->iinr        = NULL;
 +        nl->gid         = NULL;
 +        nl->shift       = NULL;
 +        nl->jindex      = NULL;
 +        reallocate_nblist(nl);
 +        nl->jindex[0] = 0;
 +#ifdef GMX_THREAD_SHM_FDECOMP
 +        nl->counter = 0;
 +        snew(nl->mtx,1);
 +        pthread_mutex_init(nl->mtx,NULL);
 +#endif
 +    }
 +}
 +
 +void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
 +{
 +   /* Make maxlr tunable! (does not seem to be a big difference though) 
 +    * This parameter determines the number of i particles in a long range 
 +    * neighbourlist. Too few means many function calls, too many means
 +    * cache thrashing.
 +    */
 +   int maxsr,maxsr_wat,maxlr,maxlr_wat;
 +   int icoul,icoulf,ivdw;
 +   int solvent;
 +   int enlist_def,enlist_w,enlist_ww;
 +   int i;
 +   t_nblists *nbl;
 +
 +   /* maxsr     = homenr-fr->nWatMol*3; */
 +   maxsr     = homenr;
 +
 +   if (maxsr < 0)
 +   {
 +     gmx_fatal(FARGS,"%s, %d: Negative number of short range atoms.\n"
 +               "Call your Gromacs dealer for assistance.",__FILE__,__LINE__);
 +   }
 +   /* This is just for initial allocation, so we do not reallocate
 +    * all the nlist arrays many times in a row.
 +    * The numbers look very precise, but their exact values are not critical.
 +    */
 +   maxsr_wat = min(fr->nWatMol,(homenr+2)/3); 
 +   if (fr->bTwinRange) 
 +   {
 +       maxlr     = 50;
 +       maxlr_wat = min(maxsr_wat,maxlr);
 +   }
 +   else
 +   {
 +     maxlr = maxlr_wat = 0;
 +   }  
 +
 +   /* Determine the values for icoul/ivdw. */
 +   /* Start with GB */
 +   if(fr->bGB)
 +   {
-        icoul = 3;
++       icoul=enbcoulGB;
 +   }
 +   else if (fr->bcoultab)
 +   {
-        icoul = 2;
++       icoul = enbcoulTAB;
 +   }
 +   else if (EEL_RF(fr->eeltype))
 +   {
-        icoul = 1;
++       icoul = enbcoulRF;
 +   }
 +   else 
 +   {
-        ivdw = 3;
++       icoul = enbcoulOOR;
 +   }
 +   
 +   if (fr->bvdwtab)
 +   {
-        ivdw = 2;
++       ivdw = enbvdwTAB;
 +   }
 +   else if (fr->bBHAM)
 +   {
-        ivdw = 1;
++       ivdw = enbvdwBHAM;
 +   }
 +   else 
 +   {
-            if (fr->bEwald)
++       ivdw = enbvdwLJ;
 +   }
 +
 +   fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0);
 +   if (!fr->ns.bCGlist)
 +   {
 +       enlist_def = enlistATOM_ATOM;
 +   }
 +   else
 +   {
 +       enlist_def = enlistCG_CG;
 +       if (log != NULL)
 +       {
 +           fprintf(log,"\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
 +       }
 +   }
 +   
 +   if (fr->solvent_opt == esolTIP4P) {
 +       enlist_w  = enlistTIP4P_ATOM;
 +       enlist_ww = enlistTIP4P_TIP4P;
 +   } else {
 +       enlist_w  = enlistSPC_ATOM;
 +       enlist_ww = enlistSPC_SPC;
 +   }
 +
 +   for(i=0; i<fr->nnblists; i++) 
 +   {
 +       nbl = &(fr->nblists[i]);
 +       init_nblist(&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
 +                   maxsr,maxlr,ivdw,icoul,FALSE,enlist_def);
 +       init_nblist(&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
 +                   maxsr,maxlr,ivdw,0,FALSE,enlist_def);
 +       init_nblist(&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
 +                   maxsr,maxlr,0,icoul,FALSE,enlist_def);
 +       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
 +                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_w);
 +       init_nblist(&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
 +                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_w);
 +       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
 +                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_ww);
 +       init_nblist(&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
 +                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_ww);
 +       
 +       if (fr->efep != efepNO) 
 +       {
-                icoulf = 5;
++           if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */
 +           {
-                          real lambda,real *dvdlambda,
++               icoulf = enbcoulFEWALD;
 +           }
 +           else
 +           {
 +               icoulf = icoul;
 +           }
 +
 +           init_nblist(&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
 +                       maxsr,maxlr,ivdw,icoulf,TRUE,enlistATOM_ATOM);
 +           init_nblist(&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
 +                       maxsr,maxlr,ivdw,0,TRUE,enlistATOM_ATOM);
 +           init_nblist(&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
 +                       maxsr,maxlr,0,icoulf,TRUE,enlistATOM_ATOM);
 +       }  
 +   }
 +   /* QMMM MM list */
 +   if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
 +   {
 +       init_nblist(&fr->QMMMlist,NULL,
 +                   maxsr,maxlr,0,icoul,FALSE,enlistATOM_ATOM);
 +   }
 +
 +   fr->ns.nblist_initialized=TRUE;
 +}
 +
 +static void reset_nblist(t_nblist *nl)
 +{
 +     nl->nri       = -1;
 +     nl->nrj       = 0;
 +     nl->maxlen    = 0;
 +     if (nl->jindex)
 +     {
 +         nl->jindex[0] = 0;
 +     }
 +}
 +
 +static void reset_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL)
 +{
 +    int n,i;
 +  
 +    if (bLR) 
 +    {
 +        reset_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
 +    }
 +    else 
 +    {
 +        for(n=0; n<fr->nnblists; n++)
 +        {
 +            for(i=0; i<eNL_NR; i++)
 +            {
 +                reset_nblist(&(fr->nblists[n].nlist_sr[i]));
 +            }
 +        }
 +        if (fr->bQMMM)
 +        { 
 +            /* only reset the short-range nblist */
 +            reset_nblist(&(fr->QMMMlist));
 +        }
 +    }
 +}
 +
 +
 +
 +
 +static inline void new_i_nblist(t_nblist *nlist,
 +                                gmx_bool bLR,atom_id i_atom,int shift,int gid)
 +{
 +    int    i,k,nri,nshift;
 +    
 +    nri = nlist->nri;
 +    
 +    /* Check whether we have to increase the i counter */
 +    if ((nri == -1) ||
 +        (nlist->iinr[nri]  != i_atom) || 
 +        (nlist->shift[nri] != shift) || 
 +        (nlist->gid[nri]   != gid))
 +    {
 +        /* This is something else. Now see if any entries have 
 +         * been added in the list of the previous atom.
 +         */
 +        if ((nri == -1) ||
 +            ((nlist->jindex[nri+1] > nlist->jindex[nri]) && 
 +             (nlist->gid[nri] != -1)))
 +        {
 +            /* If so increase the counter */
 +            nlist->nri++;
 +            nri++;
 +            if (nlist->nri >= nlist->maxnri)
 +            {
 +                nlist->maxnri += over_alloc_large(nlist->nri);
 +                reallocate_nblist(nlist);
 +            }
 +        }
 +        /* Set the number of neighbours and the atom number */
 +        nlist->jindex[nri+1] = nlist->jindex[nri];
 +        nlist->iinr[nri]     = i_atom;
 +        nlist->gid[nri]      = gid;
 +        nlist->shift[nri]    = shift;
 +    }
 +}
 +
 +static inline void close_i_nblist(t_nblist *nlist) 
 +{
 +    int nri = nlist->nri;
 +    int len;
 +    
 +    if (nri >= 0)
 +    {
 +        nlist->jindex[nri+1] = nlist->nrj;
 +        
 +        len=nlist->nrj -  nlist->jindex[nri];
 +        
 +        /* nlist length for water i molecules is treated statically 
 +         * in the innerloops 
 +         */
 +        if (len > nlist->maxlen)
 +        {
 +            nlist->maxlen = len;
 +        }
 +    }
 +}
 +
 +static inline void close_nblist(t_nblist *nlist)
 +{
 +    /* Only close this nblist when it has been initialized.
 +     * Avoid the creation of i-lists with no j-particles.
 +     */
 +    if (nlist->nrj == 0)
 +    {
 +        /* Some assembly kernels do not support empty lists,
 +         * make sure here that we don't generate any empty lists.
 +         * With the current ns code this branch is taken in two cases:
 +         * No i-particles at all: nri=-1 here
 +         * There are i-particles, but no j-particles; nri=0 here
 +         */
 +        nlist->nri = 0;
 +    }
 +    else
 +    {
 +        /* Close list number nri by incrementing the count */
 +        nlist->nri++;
 +    }
 +}
 +
 +static inline void close_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL, 
 +                                       gmx_bool bMakeQMMMnblist)
 +{
 +    int n,i;
 +    
 +    if (bMakeQMMMnblist) {
 +        if (!bLR)
 +        {
 +            close_nblist(&(fr->QMMMlist));
 +        }
 +    }
 +    else 
 +    {
 +        if (bLR)
 +        {
 +            close_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
 +        }
 +        else
 +        { 
 +            for(n=0; n<fr->nnblists; n++)
 +            {
 +                for(i=0; (i<eNL_NR); i++)
 +                {
 +                    close_nblist(&(fr->nblists[n].nlist_sr[i]));
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
 +        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +        if (gmx_debug_at)
 +            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
 +                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr,nlist->maxnrj);
 +    }
 +
 +    nlist->jjnr[nrj] = j_atom;
 +    nlist->nrj ++;
 +}
 +
 +static inline void add_j_to_nblist_cg(t_nblist *nlist,
 +                                      atom_id j_start,int j_end,
 +                                      t_excl *bexcl,gmx_bool i_is_j,
 +                                      gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    int j;
 +
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
 +        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +        if (gmx_debug_at)
 +            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
 +                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr    ,nlist->maxnrj);
 +        srenew(nlist->jjnr_end,nlist->maxnrj);
 +        srenew(nlist->excl    ,nlist->maxnrj*MAX_CGCGSIZE);
 +    }
 +
 +    nlist->jjnr[nrj]     = j_start;
 +    nlist->jjnr_end[nrj] = j_end;
 +
 +    if (j_end - j_start > MAX_CGCGSIZE)
 +    {
 +        gmx_fatal(FARGS,"The charge-group - charge-group neighborlist do not support charge groups larger than %d, found a charge group of size %d",MAX_CGCGSIZE,j_end-j_start);
 +    }
 +
 +    /* Set the exclusions */
 +    for(j=j_start; j<j_end; j++)
 +    {
 +        nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j];
 +    }
 +    if (i_is_j)
 +    {
 +        /* Avoid double counting of intra-cg interactions */
 +        for(j=1; j<j_end-j_start; j++)
 +        {
 +            nlist->excl[nrj*MAX_CGCGSIZE + j] |= (1<<j) - 1;
 +        }
 +    }
 +
 +    nlist->nrj ++;
 +}
 +
 +typedef void
 +put_in_list_t(gmx_bool              bHaveVdW[],
 +              int               ngid,
 +              t_mdatoms *       md,
 +              int               icg,
 +              int               jgid,
 +              int               nj,
 +              atom_id           jjcg[],
 +              atom_id           index[],
 +              t_excl            bExcl[],
 +              int               shift,
 +              t_forcerec *      fr,
 +              gmx_bool              bLR,
 +              gmx_bool              bDoVdW,
 +              gmx_bool              bDoCoul);
 +
 +static void 
 +put_in_list_at(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
 +               gmx_bool              bLR,
 +               gmx_bool              bDoVdW,
 +               gmx_bool              bDoCoul)
 +{
 +    /* The a[] index has been removed;
 +     * to put it back in, i_atom should be a[i0] and jj should be a[jj].
 +     */
 +    t_nblist *   vdwc;
 +    t_nblist *   vdw;
 +    t_nblist *   coul;
 +    t_nblist *   vdwc_free  = NULL;
 +    t_nblist *   vdw_free   = NULL;
 +    t_nblist *   coul_free  = NULL;
 +    t_nblist *   vdwc_ww    = NULL;
 +    t_nblist *   coul_ww    = NULL;
 +    
 +    int           i,j,jcg,igid,gid,nbl_ind,ind_ij;
 +    atom_id   jj,jj0,jj1,i_atom;
 +    int       i0,nicg,len;
 +    
 +    int       *cginfo;
 +    int       *type,*typeB;
 +    real      *charge,*chargeB;
 +    real      qi,qiB,qq,rlj;
 +    gmx_bool      bFreeEnergy,bFree,bFreeJ,bNotEx,*bPert;
 +    gmx_bool      bDoVdW_i,bDoCoul_i,bDoCoul_i_sol;
 +    int       iwater,jwater;
 +    t_nblist  *nlist;
 +    
 +    /* Copy some pointers */
 +    cginfo  = fr->cginfo;
 +    charge  = md->chargeA;
 +    chargeB = md->chargeB;
 +    type    = md->typeA;
 +    typeB   = md->typeB;
 +    bPert   = md->bPerturbed;
 +    
 +    /* Get atom range */
 +    i0     = index[icg];
 +    nicg   = index[icg+1]-i0;
 +    
 +    /* Get the i charge group info */
 +    igid   = GET_CGINFO_GID(cginfo[icg]);
 +    iwater = GET_CGINFO_SOLOPT(cginfo[icg]);
 +    
 +    bFreeEnergy = FALSE;
 +    if (md->nPerturbed) 
 +    {
 +        /* Check if any of the particles involved are perturbed. 
 +         * If not we can do the cheaper normal put_in_list
 +         * and use more solvent optimization.
 +         */
 +        for(i=0; i<nicg; i++)
 +        {
 +            bFreeEnergy |= bPert[i0+i];
 +        }
 +        /* Loop over the j charge groups */
 +        for(j=0; (j<nj && !bFreeEnergy); j++) 
 +        {
 +            jcg = jjcg[j];
 +            jj0 = index[jcg];
 +            jj1 = index[jcg+1];
 +            /* Finally loop over the atoms in the j-charge group */   
 +            for(jj=jj0; jj<jj1; jj++)
 +            {
 +                bFreeEnergy |= bPert[jj];
 +            }
 +        }
 +    }
 +    
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[GID(igid,jgid,ngid)];
 +    }
 +    if (bLR)
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_lr;
 +    }
 +    else
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_sr;
 +    }
 +    
 +    if (iwater != esolNO)
 +    {
 +        vdwc = &nlist[eNL_VDWQQ_WATER];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ_WATER];
 +#ifndef DISABLE_WATERWATER_NLIST
 +        vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER];
 +        coul_ww = &nlist[eNL_QQ_WATERWATER];
 +#endif
 +    } 
 +    else 
 +    {
 +        vdwc = &nlist[eNL_VDWQQ];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ];
 +    }
 +    
 +    if (!bFreeEnergy) 
 +    {
 +        if (iwater != esolNO) 
 +        {
 +            /* Loop over the atoms in the i charge group */    
 +            i_atom  = i0;
 +            gid     = GID(igid,jgid,ngid);
 +            /* Create new i_atom for each energy group */
 +            if (bDoCoul && bDoVdW)
 +            {
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(vdwc_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }
 +            if (bDoVdW)
 +            {
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            }
 +            if (bDoCoul) 
 +            {
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(coul_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }      
 +        /* Loop over the j charge groups */
 +            for(j=0; (j<nj); j++) 
 +            {
 +                jcg=jjcg[j];
 +                
 +                if (jcg == icg)
 +                {
 +                    continue;
 +                }
 +                
 +                jj0 = index[jcg];
 +                jwater = GET_CGINFO_SOLOPT(cginfo[jcg]);
 +                
 +                if (iwater == esolSPC && jwater == esolSPC)
 +                {
 +                    /* Interaction between two SPC molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST       
 +                        /* Add entries for the three atoms - only do VdW if we need to */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);          
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }  
 +                } 
 +                else if (iwater == esolTIP4P && jwater == esolTIP4P) 
 +                {
 +                    /* Interaction between two TIP4p molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST       
 +                        /* Add entries for the four atoms - only do VdW if we need to */
 +                        if (bDoVdW)
 +                        {
 +                            add_j_to_nblist(vdw,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);          
 +                        add_j_to_nblist(coul,jj0+3,bLR);          
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }                                         
 +                }
 +                else 
 +                {
 +                    /* j charge group is not water, but i is.
 +                     * Add entries to the water-other_atom lists; the geometry of the water
 +                     * molecule doesn't matter - that is taken care of in the nonbonded kernel,
 +                     * so we don't care if it is SPC or TIP4P...
 +                     */
 +                    
 +                    jj1 = index[jcg+1];
 +                    
 +                    if (!bDoVdW) 
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else if (!bDoCoul)
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++)
 +                        {
 +                            if (bHaveVdW[type[jj]])
 +                            {
 +                                add_j_to_nblist(vdw,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else 
 +                    {
 +                        /* _charge_ _groups_ interact with both coulomb and LJ */
 +                        /* Check which atoms we should add to the lists!       */
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (bHaveVdW[type[jj]]) 
 +                            {
 +                                if (charge[jj] != 0)
 +                                {
 +                                    add_j_to_nblist(vdwc,jj,bLR);
 +                                }
 +                                else
 +                                {
 +                                    add_j_to_nblist(vdw,jj,bLR);
 +                                }
 +                            }
 +                            else if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw); 
 +            close_i_nblist(coul); 
 +            close_i_nblist(vdwc);  
 +#ifndef DISABLE_WATERWATER_NLIST
 +            close_i_nblist(coul_ww);
 +            close_i_nblist(vdwc_ww); 
 +#endif
 +        } 
 +        else
 +        { 
 +            /* no solvent as i charge group */
 +            /* Loop over the atoms in the i charge group */    
 +            for(i=0; i<nicg; i++) 
 +            {
 +                i_atom  = i0+i;
 +                gid     = GID(igid,jgid,ngid);
 +                qi      = charge[i_atom];
 +                
 +                /* Create new i_atom for each energy group */
 +                if (bDoVdW && bDoCoul)
 +                {
 +                    new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoVdW)
 +                {
 +                    new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoCoul)
 +                {
 +                    new_i_nblist(coul,bLR,i_atom,shift,gid);
 +                }
 +                bDoVdW_i  = (bDoVdW  && bHaveVdW[type[i_atom]]);
 +                bDoCoul_i = (bDoCoul && qi!=0);
 +                
 +                if (bDoVdW_i || bDoCoul_i) 
 +                {
 +                    /* Loop over the j charge groups */
 +                    for(j=0; (j<nj); j++) 
 +                    {
 +                        jcg=jjcg[j];
 +                        
 +                        /* Check for large charge groups */
 +                        if (jcg == icg)
 +                        {
 +                            jj0 = i0 + i + 1;
 +                        }
 +                        else
 +                        {
 +                            jj0 = index[jcg];
 +                        }
 +                        
 +                        jj1=index[jcg+1];
 +                        /* Finally loop over the atoms in the j-charge group */       
 +                        for(jj=jj0; jj<jj1; jj++) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (!bDoVdW_i) 
 +                                { 
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i) 
 +                                {
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +                close_i_nblist(vdw);
 +                close_i_nblist(coul);
 +                close_i_nblist(vdwc);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* we are doing free energy */
 +        vdwc_free = &nlist[eNL_VDWQQ_FREE];
 +        vdw_free  = &nlist[eNL_VDW_FREE];
 +        coul_free = &nlist[eNL_QQ_FREE];
 +        /* Loop over the atoms in the i charge group */    
 +        for(i=0; i<nicg; i++) 
 +        {
 +            i_atom  = i0+i;
 +            gid     = GID(igid,jgid,ngid);
 +            qi      = charge[i_atom];
 +            qiB     = chargeB[i_atom];
 +            
 +            /* Create new i_atom for each energy group */
 +            if (bDoVdW && bDoCoul) 
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +            if (bDoVdW)   
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            if (bDoCoul) 
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +            
 +            new_i_nblist(vdw_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(coul_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(vdwc_free,bLR,i_atom,shift,gid);
 +            
 +            bDoVdW_i  = (bDoVdW  &&
 +                         (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]]));
 +            bDoCoul_i = (bDoCoul && (qi!=0 || qiB!=0));
 +            /* For TIP4P the first atom does not have a charge,
 +             * but the last three do. So we should still put an atom
 +             * without LJ but with charge in the water-atom neighborlist
 +             * for a TIP4p i charge group.
 +             * For SPC type water the first atom has LJ and charge,
 +             * so there is no such problem.
 +             */
 +            if (iwater == esolNO)
 +            {
 +                bDoCoul_i_sol = bDoCoul_i;
 +            }
 +            else
 +            {
 +                bDoCoul_i_sol = bDoCoul;
 +            }
 +            
 +            if (bDoVdW_i || bDoCoul_i_sol) 
 +            {
 +                /* Loop over the j charge groups */
 +                for(j=0; (j<nj); j++)
 +                {
 +                    jcg=jjcg[j];
 +                    
 +                    /* Check for large charge groups */
 +                    if (jcg == icg)
 +                    {
 +                        jj0 = i0 + i + 1;
 +                    }
 +                    else
 +                    {
 +                        jj0 = index[jcg];
 +                    }
 +                    
 +                    jj1=index[jcg+1];
 +                    /* Finally loop over the atoms in the j-charge group */   
 +                    bFree = bPert[i_atom];
 +                    for(jj=jj0; (jj<jj1); jj++) 
 +                    {
 +                        bFreeJ = bFree || bPert[jj];
 +                        /* Complicated if, because the water H's should also
 +                         * see perturbed j-particles
 +                         */
 +                        if (iwater==esolNO || i==0 || bFreeJ) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (bFreeJ)
 +                                {
 +                                    if (!bDoVdW_i) 
 +                                    {
 +                                        if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                        {
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else if (!bDoCoul_i) 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
 +                                        {
 +                                            add_j_to_nblist(vdw_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) 
 +                                        {
 +                                            if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            {
 +                                                add_j_to_nblist(vdwc_free,jj,bLR);
 +                                            }
 +                                            else
 +                                            {
 +                                                add_j_to_nblist(vdw_free,jj,bLR);
 +                                            }
 +                                        }
 +                                        else if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoVdW_i) 
 +                                { 
 +                                    /* This is done whether or not bWater is set */
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i_sol) 
 +                                { 
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw);
 +            close_i_nblist(coul);
 +            close_i_nblist(vdwc);
 +            close_i_nblist(vdw_free);
 +            close_i_nblist(coul_free);
 +            close_i_nblist(vdwc_free);
 +        }
 +    }
 +}
 +
 +static void 
 +put_in_list_qmmm(gmx_bool              bHaveVdW[],
 +                 int               ngid,
 +                 t_mdatoms *       md,
 +                 int               icg,
 +                 int               jgid,
 +                 int               nj,
 +                 atom_id           jjcg[],
 +                 atom_id           index[],
 +                 t_excl            bExcl[],
 +                 int               shift,
 +                 t_forcerec *      fr,
 +                 gmx_bool              bLR,
 +                 gmx_bool              bDoVdW,
 +                 gmx_bool              bDoCoul)
 +{
 +    t_nblist *   coul;
 +    int         i,j,jcg,igid,gid;
 +    atom_id   jj,jj0,jj1,i_atom;
 +    int       i0,nicg;
 +    gmx_bool      bNotEx;
 +    
 +    /* Get atom range */
 +    i0     = index[icg];
 +    nicg   = index[icg+1]-i0;
 +    
 +    /* Get the i charge group info */
 +    igid   = GET_CGINFO_GID(fr->cginfo[icg]);
 +    
 +    coul = &fr->QMMMlist;
 +    
 +    /* Loop over atoms in the ith charge group */
 +    for (i=0;i<nicg;i++)
 +    {
 +        i_atom = i0+i;
 +        gid    = GID(igid,jgid,ngid);
 +        /* Create new i_atom for each energy group */
 +        new_i_nblist(coul,bLR,i_atom,shift,gid);
 +        
 +        /* Loop over the j charge groups */
 +        for (j=0;j<nj;j++)
 +        {
 +            jcg=jjcg[j];
 +            
 +            /* Charge groups cannot have QM and MM atoms simultaneously */
 +            if (jcg!=icg)
 +            {
 +                jj0 = index[jcg];
 +                jj1 = index[jcg+1];
 +                /* Finally loop over the atoms in the j-charge group */
 +                for(jj=jj0; jj<jj1; jj++)
 +                {
 +                    bNotEx = NOTEXCL(bExcl,i,jj);
 +                    if(bNotEx)
 +                        add_j_to_nblist(coul,jj,bLR);
 +                }
 +            }
 +        }
 +        close_i_nblist(coul);
 +    }
 +}
 +
 +static void 
 +put_in_list_cg(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
 +               gmx_bool              bLR,
 +               gmx_bool              bDoVdW,
 +               gmx_bool              bDoCoul)
 +{
 +    int          cginfo;
 +    int          igid,gid,nbl_ind;
 +    t_nblist *   vdwc;
 +    int          j,jcg;
 +
 +    cginfo = fr->cginfo[icg];
 +
 +    igid = GET_CGINFO_GID(cginfo);
 +    gid  = GID(igid,jgid,ngid);
 +
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[gid];
 +    }
 +    if (bLR)
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_lr[eNL_VDWQQ];
 +    }
 +    else
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_sr[eNL_VDWQQ];
 +    }
 +
 +    /* Make a new neighbor list for charge group icg.
 +     * Currently simply one neighbor list is made with LJ and Coulomb.
 +     * If required, zero interactions could be removed here
 +     * or in the force loop.
 +     */
 +    new_i_nblist(vdwc,bLR,index[icg],shift,gid);
 +    vdwc->iinr_end[vdwc->nri] = index[icg+1];
 +
 +    for(j=0; (j<nj); j++) 
 +    {
 +        jcg = jjcg[j];
 +        /* Skip the icg-icg pairs if all self interactions are excluded */
 +        if (!(jcg == icg && GET_CGINFO_EXCL_INTRA(cginfo)))
 +        {
 +            /* Here we add the j charge group jcg to the list,
 +             * exclusions are also added to the list.
 +             */
 +            add_j_to_nblist_cg(vdwc,index[jcg],index[jcg+1],bExcl,icg==jcg,bLR);
 +        }
 +    }
 +
 +    close_i_nblist(vdwc);  
 +}
 +
 +static void setexcl(atom_id start,atom_id end,t_blocka *excl,gmx_bool b,
 +                    t_excl bexcl[])
 +{
 +    atom_id i,k;
 +    
 +    if (b)
 +    {
 +        for(i=start; i<end; i++)
 +        {
 +            for(k=excl->index[i]; k<excl->index[i+1]; k++)
 +            {
 +                SETEXCL(bexcl,i-start,excl->a[k]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(i=start; i<end; i++)
 +        {
 +            for(k=excl->index[i]; k<excl->index[i+1]; k++)
 +            {
 +                RMEXCL(bexcl,i-start,excl->a[k]);
 +            }
 +        }
 +    }
 +}
 +
 +int calc_naaj(int icg,int cgtot)
 +{
 +    int naaj;
 +    
 +    if ((cgtot % 2) == 1)
 +    {
 +        /* Odd number of charge groups, easy */
 +        naaj = 1 + (cgtot/2);
 +    }
 +    else if ((cgtot % 4) == 0)
 +    {
 +    /* Multiple of four is hard */
 +        if (icg < cgtot/2)
 +        {
 +            if ((icg % 2) == 0)
 +            {
 +                naaj=1+(cgtot/2);
 +            }
 +            else
 +            {
 +                naaj=cgtot/2;
 +            }
 +        }
 +        else
 +        {
 +            if ((icg % 2) == 1)
 +            {
 +                naaj=1+(cgtot/2);
 +            }
 +            else
 +            {
 +                naaj=cgtot/2;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* cgtot/2 = odd */
 +        if ((icg % 2) == 0)
 +        {
 +            naaj=1+(cgtot/2);
 +        }
 +        else
 +        {
 +            naaj=cgtot/2;
 +        }
 +    }
 +#ifdef DEBUG
 +    fprintf(log,"naaj=%d\n",naaj);
 +#endif
 +
 +    return naaj;
 +}
 +
 +/************************************************
 + *
 + *  S I M P L E      C O R E     S T U F F
 + *
 + ************************************************/
 +
 +static real calc_image_tric(rvec xi,rvec xj,matrix box,
 +                            rvec b_inv,int *shift)
 +{
 +    /* This code assumes that the cut-off is smaller than
 +     * a half times the smallest diagonal element of the box.
 +     */
 +    const real h25=2.5;
 +    real dx,dy,dz;
 +    real r2;
 +    int  tx,ty,tz;
 +    
 +    /* Compute diff vector */
 +    dz = xj[ZZ] - xi[ZZ];
 +    dy = xj[YY] - xi[YY];
 +    dx = xj[XX] - xi[XX];
 +    
 +  /* Perform NINT operation, using trunc operation, therefore
 +   * we first add 2.5 then subtract 2 again
 +   */
 +    tz = dz*b_inv[ZZ] + h25;
 +    tz -= 2;
 +    dz -= tz*box[ZZ][ZZ];
 +    dy -= tz*box[ZZ][YY];
 +    dx -= tz*box[ZZ][XX];
 +
 +    ty = dy*b_inv[YY] + h25;
 +    ty -= 2;
 +    dy -= ty*box[YY][YY];
 +    dx -= ty*box[YY][XX];
 +    
 +    tx = dx*b_inv[XX]+h25;
 +    tx -= 2;
 +    dx -= tx*box[XX][XX];
 +  
 +    /* Distance squared */
 +    r2 = (dx*dx) + (dy*dy) + (dz*dz);
 +
 +    *shift = XYZ2IS(tx,ty,tz);
 +
 +    return r2;
 +}
 +
 +static real calc_image_rect(rvec xi,rvec xj,rvec box_size,
 +                            rvec b_inv,int *shift)
 +{
 +    const real h15=1.5;
 +    real ddx,ddy,ddz;
 +    real dx,dy,dz;
 +    real r2;
 +    int  tx,ty,tz;
 +    
 +    /* Compute diff vector */
 +    dx = xj[XX] - xi[XX];
 +    dy = xj[YY] - xi[YY];
 +    dz = xj[ZZ] - xi[ZZ];
 +  
 +    /* Perform NINT operation, using trunc operation, therefore
 +     * we first add 1.5 then subtract 1 again
 +     */
 +    tx = dx*b_inv[XX] + h15;
 +    ty = dy*b_inv[YY] + h15;
 +    tz = dz*b_inv[ZZ] + h15;
 +    tx--;
 +    ty--;
 +    tz--;
 +    
 +    /* Correct diff vector for translation */
 +    ddx = tx*box_size[XX] - dx;
 +    ddy = ty*box_size[YY] - dy;
 +    ddz = tz*box_size[ZZ] - dz;
 +    
 +    /* Distance squared */
 +    r2 = (ddx*ddx) + (ddy*ddy) + (ddz*ddz);
 +    
 +    *shift = XYZ2IS(tx,ty,tz);
 +    
 +    return r2;
 +}
 +
 +static void add_simple(t_ns_buf *nsbuf,int nrj,atom_id cg_j,
 +                       gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                       int icg,int jgid,t_block *cgs,t_excl bexcl[],
 +                       int shift,t_forcerec *fr,put_in_list_t *put_in_list)
 +{
 +    if (nsbuf->nj + nrj > MAX_CG)
 +    {
 +        put_in_list(bHaveVdW,ngid,md,icg,jgid,nsbuf->ncg,nsbuf->jcg,
 +                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE);
 +        /* Reset buffer contents */
 +        nsbuf->ncg = nsbuf->nj = 0;
 +    }
 +    nsbuf->jcg[nsbuf->ncg++] = cg_j;
 +    nsbuf->nj += nrj;
 +}
 +
 +static void ns_inner_tric(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          matrix box,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      shift;
 +    int      j,nrj,jgid;
 +    int      *cginfo=fr->cginfo;
 +    atom_id  cg_j,*cgindex;
 +    t_ns_buf *nsbuf;
 +    
 +    cgindex = cgs->index;
 +    shift   = CENTRAL;
 +    for(j=0; (j<njcg); j++)
 +    {
 +        cg_j   = jcg[j];
 +        nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +        if (calc_image_tric(x[icg],x[cg_j],box,b_inv,&shift) < rcut2)
 +        {
 +            jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +            if (!(i_egp_flags[jgid] & EGP_EXCL))
 +            {
 +                add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                           bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                           put_in_list);
 +            }
 +        }
 +    }
 +}
 +
 +static void ns_inner_rect(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          gmx_bool bBox,rvec box_size,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      shift;
 +    int      j,nrj,jgid;
 +    int      *cginfo=fr->cginfo;
 +    atom_id  cg_j,*cgindex;
 +    t_ns_buf *nsbuf;
 +
 +    cgindex = cgs->index;
 +    if (bBox)
 +    {
 +        shift = CENTRAL;
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j   = jcg[j];
 +            nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +            if (calc_image_rect(x[icg],x[cg_j],box_size,b_inv,&shift) < rcut2)
 +            {
 +                jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +                if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                {
 +                    add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                               bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                               put_in_list);
 +                }
 +            }
 +        }
 +    } 
 +    else
 +    {
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j   = jcg[j];
 +            nrj    = cgindex[cg_j+1]-cgindex[cg_j];
 +            if ((rcut2 == 0) || (distance2(x[icg],x[cg_j]) < rcut2)) {
 +                jgid  = GET_CGINFO_GID(cginfo[cg_j]);
 +                if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                {
 +                    add_simple(&ns_buf[jgid][CENTRAL],nrj,cg_j,
 +                               bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,CENTRAL,fr,
 +                               put_in_list);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* ns_simple_core needs to be adapted for QMMM still 2005 */
 +
 +static int ns_simple_core(t_forcerec *fr,
 +                          gmx_localtop_t *top,
 +                          t_mdatoms *md,
 +                          matrix box,rvec box_size,
 +                          t_excl bexcl[],atom_id *aaj,
 +                          int ngid,t_ns_buf **ns_buf,
 +                          put_in_list_t *put_in_list,gmx_bool bHaveVdW[])
 +{
 +    int      naaj,k;
 +    real     rlist2;
 +    int      nsearch,icg,jcg,igid,i0,nri,nn;
 +    int      *cginfo;
 +    t_ns_buf *nsbuf;
 +    /* atom_id  *i_atoms; */
 +    t_block  *cgs=&(top->cgs);
 +    t_blocka *excl=&(top->excls);
 +    rvec     b_inv;
 +    int      m;
 +    gmx_bool     bBox,bTriclinic;
 +    int      *i_egp_flags;
 +    
 +    rlist2 = sqr(fr->rlist);
 +    
 +    bBox = (fr->ePBC != epbcNONE);
 +    if (bBox)
 +    {
 +        for(m=0; (m<DIM); m++)
 +        {
 +            b_inv[m] = divide_err(1.0,box_size[m]);
 +        }
 +        bTriclinic = TRICLINIC(box);
 +    }
 +    else
 +    {
 +        bTriclinic = FALSE;
 +    }
 +    
 +    cginfo = fr->cginfo;
 +    
 +    nsearch=0;
 +    for (icg=fr->cg0; (icg<fr->hcg); icg++)
 +    {
 +        /*
 +          i0        = cgs->index[icg];
 +          nri       = cgs->index[icg+1]-i0;
 +          i_atoms   = &(cgs->a[i0]);
 +          i_eg_excl = fr->eg_excl + ngid*md->cENER[*i_atoms];
 +          setexcl(nri,i_atoms,excl,TRUE,bexcl);
 +        */
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        i_egp_flags = fr->egp_flags + ngid*igid;
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,TRUE,bexcl);
 +        
 +        naaj=calc_naaj(icg,cgs->nr);
 +        if (bTriclinic)
 +        {
 +            ns_inner_tric(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          box,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        else
 +        {
 +            ns_inner_rect(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          bBox,box_size,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        nsearch += naaj;
 +        
 +        for(nn=0; (nn<ngid); nn++)
 +        {
 +            for(k=0; (k<SHIFTS); k++)
 +            {
 +                nsbuf = &(ns_buf[nn][k]);
 +                if (nsbuf->ncg > 0)
 +                {
 +                    put_in_list(bHaveVdW,ngid,md,icg,nn,nsbuf->ncg,nsbuf->jcg,
 +                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE);
 +                    nsbuf->ncg=nsbuf->nj=0;
 +                }
 +            }
 +        }
 +        /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,FALSE,bexcl);
 +    }
 +    close_neighbor_list(fr,FALSE,-1,-1,FALSE);
 +    
 +    return nsearch;
 +}
 +
 +/************************************************
 + *
 + *    N S 5     G R I D     S T U F F
 + *
 + ************************************************/
 +
 +/* Determine, along one dimension, which grid cells are within the cut-off
 + * of coordinate x.
 + *
 + * Nx     number of grid cells in this dimension
 + * gridx  cell size; cell i is treated as spanning i*gridx .. (i+1)*gridx
 + * rc2    squared cut-off distance
 + * xgi    grid index of x; values < 0 and >= Nx are handled
 + * x      the coordinate, compared directly against i*gridx
 + * dx0    out: lowest cell index of the range
 + * dx1    out: highest cell index of the range; when xgi is outside the
 + *        grid and no cell is within the cut-off the initial values are
 + *        left as they are, giving *dx1 < *dx0
 + * dcx2   out: for each cell i in the range, the squared distance from x to
 + *        the nearest edge of cell i (0 for cell xgi when it is in the grid)
 + */
 +static inline void get_dx(int Nx,real gridx,real rc2,int xgi,real x,
 +                          int *dx0,int *dx1,real *dcx2)
 +{
 +    real dcx,tmp;
 +    int  xgi0,xgi1,i;
 +    
 +    /* Set up an initial range plus the starting cells for the downward
 +     * (xgi0) and upward (xgi1) scans.
 +     */
 +    if (xgi < 0)
 +    {
 +        /* x lies below the grid: empty range, only scan upwards from 0 */
 +        *dx0 = 0;
 +        xgi0 = -1;
 +        *dx1 = -1;
 +        xgi1 = 0;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        /* x lies above the grid: empty range, only scan down from Nx-1 */
 +        *dx0 = Nx;
 +        xgi0 = Nx-1;
 +        *dx1 = Nx-1;
 +        xgi1 = Nx;
 +    }
 +    else
 +    {
 +        /* x is inside cell xgi: that cell is at distance zero */
 +        dcx2[xgi] = 0;
 +        *dx0 = xgi;
 +        xgi0 = xgi-1;
 +        *dx1 = xgi;
 +        xgi1 = xgi+1;
 +    }
 +    
 +    /* Scan downwards; the distance is measured to the upper edge of cell i */
 +    for(i=xgi0; i>=0; i--)
 +    {
 +        dcx = (i+1)*gridx-x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +            break;
 +        *dx0 = i;
 +        dcx2[i] = tmp;
 +    }
 +    /* Scan upwards; the distance is measured to the lower edge of cell i */
 +    for(i=xgi1; i<Nx; i++)
 +    {
 +        dcx = i*gridx-x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        *dx1 = i;
 +        dcx2[i] = tmp;
 +    }
 +}
 +
 +/* Variant of the cell-range search that limits the scan to the cells
 + * g_min..g_max, where the limits depend on ncpddc, shift_min and shift_max.
 + *
 + * Nx         number of grid cells in this dimension
 + * gridx      cell size; cell i is treated as spanning i*gridx .. (i+1)*gridx
 + * rc2        squared cut-off distance
 + * xgi        grid index of x; values < 0 and >= Nx are handled
 + * x          the coordinate, compared directly against i*gridx
 + * ncpddc     0 means the whole grid 0..Nx-1 may be scanned; otherwise the
 + *            grid is split at cell ncpddc (see below).
 + *            NOTE(review): presumably the number of cells of the home
 + *            domain-decomposition cell - confirm against the caller.
 + * shift_min, shift_max
 + *            range of shifts to consider.
 + *            NOTE(review): exact units/meaning not visible here - confirm.
 + * g0, g1     out: lowest and highest cell index of the range; g1 < g0
 + *            results when no cell is within the cut-off
 + * dcx2       out: squared distance from x to the nearest edge of each cell
 + *            in the range (0 for cell xgi when the scan starts there)
 + */
 +static inline void get_dx_dd(int Nx,real gridx,real rc2,int xgi,real x,
 +                             int ncpddc,int shift_min,int shift_max,
 +                             int *g0,int *g1,real *dcx2)
 +{
 +    real dcx,tmp;
 +    int  g_min,g_max,shift_home;
 +    
 +    if (xgi < 0)
 +    {
 +        /* x below the grid: start with an empty range at the bottom */
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = 0;
 +        *g1   = -1;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        /* x above the grid: start with an empty range at the top */
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = Nx;
 +        *g1   = Nx - 1;
 +    }
 +    else
 +    {
 +        if (ncpddc == 0)
 +        {
 +            g_min = 0;
 +            g_max = Nx - 1;
 +        }
 +        else
 +        {
 +            /* Cells below ncpddc get home shift 0, the others -1 */
 +            if (xgi < ncpddc)
 +            {
 +                shift_home = 0;
 +            }
 +            else
 +            {
 +                shift_home = -1;
 +            }
 +            /* A limit equal to the home shift keeps that side of the scan
 +             * within 0..ncpddc-1; otherwise it uses ncpddc..Nx-1.
 +             */
 +            g_min = (shift_min == shift_home ? 0          : ncpddc);
 +            g_max = (shift_max == shift_home ? ncpddc - 1 : Nx - 1);
 +        }
 +        if (shift_min > 0)
 +        {
 +            /* Empty range positioned at the lower limit */
 +            *g0 = g_min;
 +            *g1 = g_min - 1;
 +        }
 +        else if (shift_max < 0)
 +        {
 +            /* Empty range positioned at the upper limit */
 +            *g0 = g_max + 1;
 +            *g1 = g_max;
 +        }
 +        else
 +        {
 +            /* Start from the cell containing x, at distance zero */
 +            *g0 = xgi;
 +            *g1 = xgi;
 +            dcx2[xgi] = 0;
 +        }
 +    }
 +    
 +    /* Extend the range downwards while the upper edge of the next cell
 +     * is within the cut-off.
 +     */
 +    while (*g0 > g_min)
 +    {
 +        /* Check one grid cell down */
 +        dcx = ((*g0 - 1) + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g0)--;
 +        dcx2[*g0] = tmp;
 +    }
 +    
 +    /* Extend the range upwards while the lower edge of the next cell
 +     * is within the cut-off.
 +     */
 +    while (*g1 < g_max)
 +    {
 +        /* Check one grid cell up */
 +        dcx = (*g1 + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g1)++;
 +        dcx2[*g1] = tmp;
 +    }
 +}
 +
 +
 +#define sqr(x) ((x)*(x))
 +#define calc_dx2(XI,YI,ZI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]) + sqr(ZI-y[ZZ]))
 +#define calc_cyl_dx2(XI,YI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]))
 +/****************************************************
 + *
 + *    F A S T   N E I G H B O R  S E A R C H I N G
 + *
 + *    Optimized neighboursearching routine using grid 
 + *    at least 1x1x1, see GROMACS manual
 + *
 + ****************************************************/
 +
 +/* Manage the long-range neighbor lists.
 + *
 + * First, for every neighbor-list set and every list type, the long-range
 + * list is closed, evaluated with do_nonbonded and reset when it is nearly
 + * full (nri > maxnri-32) or when bEvaluateNow is set.
 + * Then, unless bEvaluateNow is set, the nlr charge groups in lr[] are added
 + * to the long-range lists of i charge group icg through put_in_list.
 + *
 + * lambda and dvdlambda are passed on to do_nonbonded unchanged.
 + * NOTE(review): bDoForces is not read in this function; do_nonbonded is
 + * always called with GMX_DONB_LR | GMX_DONB_FORCES - confirm this is
 + * intended.
 + */
 +static void do_longrange(t_commrec *cr,gmx_localtop_t *top,t_forcerec *fr,
 +                         int ngid,t_mdatoms *md,int icg,
 +                         int jgid,int nlr,
 +                         atom_id lr[],t_excl bexcl[],int shift,
 +                         rvec x[],rvec box_size,t_nrnb *nrnb,
-                        real lambda,real *dvdlambda,
++                         real *lambda,real *dvdlambda,
 +                         gmx_grppairener_t *grppener,
 +                         gmx_bool bDoVdW,gmx_bool bDoCoul,
 +                         gmx_bool bEvaluateNow,put_in_list_t *put_in_list,
 +                         gmx_bool bHaveVdW[],
 +                         gmx_bool bDoForces,rvec *f)
 +{
 +    int n,i;
 +    t_nblist *nl;
 +    
 +    for(n=0; n<fr->nnblists; n++)
 +    {
 +        for(i=0; (i<eNL_NR); i++)
 +        {
 +            nl = &fr->nblists[n].nlist_lr[i];
 +            /* Evaluate when fewer than 32 i-entries remain, or on request */
 +            if ((nl->nri > nl->maxnri-32) || bEvaluateNow)
 +            {
 +                close_neighbor_list(fr,TRUE,n,i,FALSE);
 +                /* Evaluate the energies and forces */
 +                do_nonbonded(cr,fr,x,f,md,NULL,
 +                             grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR],
 +                             grppener->ener[egCOULLR],
 +                                                       grppener->ener[egGB],box_size,
 +                             nrnb,lambda,dvdlambda,n,i,
 +                             GMX_DONB_LR | GMX_DONB_FORCES);
 +                
 +                /* The list has been consumed; empty it for reuse */
 +                reset_neighbor_list(fr,TRUE,n,i);
 +            }
 +        }
 +    }
 +    
 +    if (!bEvaluateNow)
 +    {  
 +        /* Put the long range particles in a list */
 +        /* do_longrange is never called for QMMM  */
 +        put_in_list(bHaveVdW,ngid,md,icg,jgid,nlr,lr,top->cgs.index,
 +                    bexcl,shift,fr,TRUE,bDoVdW,bDoCoul);
 +    }
 +}
 +/* Computes the squared cut-off distances used by the grid search.
 + *
 + * Outputs:
 + *   rs2    : fr->rlist squared
 + *   rvdw2  : squared range used for VdW
 + *   rcoul2 : squared range used for Coulomb
 + *   rm2    : min(rvdw2,rcoul2)
 + *   rl2    : max(rvdw2,rcoul2)
 + *
 + * When both bDoLongRange and fr->bTwinRange are set, rvdw2 and rcoul2 come
 + * from fr->rvdw and fr->rcoulomb, or from fr->rlistlong when the matching
 + * EVDW_/EEL_MIGHT_BE_ZERO_AT_CUTOFF macro is true and that cut-off is larger
 + * than fr->rlist. In all other cases both ranges are set equal to rs2.
 + */
 +static void get_cutoff2(t_forcerec *fr,gmx_bool bDoLongRange,
 +                        real *rvdw2,real *rcoul2,
 +                        real *rs2,real *rm2,real *rl2)
 +{
 +    *rs2 = sqr(fr->rlist);
 +    if (bDoLongRange && fr->bTwinRange)
 +    {
 +        /* The VdW and elec. LR cut-off's could be different,
 +         * so we can not simply set them to rlistlong.
 +         */
 +        if (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(fr->vdwtype) &&
 +            fr->rvdw > fr->rlist)
 +        {
 +            *rvdw2  = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rvdw2  = sqr(fr->rvdw);
 +        }
 +        if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(fr->eeltype) &&
 +            fr->rcoulomb > fr->rlist)
 +        {
 +            *rcoul2 = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rcoul2 = sqr(fr->rcoulomb);
 +        }
 +    }
 +    else
 +    {
 +        /* Workaround for a gcc -O3 or -ffast-math problem */
 +        *rvdw2  = *rs2;
 +        *rcoul2 = *rs2;
 +    }
 +    *rm2 = min(*rvdw2,*rcoul2);
 +    *rl2 = max(*rvdw2,*rcoul2);
 +}
 +/* Allocates the buffers and counters in ns that the grid search uses;
 + * every array gets ngid entries and every buffer row holds MAX_CG entries.
 + *
 + * The cut-offs are obtained from get_cutoff2() with bDoLongRange=TRUE.
 + * The long-range buffers are allocated only when those cut-offs say they can
 + * be filled: nl_lr_ljc when rm2 > rs2 and nl_lr_one when rl2 > rm2.
 + */
 +static void init_nsgrid_lists(t_forcerec *fr,int ngid,gmx_ns_t *ns)
 +{
 +    real rvdw2,rcoul2,rs2,rm2,rl2;
 +    int j;
 +
 +    get_cutoff2(fr,TRUE,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
 +    /* Short range buffers */
 +    snew(ns->nl_sr,ngid);
 +    /* Counters */
 +    snew(ns->nsr,ngid);
 +    snew(ns->nlr_ljc,ngid);
 +    snew(ns->nlr_one,ngid);
 +    
 +    if (rm2 > rs2)
 +    {
 +            /* Long range VdW and Coul buffers */
 +        snew(ns->nl_lr_ljc,ngid);
 +    }
 +    if (rl2 > rm2)
 +    {
 +        /* Long range VdW or Coul only buffers */
 +        snew(ns->nl_lr_one,ngid);
 +    }
 +    for(j=0; (j<ngid); j++) {
 +        snew(ns->nl_sr[j],MAX_CG);
 +        if (rm2 > rs2)
 +        {
 +            snew(ns->nl_lr_ljc[j],MAX_CG);
 +        }
 +        if (rl2 > rm2)
 +        {
 +            snew(ns->nl_lr_one[j],MAX_CG);
 +        }
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n",
 +                rs2,rm2,rl2);
 +    }
 +}
 +/* Grid-based neighbour search.
 + *
 + * The i charge groups run from grid->icg0 (or from the last charge group,
 + * cgs->nr-1, when fr->n_tpi is set) up to grid->icg1. For each i charge
 + * group and each periodic shift (tx,ty,tz), the grid cells whose tabulated
 + * squared distances dcx2+dcy2+dcz2 stay below rl2 are scanned. Every j
 + * charge group that passes the j-range check is sorted by its squared
 + * distance r2 into one of three buffers, indexed by the energy group of j:
 + *   r2 < rs2         : nl_sr,     flushed with put_in_list()
 + *   rs2 <= r2 < rm2  : nl_lr_ljc, flushed with do_longrange() (VdW and Coul)
 + *   rm2 <= r2 < rl2  : nl_lr_one, flushed with do_longrange() (one of them)
 + * A buffer is flushed when it holds MAX_CG entries and once more after all
 + * cells for the current shift have been scanned.
 + *
 + * With bMakeQMMMnblist set, rs2 and rm2 are set to rl2, so every pair within
 + * rl2 lands in the short-range buffer, and i charge groups whose first atom
 + * is not flagged in md->bQM are skipped.
 + *
 + * Returns nns, the number of i-j charge group pairs that passed the j-range
 + * check (a distance is computed for each of them).
 + */
 +static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
 +                       matrix box,rvec box_size,int ngid,
 +                       gmx_localtop_t *top,
 +                       t_grid *grid,rvec x[],
 +                       t_excl bexcl[],gmx_bool *bExcludeAlleg,
 +                       t_nrnb *nrnb,t_mdatoms *md,
-                       real lambda,real *dvdlambda,
++                       real *lambda,real *dvdlambda,
 +                       gmx_grppairener_t *grppener,
 +                       put_in_list_t *put_in_list,
 +                       gmx_bool bHaveVdW[],
 +                       gmx_bool bDoLongRange,gmx_bool bDoForces,rvec *f,
 +                       gmx_bool bMakeQMMMnblist)
 +{
 +    gmx_ns_t *ns;
 +    atom_id **nl_lr_ljc,**nl_lr_one,**nl_sr;
 +    int     *nlr_ljc,*nlr_one,*nsr;
 +    gmx_domdec_t *dd=NULL;
 +    t_block *cgs=&(top->cgs);
 +    int     *cginfo=fr->cginfo;
 +    /* atom_id *i_atoms,*cgsindex=cgs->index; */
 +    ivec    sh0,sh1,shp;
 +    int     cell_x,cell_y,cell_z;
 +    int     d,tx,ty,tz,dx,dy,dz,cj;
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    int     zsh_ty,zsh_tx,ysh_tx;
 +#endif
 +    int     dx0,dx1,dy0,dy1,dz0,dz1;
 +    int     Nx,Ny,Nz,shift=-1,j,nrj,nns,nn=-1;
 +    real    gridx,gridy,gridz,grid_x,grid_y,grid_z;
 +    real    *dcx2,*dcy2,*dcz2;
 +    int     zgi,ygi,xgi;
 +    int     cg0,cg1,icg=-1,cgsnr,i0,igid,nri,naaj,max_jcg;
 +    int     jcg0,jcg1,jjcg,cgj0,jgid;
 +    int     *grida,*gridnra,*gridind;
 +    gmx_bool    rvdw_lt_rcoul,rcoul_lt_rvdw;
 +    rvec    xi,*cgcm,grid_offset;
 +    real    r2,rs2,rvdw2,rcoul2,rm2,rl2,XI,YI,ZI,dcx,dcy,dcz,tmp1,tmp2;
 +    int     *i_egp_flags;
 +    gmx_bool    bDomDec,bTriclinicX,bTriclinicY;
 +    ivec    ncpddc;
 +    
 +    ns = &fr->ns;
 +    
 +    bDomDec = DOMAINDECOMP(cr);
 +    if (bDomDec)
 +    {
 +        dd = cr->dd;
 +    }
 +    /* bTriclinicX/Y select, in the shift loops below, whether the X/Y cell
 +     * index is recomputed from the shifted coordinate instead of being
 +     * taken as cell + shift*N.
 +     */
 +    bTriclinicX = ((YY < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[YY]==1) && box[YY][XX] != 0) ||
 +                   (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][XX] != 0));
 +    bTriclinicY =  (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][YY] != 0);
 +    
 +    cgsnr    = cgs->nr;
 +
 +    get_cutoff2(fr,bDoLongRange,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
 +    rvdw_lt_rcoul = (rvdw2 >= rcoul2); /* NOTE(review): despite the name, TRUE when rvdw2 >= rcoul2; passed to do_longrange() as bDoVdW */
 +    rcoul_lt_rvdw = (rcoul2 >= rvdw2); /* NOTE(review): despite the name, TRUE when rcoul2 >= rvdw2; passed to do_longrange() as bDoCoul */
 +    /* For the QM/MM list all ranges are merged into the short range */
 +    if (bMakeQMMMnblist)
 +    {
 +        rm2 = rl2;
 +        rs2 = rl2;
 +    }
 +
 +    nl_sr     = ns->nl_sr;
 +    nsr       = ns->nsr;
 +    nl_lr_ljc = ns->nl_lr_ljc;
 +    nl_lr_one = ns->nl_lr_one;
 +    nlr_ljc   = ns->nlr_ljc;
 +    nlr_one   = ns->nlr_one;
 +    
 +    /* Unpack arrays */
 +    cgcm    = fr->cg_cm;
 +    Nx      = grid->n[XX];
 +    Ny      = grid->n[YY];
 +    Nz      = grid->n[ZZ];
 +    grida   = grid->a;
 +    gridind = grid->index;
 +    gridnra = grid->nra;
 +    nns     = 0;
 +    
 +    gridx      = grid->cell_size[XX];
 +    gridy      = grid->cell_size[YY];
 +    gridz      = grid->cell_size[ZZ];
 +    grid_x     = 1/gridx;
 +    grid_y     = 1/gridy;
 +    grid_z     = 1/gridz;
 +    copy_rvec(grid->cell_offset,grid_offset);
 +    copy_ivec(grid->ncpddc,ncpddc);
 +    dcx2       = grid->dcx2;
 +    dcy2       = grid->dcy2;
 +    dcz2       = grid->dcz2;
 +    
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5);
 +    zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5);
 +    ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5);
 +    if (zsh_tx!=0 && ysh_tx!=0)
 +    {
 +        /* This could happen due to rounding, when both ratios are 0.5 */
 +        ysh_tx = 0;
 +    }
 +#endif
 +    
 +    debug_gmx();
 +
 +    if (fr->n_tpi)
 +    {
 +        /* We only want a list for the test particle */
 +        cg0 = cgsnr - 1;
 +    }
 +    else
 +    {
 +        cg0 = grid->icg0;
 +    }
 +    cg1 = grid->icg1;
 +
 +    /* Set the shift range */
 +    for(d=0; d<DIM; d++)
 +    {
 +        sh0[d] = -1;
 +        sh1[d] = 1;
 +        /* Check if we need periodicity shifts.
 +         * Without PBC or with domain decomposition we don't need them.
 +         */
 +        if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1))
 +        {
 +            shp[d] = 0;
 +        }
 +        else
 +        {
 +            if (d == XX &&
 +                box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
 +            {
 +                shp[d] = 2;
 +            }
 +            else
 +            {
 +                shp[d] = 1;
 +            }
 +        }
 +    }
 +    
 +    /* Loop over charge groups */
 +    for(icg=cg0; (icg < cg1); icg++)
 +    {
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        /* Skip this charge group if all energy groups are excluded! */
 +        if (bExcludeAlleg[igid])
 +        {
 +            continue;
 +        }
 +        
 +        i0   = cgs->index[icg];
 +        
 +        if (bMakeQMMMnblist)
 +        { 
 +            /* Skip this charge group if it is not a QM atom while making a
 +             * QM/MM neighbourlist
 +             */
 +            if (md->bQM[i0]==FALSE)
 +            {
 +                continue; /* MM particle, go to next particle */ 
 +            }
 +            
 +            /* Compute the number of charge groups that fall within the control
 +             * of this one (icg)
 +             */
 +            naaj    = calc_naaj(icg,cgsnr);
 +            jcg0    = icg;
 +            jcg1    = icg + naaj;
 +            max_jcg = cgsnr;       
 +        } 
 +        else
 +        { 
 +            /* make a normal neighbourlist */
 +            
 +            if (bDomDec)
 +            {
 +                /* Get the j charge-group and dd cell shift ranges */
 +                dd_get_ns_ranges(cr->dd,icg,&jcg0,&jcg1,sh0,sh1);
 +                max_jcg = 0;
 +            }
 +            else
 +            {
 +                /* Compute the number of charge groups that fall within the control
 +                 * of this one (icg)
 +                 */
 +                naaj = calc_naaj(icg,cgsnr);
 +                jcg0 = icg;
 +                jcg1 = icg + naaj;
 +                
 +                if (fr->n_tpi)
 +                {
 +                    /* The i-particle is always the test particle,
 +                     * so we want all j-particles
 +                     */
 +                    max_jcg = cgsnr - 1;
 +                }
 +                else
 +                {
 +                    max_jcg  = jcg1 - cgsnr;
 +                }
 +            }
 +        }
 +        
 +        i_egp_flags = fr->egp_flags + igid*ngid;
 +        
 +        /* Set the exclusions for the atoms in charge group icg using a bitmask */
 +        setexcl(i0,cgs->index[icg+1],&top->excls,TRUE,bexcl);
 +        
 +        ci2xyz(grid,icg,&cell_x,&cell_y,&cell_z);
 +        
 +        /* Changed iicg to icg, DvdS 990115 
 +         * (but see consistency check above, DvdS 990330) 
 +         */
 +#ifdef NS5DB
 +        fprintf(log,"icg=%5d, naaj=%5d, cell %d %d %d\n",
 +                icg,naaj,cell_x,cell_y,cell_z);
 +#endif
 +        /* Loop over shift vectors in three dimensions */
 +        for (tz=-shp[ZZ]; tz<=shp[ZZ]; tz++)
 +        {
 +            ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ];
 +            /* Calculate range of cells in Z direction that have the shift tz */
 +            zgi = cell_z + tz*Nz;
 +#define FAST_DD_NS /* defined unconditionally here, so the get_dx() branches below are never compiled */
 +#ifndef FAST_DD_NS
 +            get_dx(Nz,gridz,rl2,zgi,ZI,&dz0,&dz1,dcz2);
 +#else
 +            get_dx_dd(Nz,gridz,rl2,zgi,ZI-grid_offset[ZZ],
 +                      ncpddc[ZZ],sh0[ZZ],sh1[ZZ],&dz0,&dz1,dcz2);
 +#endif
 +            if (dz0 > dz1)
 +            {
 +                continue;
 +            }
 +            for (ty=-shp[YY]; ty<=shp[YY]; ty++)
 +            {
 +                YI = cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY];
 +                /* Calculate range of cells in Y direction that have the shift ty.
 +                 * NOTE(review): Ny is presumably added before the (int) cast
 +                 * so that the truncation acts on a non-negative value - confirm.
 +                 */
 +                if (bTriclinicY)
 +                {
 +                    ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny;
 +                }
 +                else
 +                {
 +                    ygi = cell_y + ty*Ny;
 +                }
 +#ifndef FAST_DD_NS
 +                get_dx(Ny,gridy,rl2,ygi,YI,&dy0,&dy1,dcy2);
 +#else
 +                get_dx_dd(Ny,gridy,rl2,ygi,YI-grid_offset[YY],
 +                          ncpddc[YY],sh0[YY],sh1[YY],&dy0,&dy1,dcy2);
 +#endif
 +                if (dy0 > dy1)
 +                {
 +                    continue;
 +                }
 +                for (tx=-shp[XX]; tx<=shp[XX]; tx++)
 +                {
 +                    XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
 +                    /* Calculate range of cells in X direction that have the shift tx */
 +                    if (bTriclinicX)
 +                    {
 +                        xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx;
 +                    }
 +                    else
 +                    {
 +                        xgi = cell_x + tx*Nx;
 +                    }
 +#ifndef FAST_DD_NS
 +                    get_dx(Nx,gridx,rl2,xgi*Nx,XI,&dx0,&dx1,dcx2); /* NOTE(review): passes xgi*Nx where the Y and Z calls pass ygi and zgi; not compiled while FAST_DD_NS is defined */
 +#else
 +                    get_dx_dd(Nx,gridx,rl2,xgi,XI-grid_offset[XX],
 +                              ncpddc[XX],sh0[XX],sh1[XX],&dx0,&dx1,dcx2);
 +#endif
 +                    if (dx0 > dx1)
 +                    {
 +                        continue;
 +                    }
 +                    /* AdResS: an explicit cg that has a weighting function of 0 is excluded
 +                     *  from the neighbour list as it will not interact  */
 +                    if (fr->adress_type != eAdressOff){
 +                        if (md->wf[cgs->index[icg]]==0 && egp_explicit(fr, igid)){
 +                            continue;
 +                        }
 +                    }
 +                    /* Get shift vector */      
 +                    shift=XYZ2IS(tx,ty,tz);
 +#ifdef NS5DB
 +                    range_check(shift,0,SHIFTS);
 +#endif
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        nsr[nn]      = 0;
 +                        nlr_ljc[nn]  = 0;
 +                        nlr_one[nn] = 0;
 +                    }
 +#ifdef NS5DB
 +                    fprintf(log,"shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n",
 +                            shift,dx0,dx1,dy0,dy1,dz0,dz1);
 +                    fprintf(log,"cgcm: %8.3f  %8.3f  %8.3f\n",cgcm[icg][XX],
 +                            cgcm[icg][YY],cgcm[icg][ZZ]);
 +                    fprintf(log,"xi:   %8.3f  %8.3f  %8.3f\n",XI,YI,ZI);
 +#endif
 +                    for (dx=dx0; (dx<=dx1); dx++)
 +                    {
 +                        tmp1 = rl2 - dcx2[dx];
 +                        for (dy=dy0; (dy<=dy1); dy++)
 +                        {
 +                            tmp2 = tmp1 - dcy2[dy];
 +                            if (tmp2 > 0)
 +                            {
 +                                for (dz=dz0; (dz<=dz1); dz++) {
 +                                    if (tmp2 > dcz2[dz]) {
 +                                        /* Find grid-cell cj in which possible neighbours are */
 +                                        cj   = xyz2ci(Ny,Nz,dx,dy,dz);
 +                                        
 +                                        /* Check out how many cgs (nrj) there in this cell */
 +                                        nrj  = gridnra[cj];
 +                                        
 +                                        /* Find the offset in the cg list */
 +                                        cgj0 = gridind[cj];
 +                                        
 +                                        /* Check if all j's are out of range so we
 +                                         * can skip the whole cell.
 +                                         * Should save some time, especially with DD.
 +                                         */
 +                                        if (nrj == 0 ||
 +                                            (grida[cgj0] >= max_jcg &&
 +                                             (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0)))
 +                                        {
 +                                            continue;
 +                                        }
 +                                        
 +                                        /* Loop over cgs */
 +                                        for (j=0; (j<nrj); j++)
 +                                        {
 +                                            jjcg = grida[cgj0+j];
 +                                            
 +                                            /* check whether this guy is in range! */
 +                                            if ((jjcg >= jcg0 && jjcg < jcg1) ||
 +                                                (jjcg < max_jcg))
 +                                            {
 +                                                r2=calc_dx2(XI,YI,ZI,cgcm[jjcg]);
 +                                                if (r2 < rl2) {
 +                                                    /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */
 +                                                    jgid = GET_CGINFO_GID(cginfo[jjcg]);
 +                                                    /* check energy group exclusions */
 +                                                    if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                                                    {
 +                                                        if (r2 < rs2)
 +                                                        {
 +                                                            if (nsr[jgid] >= MAX_CG) /* buffer full: flush it before adding jjcg */
 +                                                            {
 +                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
 +                                                                            nsr[jgid],nl_sr[jgid],
 +                                                                            cgs->index,/* cgsatoms, */ bexcl,
 +                                                                            shift,fr,FALSE,TRUE,TRUE);
 +                                                                nsr[jgid]=0;
 +                                                            }
 +                                                            nl_sr[jgid][nsr[jgid]++]=jjcg;
 +                                                        } 
 +                                                        else if (r2 < rm2)
 +                                                        {
 +                                                            if (nlr_ljc[jgid] >= MAX_CG)
 +                                                            {
 +                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
 +                                                                             nlr_ljc[jgid],
 +                                                                             nl_lr_ljc[jgid],bexcl,shift,x,
 +                                                                             box_size,nrnb,
 +                                                                             lambda,dvdlambda,
 +                                                                             grppener,
 +                                                                             TRUE,TRUE,FALSE,
 +                                                                             put_in_list,
 +                                                                             bHaveVdW,
 +                                                                             bDoForces,f);
 +                                                                nlr_ljc[jgid]=0;
 +                                                            }
 +                                                            nl_lr_ljc[jgid][nlr_ljc[jgid]++]=jjcg;
 +                                                        }
 +                                                        else
 +                                                        {
 +                                                            if (nlr_one[jgid] >= MAX_CG) {
 +                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
 +                                                                             nlr_one[jgid],
 +                                                                             nl_lr_one[jgid],bexcl,shift,x,
 +                                                                             box_size,nrnb,
 +                                                                             lambda,dvdlambda,
 +                                                                             grppener,
 +                                                                             rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
 +                                                                             put_in_list,
 +                                                                             bHaveVdW,
 +                                                                             bDoForces,f);
 +                                                                nlr_one[jgid]=0;
 +                                                            }
 +                                                            nl_lr_one[jgid][nlr_one[jgid]++]=jjcg;
 +                                                        }
 +                                                    }
 +                                                }
 +                                                nns++;
 +                                            }
 +                                        }
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                    /* CHECK whether there is anything left in the buffers */
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        if (nsr[nn] > 0)
 +                        {
 +                            put_in_list(bHaveVdW,ngid,md,icg,nn,nsr[nn],nl_sr[nn],
 +                                        cgs->index, /* cgsatoms, */ bexcl,
 +                                        shift,fr,FALSE,TRUE,TRUE);
 +                        }
 +                        
 +                        if (nlr_ljc[nn] > 0)
 +                        {
 +                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_ljc[nn],
 +                                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
 +                                         lambda,dvdlambda,grppener,TRUE,TRUE,FALSE,
 +                                         put_in_list,bHaveVdW,bDoForces,f);
 +                        }
 +                        
 +                        if (nlr_one[nn] > 0)
 +                        {
 +                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_one[nn],
 +                                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
 +                                         lambda,dvdlambda,grppener,
 +                                         rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
 +                                         put_in_list,bHaveVdW,bDoForces,f);
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +        /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],&top->excls,FALSE,bexcl);
 +    }
 +    /* Perform any left over force calculations.
 +     * These calls pass bEvaluateNow=TRUE, so do_longrange() only evaluates
 +     * and resets the long-range lists and does not call put_in_list; the
 +     * ngid (0), icg, shift and buffer arguments are therefore not used.
 +     */
 +    for (nn=0; (nn<ngid); nn++)
 +    {
 +        if (rm2 > rs2)
 +        {
 +            do_longrange(cr,top,fr,0,md,icg,nn,nlr_ljc[nn],
 +                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
 +                         lambda,dvdlambda,grppener,
 +                         TRUE,TRUE,TRUE,put_in_list,bHaveVdW,bDoForces,f);
 +        }
 +        if (rl2 > rm2) {
 +            do_longrange(cr,top,fr,0,md,icg,nn,nlr_one[nn],
 +                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
 +                         lambda,dvdlambda,grppener,
 +                         rvdw_lt_rcoul,rcoul_lt_rvdw,
 +                         TRUE,put_in_list,bHaveVdW,bDoForces,f);
 +        }
 +    }
 +    debug_gmx();
 +    
 +    /* Close off short range neighbourlists */
 +    close_neighbor_list(fr,FALSE,-1,-1,bMakeQMMMnblist);
 +    
 +    return nns;
 +}
 +/* Makes sure ns->bexcl has room for natoms entries.
 + * When natoms exceeds ns->nra_alloc, the allocation grows to
 + * over_alloc_dd(natoms) and the whole array, not only the newly added part,
 + * is set to zero. Nothing happens when the array is already large enough.
 + */
 +void ns_realloc_natoms(gmx_ns_t *ns,int natoms)
 +{
 +    int i;
 +    
 +    if (natoms > ns->nra_alloc)
 +    {
 +        ns->nra_alloc = over_alloc_dd(natoms);
 +        srenew(ns->bexcl,ns->nra_alloc);
 +        for(i=0; i<ns->nra_alloc; i++)
 +        {
 +            ns->bexcl[i] = 0;
 +        }
 +    }
 +}
 +/* Sets up the neighbour-search data in ns:
 + *  - calls gmx_fatal() when the largest charge group in mtop has more atoms
 + *    than there are bits in t_excl;
 + *  - fills ns->bExcludeAlleg[i], which stays TRUE only when energy group i
 + *    has EGP_EXCL set against every energy group;
 + *  - with fr->bGrid, creates the grid and the grid-search buffers, otherwise
 + *    allocates the buffers for the simple search;
 + *  - fills ns->bHaveVdW[i] for every atom type i;
 + *  - allocates ns->bexcl for mtop->natoms atoms unless DOMAINDECOMP(cr);
 + *  - reads the environment variable GMX_DUMP_NL into ns->dump_nl.
 + *
 + * NOTE(review): the box argument is not used in this function.
 + */
 +void init_ns(FILE *fplog,const t_commrec *cr,
 +             gmx_ns_t *ns,t_forcerec *fr,
 +             const gmx_mtop_t *mtop,
 +             matrix box)
 +{
 +    int  mt,icg,nr_in_cg,maxcg,i,j,jcg,ngid,ncg;
 +    t_block *cgs;
 +    char *ptr; /* NOTE(review): unused; shadowed by the ptr declared in the GMX_DUMP_NL block below */
 +    
 +    /* Compute largest charge groups size (# atoms) */
 +    nr_in_cg=1;
 +    for(mt=0; mt<mtop->nmoltype; mt++) {
 +        cgs = &mtop->moltype[mt].cgs;
 +        for (icg=0; (icg < cgs->nr); icg++)
 +        {
 +            nr_in_cg=max(nr_in_cg,(int)(cgs->index[icg+1]-cgs->index[icg]));
 +        }
 +    }
 +
 +    /* Verify whether largest charge group is <= max cg.
 +     * This is determined by the type of the local exclusion type 
 +     * Exclusions are stored in bits. (If the type is not large
 +     * enough, enlarge it, unsigned char -> unsigned short -> unsigned long)
 +     */
 +    maxcg = sizeof(t_excl)*8;
 +    if (nr_in_cg > maxcg)
 +    {
 +        gmx_fatal(FARGS,"Max #atoms in a charge group: %d > %d\n",
 +                  nr_in_cg,maxcg);
 +    }
 +    
 +    ngid = mtop->groups.grps[egcENER].nr;
 +    snew(ns->bExcludeAlleg,ngid);
 +    for(i=0; i<ngid; i++) {
 +        ns->bExcludeAlleg[i] = TRUE;
 +        for(j=0; j<ngid; j++)
 +        {
 +            if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL))
 +            {
 +                ns->bExcludeAlleg[i] = FALSE;
 +            }
 +        }
 +    }
 +    
 +    if (fr->bGrid) {
 +        /* Grid search */
 +        ns->grid = init_grid(fplog,fr);
 +        init_nsgrid_lists(fr,ngid,ns);
 +    }
 +    else
 +    {
 +        /* Simple search: one buffer row of SHIFTS entries per energy group,
 +         * and simple_aaj holding the charge group indices 0..ncg-1 twice
 +         * in a row.
 +         */
 +        snew(ns->ns_buf,ngid);
 +        for(i=0; (i<ngid); i++)
 +        {
 +            snew(ns->ns_buf[i],SHIFTS);
 +        }
 +        ncg = ncg_mtop(mtop);
 +        snew(ns->simple_aaj,2*ncg);
 +        for(jcg=0; (jcg<ncg); jcg++)
 +        {
 +            ns->simple_aaj[jcg]     = jcg;
 +            ns->simple_aaj[jcg+ncg] = jcg;
 +        }
 +    }
 +    
 +    /* Create array that determines whether or not atoms have VdW.
 +     * Type i is flagged when any of its pair parameters with any type j is
 +     * non-zero (A, B or C with fr->bBHAM, otherwise C6 or C12).
 +     * NOTE(review): the loop reads bHaveVdW[i] before writing it, so it
 +     * relies on the contents left by snew - presumably zero, confirm.
 +     */
 +    snew(ns->bHaveVdW,fr->ntype);
 +    for(i=0; (i<fr->ntype); i++)
 +    {
 +        for(j=0; (j<fr->ntype); j++)
 +        {
 +            ns->bHaveVdW[i] = (ns->bHaveVdW[i] || 
 +                               (fr->bBHAM ? 
 +                                ((BHAMA(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMB(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMC(fr->nbfp,fr->ntype,i,j) != 0)) :
 +                                ((C6(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (C12(fr->nbfp,fr->ntype,i,j) != 0))));
 +        }
 +    }
 +    if (debug) 
 +        pr_bvec(debug,0,"bHaveVdW",ns->bHaveVdW,fr->ntype,TRUE);
 +    
 +    ns->nra_alloc = 0;
 +    ns->bexcl = NULL;
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* This could be reduced with particle decomposition */
 +        ns_realloc_natoms(ns,mtop->natoms);
 +    }
 +
 +    ns->nblist_initialized=FALSE;
 +
 +    /* nbr list debug dump: GMX_DUMP_NL is parsed as a base-10 integer;
 +     * dump_nl is 0 when the variable is not set.
 +     */
 +    {
 +        char *ptr=getenv("GMX_DUMP_NL");
 +        if (ptr)
 +        {
 +            ns->dump_nl=strtol(ptr,NULL,10);
 +            if (fplog)
 +            {
 +                fprintf(fplog, "GMX_DUMP_NL = %d", ns->dump_nl); /* NOTE(review): the message has no trailing newline */
 +            }
 +        }
 +        else
 +        {
 +            ns->dump_nl=0;
 +        }
 +    }
 +}
 +
 +/* Builds the neighbour lists for the current state.
 + *
 + * Steps, in order:
 + *  - unless fr->ePBC is epbcNONE, gmx_fatal() when the box has become too
 + *    small for fr->rlistlong;
 + *  - with domain decomposition, make ns->bexcl large enough for all atoms
 + *    in top->cgs;
 + *  - reset the neighbour lists;
 + *  - when bGrid and bFillGrid are both set, rebuild the grid; otherwise,
 + *    when fr->n_tpi is set, only the grid cell of the last charge group
 + *    (the test particle) is updated;
 + *  - run nsgrid_core() (grid search) or ns_simple_core() (simple search).
 + *    With fr->bQMMM and a QM/MM scheme other than eQMMMschemeoniom,
 + *    nsgrid_core() is run a second time with put_in_list_qmmm.
 + *
 + * lambda, dvdlambda, grppener, bDoLongRange, bDoForces and f are only
 + * forwarded to nsgrid_core().
 + *
 + * Returns the pair count reported by the core routine(s); the same number
 + * is added to the eNR_NS counter in nrnb.
 + */
 +int search_neighbours(FILE *log,t_forcerec *fr,
 +                      rvec x[],matrix box,
 +                      gmx_localtop_t *top,
 +                      gmx_groups_t *groups,
 +                      t_commrec *cr,
 +                      t_nrnb *nrnb,t_mdatoms *md,
++                      real *lambda,real *dvdlambda,
 +                      gmx_grppairener_t *grppener,
 +                      gmx_bool bFillGrid,
 +                      gmx_bool bDoLongRange,
 +                      gmx_bool bDoForces,rvec *f)
 +{
 +    t_block  *cgs=&(top->cgs);
 +    rvec     box_size,grid_x0,grid_x1;
 +    int      i,j,m,ngid; /* NOTE(review): i and j are not used in this function */
 +    real     min_size,grid_dens;
 +    int      nsearch;
 +    gmx_bool     bGrid;
 +    char     *ptr; /* NOTE(review): not used in this function */
 +    gmx_bool     *i_egp_flags; /* NOTE(review): not used in this function */
 +    int      cg_start,cg_end,start,end; /* NOTE(review): cg_start and cg_end are not used */
 +    gmx_ns_t *ns;
 +    t_grid   *grid;
 +    gmx_domdec_zones_t *dd_zones;
 +    put_in_list_t *put_in_list;
 +      
 +    ns = &fr->ns;
 +
 +    /* Set some local variables */
 +    bGrid = fr->bGrid;
 +    ngid = groups->grps[egcENER].nr;
 +    
 +    for(m=0; (m<DIM); m++)
 +    {
 +        box_size[m] = box[m][m];
 +    }
 +  
 +    if (fr->ePBC != epbcNONE)
 +    {
 +        if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC,box))
 +        {
 +            gmx_fatal(FARGS,"One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off.");
 +        }
 +        if (!bGrid)
 +        {
 +            min_size = min(box_size[XX],min(box_size[YY],box_size[ZZ]));
 +            if (2*fr->rlistlong >= min_size)
 +                gmx_fatal(FARGS,"One of the box diagonal elements has become smaller than twice the cut-off length.");
 +        }
 +    }
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        ns_realloc_natoms(ns,cgs->index[cgs->nr]);
 +    }
 +    debug_gmx();
 +    
 +    /* Reset the neighbourlists */
 +    reset_neighbor_list(fr,FALSE,-1,-1);
 +    
 +    if (bGrid && bFillGrid)
 +    {
 +              
 +        grid = ns->grid;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_zones = domdec_zones(cr->dd);
 +        }
 +        else
 +        {
 +            dd_zones = NULL;
 +
 +            get_nsgrid_boundaries(grid,NULL,box,NULL,NULL,NULL,
 +                                  cgs->nr,fr->cg_cm,grid_x0,grid_x1,&grid_dens);
 +
 +            grid_first(log,grid,NULL,NULL,fr->ePBC,box,grid_x0,grid_x1,
 +                       fr->rlistlong,grid_dens);
 +        }
 +        debug_gmx();
 +        
 +        /* Don't know why this all is... (DvdS 3/99) */
 +#ifndef SEGV
 +        start = 0;
 +        end   = cgs->nr;
 +#else
 +        start = fr->cg0;
 +        end   = (cgs->nr+1)/2;
 +#endif
 +        
 +        if (DOMAINDECOMP(cr))
 +        {
 +            end = cgs->nr;
 +            fill_grid(log,dd_zones,grid,end,-1,end,fr->cg_cm);
 +            grid->icg0 = 0;
 +            grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1;
 +        }
 +        else
 +        {
 +            fill_grid(log,NULL,grid,cgs->nr,fr->cg0,fr->hcg,fr->cg_cm);
 +            grid->icg0 = fr->cg0;
 +            grid->icg1 = fr->hcg;
 +            debug_gmx();
 +            
 +            if (PARTDECOMP(cr))
 +                mv_grid(cr,grid);
 +            debug_gmx();
 +        }
 +        
 +        calc_elemnr(log,grid,start,end,cgs->nr);
 +        calc_ptrs(grid);
 +        grid_last(log,grid,start,end,cgs->nr);
 +        
 +        if (gmx_debug_at)
 +        {
 +            check_grid(debug,grid);
 +            print_grid(debug,grid);
 +        }
 +    }
 +    else if (fr->n_tpi)
 +    {
 +        /* Set the grid cell index for the test particle only.
 +         * The cell to cg index is not corrected, but that does not matter.
 +         */
 +        fill_grid(log,NULL,ns->grid,fr->hcg,fr->hcg-1,fr->hcg,fr->cg_cm);
 +    }
 +    debug_gmx();
 +    
 +    if (!fr->ns.bCGlist)
 +    {
 +        put_in_list = put_in_list_at;
 +    }
 +    else
 +    {
 +        put_in_list = put_in_list_cg;
 +    }
 +
 +    /* Do the core! */
 +    if (bGrid)
 +    {
 +        grid = ns->grid;
 +        nsearch = nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                              grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                              nrnb,md,lambda,dvdlambda,grppener,
 +                              put_in_list,ns->bHaveVdW,
 +                              bDoLongRange,bDoForces,f,
 +                              FALSE);
 +        
 +        /* neighbour searching without QMMM! QM atoms have zero charge in
 +         * the classical calculation. The charge-charge interaction
 +         * between QM and MM atoms is handled in the QMMM core calculation
 +         * (see QMMM.c). The VDW however, we'd like to compute classically
 +         * and the QM MM atom pairs have just been put in the
 +         * corresponding neighbourlists. in case of QMMM we still need to
 +         * fill a special QMMM neighbourlist that contains all neighbours
 +         * of the QM atoms. If bQMMM is true, this list will now be made: 
 +         */
 +        if (fr->bQMMM && fr->qr->QMMMscheme!=eQMMMschemeoniom)
 +        {
 +            nsearch += nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                                   grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                                   nrnb,md,lambda,dvdlambda,grppener,
 +                                   put_in_list_qmmm,ns->bHaveVdW,
 +                                   bDoLongRange,bDoForces,f,
 +                                   TRUE);
 +        }
 +    }
 +    else 
 +    {
 +        nsearch = ns_simple_core(fr,top,md,box,box_size,
 +                                 ns->bexcl,ns->simple_aaj,
 +                                 ngid,ns->ns_buf,put_in_list,ns->bHaveVdW);
 +    }
 +    debug_gmx();
 +    
 +#ifdef DEBUG
 +    pr_nsblock(log);
 +#endif
 +    
 +    inc_nrnb(nrnb,eNR_NS,nsearch);
 +    /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
 +    
 +    return nsearch;
 +}
 +
 +int natoms_beyond_ns_buffer(t_inputrec *ir,t_forcerec *fr,t_block *cgs,
 +                            matrix scale_tot,rvec *x)
 +{
 +    int  cg0,cg1,cg,a0,a1,a,i,j;
 +    real rint,hbuf2,scale;
 +    rvec *cg_cm,cgsc;
 +    gmx_bool bIsotropic;
 +    int  nBeyond;
 +    
 +    nBeyond = 0;
 +    
 +    rint = max(ir->rcoulomb,ir->rvdw);
 +    if (ir->rlist < rint)
 +    {
 +        gmx_fatal(FARGS,"The neighbor search buffer has negative size: %f nm",
 +                  ir->rlist - rint);
 +    }
 +    cg_cm = fr->cg_cm;
 +    
 +    cg0 = fr->cg0;
 +    cg1 = fr->hcg;
 +    
 +    if (!EI_DYNAMICS(ir->eI) || !DYNAMIC_BOX(*ir))
 +    {
 +        hbuf2 = sqr(0.5*(ir->rlist - rint));
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            a0 = cgs->index[cg];
 +            a1 = cgs->index[cg+1];
 +            for(a=a0; a<a1; a++)
 +            {
 +                if (distance2(cg_cm[cg],x[a]) > hbuf2)
 +                {
 +                    nBeyond++;
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        bIsotropic = TRUE;
 +        scale = scale_tot[0][0];
 +        for(i=1; i<DIM; i++)
 +        {
 +            /* With anisotropic scaling, the original spherical ns volumes become
 +             * ellipsoids. To avoid costly transformations we use the minimum
 +             * eigenvalue of the scaling matrix for determining the buffer size.
 +             * Since the lower half is 0, the eigenvalues are the diagonal elements.
 +             */
 +            scale = min(scale,scale_tot[i][i]);
 +            if (scale_tot[i][i] != scale_tot[i-1][i-1])
 +            {
 +                bIsotropic = FALSE;
 +            }
 +            for(j=0; j<i; j++)
 +            {
 +                if (scale_tot[i][j] != 0)
 +                {
 +                    bIsotropic = FALSE;
 +                }
 +            }
 +        }
 +        hbuf2 = sqr(0.5*(scale*ir->rlist - rint));
 +        if (bIsotropic)
 +        {
 +            for(cg=cg0; cg<cg1; cg++)
 +            {
 +                svmul(scale,cg_cm[cg],cgsc);
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                for(a=a0; a<a1; a++)
 +                {
 +                    if (distance2(cgsc,x[a]) > hbuf2)
 +                    {                    
 +                        nBeyond++;
 +                    }
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Anistropic scaling */
 +            for(cg=cg0; cg<cg1; cg++)
 +            {
 +                /* Since scale_tot contains the transpose of the scaling matrix,
 +                 * we need to multiply with the transpose.
 +                 */
 +                tmvmul_ur0(scale_tot,cg_cm[cg],cgsc);
 +                a0 = cgs->index[cg];
 +                a1 = cgs->index[cg+1];
 +                for(a=a0; a<a1; a++)
 +                {
 +                    if (distance2(cgsc,x[a]) > hbuf2)
 +                    {
 +                        nBeyond++;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    return nBeyond;
 +}
Simple merge
index 1260ef82d861d4289a2faba7bddb7eac43355247,0000000000000000000000000000000000000000..e98305a945202c6f0bd809409c0a5f8b4cd45adc
mode 100644,000000..100644
--- /dev/null
@@@ -1,1353 -1,0 +1,1353 @@@
-                                 int start,int end,
-                                 int g,t_pullgrp *pg,ivec pulldims,
-                                 gmx_mtop_t *mtop,t_inputrec *ir)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +
 +#include <math.h>
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include "futil.h"
 +#include "rdgroup.h"
 +#include "statutil.h"
 +#include "gmxfio.h"
 +#include "vec.h" 
 +#include "typedefs.h"
 +#include "network.h"
 +#include "filenm.h"
 +#include <string.h>
 +#include "smalloc.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "names.h"
 +#include "partdec.h"
 +#include "pbc.h"
 +#include "mtop_util.h"
 +#include "mdrun.h"
 +#include "gmx_ga2la.h"
 +#include "copyrite.h"
 +#include "macros.h"
 +
 +static void pull_print_x_grp(FILE *out,gmx_bool bRef,ivec dim,t_pullgrp *pgrp) 
 +{
 +    int m;
 +    
 +    for(m=0; m<DIM; m++)
 +    {
 +        if (dim[m])
 +        {
 +            fprintf(out,"\t%g",bRef ? pgrp->x[m] : pgrp->dr[m]);
 +        }
 +    }
 +}
 +
 +static void pull_print_x(FILE *out,t_pull *pull,double t) 
 +{
 +    int g;
 +  
 +    fprintf(out, "%.4f", t);
 +    
 +    if (PULL_CYL(pull))
 +    {
 +        for (g=1; g<1+pull->ngrp; g++)
 +        {
 +            pull_print_x_grp(out,TRUE ,pull->dim,&pull->dyna[g]);
 +            pull_print_x_grp(out,FALSE,pull->dim,&pull->grp[g]);
 +        }
 +    }
 +    else
 +    {
 +        for (g=0; g<1+pull->ngrp; g++)
 +        {
 +            if (pull->grp[g].nat > 0)
 +            {
 +                pull_print_x_grp(out,g==0,pull->dim,&pull->grp[g]);
 +            }
 +        }
 +    }
 +    fprintf(out,"\n");
 +}
 +
 +static void pull_print_f(FILE *out,t_pull *pull,double t) 
 +{
 +    int g,d;
 +    
 +    fprintf(out, "%.4f", t);
 +    
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        if (pull->eGeom == epullgPOS)
 +        {
 +            for(d=0; d<DIM; d++)
 +            {
 +                if (pull->dim[d])
 +                {
 +                    fprintf(out,"\t%g",pull->grp[g].f[d]);
 +                }
 +            }
 +        }
 +        else
 +        {
 +            fprintf(out,"\t%g",pull->grp[g].f_scal);
 +        }
 +    }
 +    fprintf(out,"\n");
 +}
 +
 +void pull_print_output(t_pull *pull, gmx_large_int_t step, double time)
 +{
 +    if ((pull->nstxout != 0) && (step % pull->nstxout == 0))
 +    {
 +        pull_print_x(pull->out_x,pull,time);
 +    }
 +    
 +    if ((pull->nstfout != 0) && (step % pull->nstfout == 0))
 +    {
 +        pull_print_f(pull->out_f,pull,time);
 +    }
 +}
 +
 +static FILE *open_pull_out(const char *fn,t_pull *pull,const output_env_t oenv, 
 +                           gmx_bool bCoord, unsigned long Flags)
 +{
 +    FILE *fp;
 +    int  nsets,g,m;
 +    char **setname,buf[10];
 +    
 +    if(Flags & MD_APPENDFILES)
 +    {
 +        fp = gmx_fio_fopen(fn,"a+");
 +    }
 +    else
 +    {
 +        fp = gmx_fio_fopen(fn,"w+");
 +        if (bCoord)
 +        {
 +            xvgr_header(fp,"Pull COM",  "Time (ps)","Position (nm)",
 +                        exvggtXNY,oenv);
 +        }
 +        else
 +        {
 +            xvgr_header(fp,"Pull force","Time (ps)","Force (kJ/mol/nm)",
 +                        exvggtXNY,oenv);
 +        }
 +        
 +        snew(setname,(1+pull->ngrp)*DIM);
 +        nsets = 0;
 +        for(g=0; g<1+pull->ngrp; g++)
 +        {
 +            if (pull->grp[g].nat > 0 &&
 +                (g > 0 || (bCoord && !PULL_CYL(pull))))
 +            {
 +                if (bCoord || pull->eGeom == epullgPOS)
 +                {
 +                    if (PULL_CYL(pull))
 +                    {
 +                        for(m=0; m<DIM; m++)
 +                        {
 +                            if (pull->dim[m])
 +                            {
 +                                sprintf(buf,"%d %s%c",g,"c",'X'+m);
 +                                setname[nsets] = strdup(buf);
 +                                nsets++;
 +                            }
 +                        }
 +                    }
 +                    for(m=0; m<DIM; m++)
 +                    {
 +                        if (pull->dim[m])
 +                        {
 +                            sprintf(buf,"%d %s%c",
 +                                    g,(bCoord && g > 0)?"d":"",'X'+m);
 +                            setname[nsets] = strdup(buf);
 +                            nsets++;
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    sprintf(buf,"%d",g);
 +                    setname[nsets] = strdup(buf);
 +                    nsets++;
 +                }
 +            }
 +        }
 +        if (bCoord || nsets > 1)
 +        {
 +            xvgr_legend(fp,nsets,(const char**)setname,oenv);
 +        }
 +        for(g=0; g<nsets; g++)
 +        {
 +            sfree(setname[g]);
 +        }
 +        sfree(setname);
 +    }
 +    
 +    return fp;
 +}
 +
 +/* Apply forces in a mass weighted fashion */
 +static void apply_forces_grp(t_pullgrp *pgrp, t_mdatoms * md,
 +                             gmx_ga2la_t ga2la,
 +                             dvec f_pull, int sign, rvec *f)
 +{
 +    int i,ii,m,start,end;
 +    double wmass,inv_wm;
 +    
 +    start = md->start;
 +    end   = md->homenr + start;
 +    
 +    inv_wm = pgrp->wscale*pgrp->invtm;
 +    
 +    for(i=0; i<pgrp->nat_loc; i++)
 +    {
 +        ii = pgrp->ind_loc[i];
 +        wmass = md->massT[ii];
 +        if (pgrp->weight_loc)
 +        {
 +            wmass *= pgrp->weight_loc[i];
 +        }
 +    
 +        for(m=0; m<DIM; m++)
 +        {
 +            f[ii][m] += sign * wmass * f_pull[m] * inv_wm;
 +        }
 +    }
 +}
 +
 +/* Apply forces in a mass weighted fashion */
 +static void apply_forces(t_pull * pull, t_mdatoms * md, gmx_ga2la_t ga2la,
 +                         rvec *f)
 +{
 +    int i;
 +    t_pullgrp *pgrp;
 +    
 +    for(i=1; i<pull->ngrp+1; i++)
 +    {
 +        pgrp = &(pull->grp[i]);
 +        apply_forces_grp(pgrp,md,ga2la,pgrp->f,1,f);
 +        if (pull->grp[0].nat)
 +        {
 +            if (PULL_CYL(pull))
 +            {
 +                apply_forces_grp(&(pull->dyna[i]),md,ga2la,pgrp->f,-1,f);
 +            }
 +            else
 +            {
 +                apply_forces_grp(&(pull->grp[0]),md,ga2la,pgrp->f,-1,f);
 +            }
 +        }
 +    }
 +}
 +
 +static double max_pull_distance2(const t_pull *pull,const t_pbc *pbc)
 +{
 +    double max_d2;
 +    int    m;
 +
 +    max_d2 = GMX_DOUBLE_MAX;
 +
 +    if (pull->eGeom != epullgDIRPBC)
 +    {
 +        for(m=0; m<pbc->ndim_ePBC; m++)
 +        {
 +            if (pull->dim[m] != 0)
 +            {
 +                max_d2 = min(max_d2,norm2(pbc->box[m]));
 +            }
 +        }
 +    }
 +    
 +    return 0.25*max_d2;
 +}
 +
 +static void get_pullgrps_dr(const t_pull *pull,const t_pbc *pbc,int g,double t,
 +                            dvec xg,dvec xref,double max_dist2,
 +                            dvec dr)
 +{
 +    t_pullgrp *pref,*pgrp;
 +    int       m;
 +    dvec      xrefr,dref={0,0,0};
 +    double    dr2;
 +    
 +    pgrp = &pull->grp[g];
 +    
 +    copy_dvec(xref,xrefr);
 +
 +    if (pull->eGeom == epullgDIRPBC)
 +    {
 +        for(m=0; m<DIM; m++)
 +        {
 +            dref[m] = (pgrp->init[0] + pgrp->rate*t)*pull->grp[g].vec[m];
 +        }
 +        /* Add the reference position, so we use the correct periodic image */
 +        dvec_inc(xrefr,dref);
 +    }
 +  
 +    pbc_dx_d(pbc, xg, xrefr, dr);
 +    dr2 = 0;
 +    for(m=0; m<DIM; m++)
 +    {
 +        dr[m] *= pull->dim[m];
 +        dr2 += dr[m]*dr[m];
 +    }
 +    if (max_dist2 >= 0 && dr2 > 0.98*0.98*max_dist2)
 +    {
 +        gmx_fatal(FARGS,"Distance of pull group %d (%f nm) is larger than 0.49 times the box size (%f)",g,sqrt(dr2),sqrt(max_dist2));
 +    }
 +
 +    if (pull->eGeom == epullgDIRPBC)
 +    {
 +        dvec_inc(dr,dref);
 +    }
 +}
 +
 +static void get_pullgrp_dr(const t_pull *pull,const t_pbc *pbc,int g,double t,
 +                           dvec dr)
 +{
 +    double md2;
 +
 +    if (pull->eGeom == epullgDIRPBC)
 +    {
 +        md2 = -1;
 +    }
 +    else
 +    {
 +        md2 = max_pull_distance2(pull,pbc);
 +    }
 +
 +    get_pullgrps_dr(pull,pbc,g,t,
 +                    pull->grp[g].x,
 +                    PULL_CYL(pull) ? pull->dyna[g].x : pull->grp[0].x,
 +                    md2,
 +                    dr);
 +}
 +
 +void get_pullgrp_distance(t_pull *pull,t_pbc *pbc,int g,double t,
 +                          dvec dr,dvec dev)
 +{
 +    static gmx_bool bWarned=FALSE; /* TODO: this should be fixed for thread-safety, 
 +                                  but is fairly benign */
 +    t_pullgrp *pgrp;
 +    int       m;
 +    dvec      ref;
 +    double    drs,inpr;
 +    
 +    pgrp = &pull->grp[g];
 +    
 +    get_pullgrp_dr(pull,pbc,g,t,dr);
 +    
 +    if (pull->eGeom == epullgPOS)
 +    {
 +        for(m=0; m<DIM; m++)
 +        {
 +            ref[m] = pgrp->init[m] + pgrp->rate*t*pgrp->vec[m];
 +        }
 +    }
 +    else
 +    {
 +        ref[0] = pgrp->init[0] + pgrp->rate*t;
 +    }
 +    
 +    switch (pull->eGeom)
 +    {
 +    case epullgDIST:
 +        /* Pull along the vector between the com's */
 +        if (ref[0] < 0 && !bWarned)
 +        {
 +            fprintf(stderr,"\nPull reference distance for group %d is negative (%f)\n",g,ref[0]);
 +            bWarned = TRUE;
 +        }
 +        drs = dnorm(dr);
 +        if (drs == 0)
 +        {
 +            /* With no vector we can not determine the direction for the force,
 +             * so we set the force to zero.
 +             */
 +            dev[0] = 0;
 +        }
 +        else
 +        {
 +            /* Determine the deviation */
 +            dev[0] = drs - ref[0];
 +        }
 +        break;
 +    case epullgDIR:
 +    case epullgDIRPBC:
 +    case epullgCYL:
 +        /* Pull along vec */
 +        inpr = 0;
 +        for(m=0; m<DIM; m++)
 +        {
 +            inpr += pgrp->vec[m]*dr[m];
 +        }
 +        dev[0] = inpr - ref[0];
 +        break;
 +    case epullgPOS:
 +        /* Determine the difference of dr and ref along each dimension */
 +        for(m=0; m<DIM; m++)
 +        {
 +            dev[m] = (dr[m] - ref[m])*pull->dim[m];
 +        }
 +        break;
 +    }
 +}
 +
 +void clear_pull_forces(t_pull *pull)
 +{
 +    int i;
 +    
 +    /* Zeroing the forces is only required for constraint pulling.
 +     * It can happen that multiple constraint steps need to be applied
 +     * and therefore the constraint forces need to be accumulated.
 +     */
 +    for(i=0; i<1+pull->ngrp; i++)
 +    {
 +        clear_dvec(pull->grp[i].f);
 +        pull->grp[i].f_scal = 0;
 +    }
 +}
 +
 +/* Apply constraint using SHAKE */
 +static void do_constraint(t_pull *pull, t_mdatoms *md, t_pbc *pbc,
 +                          rvec *x, rvec *v,
 +                          gmx_bool bMaster, tensor vir,
 +                          double dt, double t) 
 +{
 +
 +    dvec *r_ij;  /* x[i] com of i in prev. step. Obeys constr. -> r_ij[i] */
 +    dvec unc_ij; /* xp[i] com of i this step, before constr.   -> unc_ij  */
 +
 +    dvec *rinew;           /* current 'new' position of group i */
 +    dvec *rjnew;           /* current 'new' position of group j */
 +    dvec  ref,vec;
 +    double d0,inpr;
 +    double lambda, rm, mass, invdt=0;
 +    gmx_bool bConverged_all,bConverged=FALSE;
 +    int niter=0,g,ii,j,m,max_iter=100;
 +    double q,a,b,c;  /* for solving the quadratic equation, 
 +                        see Num. Recipes in C ed 2 p. 184 */
 +    dvec *dr;        /* correction for group i */
 +    dvec ref_dr;     /* correction for group j */
 +    dvec f;          /* the pull force */
 +    dvec tmp,tmp3;
 +    t_pullgrp *pdyna,*pgrp,*pref;
 +    
 +    snew(r_ij,pull->ngrp+1);
 +    if (PULL_CYL(pull))
 +    {
 +        snew(rjnew,pull->ngrp+1);
 +    }
 +    else
 +    {
 +        snew(rjnew,1);
 +    }
 +    snew(dr,pull->ngrp+1);
 +    snew(rinew,pull->ngrp+1);
 +    
 +    /* copy the current unconstrained positions for use in iterations. We 
 +       iterate until rinew[i] and rjnew[j] obey the constraints. Then
 +       rinew - pull.x_unc[i] is the correction dr to group i */
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        copy_dvec(pull->grp[g].xp,rinew[g]);
 +    }
 +    if (PULL_CYL(pull))
 +    {
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            copy_dvec(pull->dyna[g].xp,rjnew[g]);
 +        }
 +    }
 +    else
 +    {
 +        copy_dvec(pull->grp[0].xp,rjnew[0]);
 +    }
 +    
 +    /* Determine the constraint directions from the old positions */
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        get_pullgrp_dr(pull,pbc,g,t,r_ij[g]);
 +        /* Store the difference vector at time t for printing */
 +        copy_dvec(r_ij[g],pull->grp[g].dr);
 +        if (debug)
 +        {
 +            fprintf(debug,"Pull group %d dr %f %f %f\n",
 +                    g,r_ij[g][XX],r_ij[g][YY],r_ij[g][ZZ]);
 +        }
 +        
 +        if (pull->eGeom == epullgDIR || pull->eGeom == epullgDIRPBC)
 +        {
 +            /* Select the component along vec */
 +            a = 0;
 +            for(m=0; m<DIM; m++)
 +            {
 +                a += pull->grp[g].vec[m]*r_ij[g][m];
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                r_ij[g][m] = a*pull->grp[g].vec[m];
 +            }
 +        }
 +    }
 +    
 +    bConverged_all = FALSE;
 +    while (!bConverged_all && niter < max_iter)
 +    {
 +        bConverged_all = TRUE;
 +
 +        /* loop over all constraints */
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            pgrp = &pull->grp[g];
 +            if (PULL_CYL(pull))
 +                pref = &pull->dyna[g];
 +            else
 +                pref = &pull->grp[0];
 +
 +            /* Get the current difference vector */
 +            get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[PULL_CYL(pull) ? g : 0],
 +                            -1,unc_ij);
 +
 +            if (pull->eGeom == epullgPOS)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    ref[m] = pgrp->init[m] + pgrp->rate*t*pgrp->vec[m];
 +                }
 +            }
 +            else
 +            {
 +                ref[0] = pgrp->init[0] + pgrp->rate*t;
 +                /* Keep the compiler happy */
 +                ref[1] = 0;
 +                ref[2] = 0;
 +            }
 +            
 +            if (debug)
 +            {
 +                fprintf(debug,"Pull group %d, iteration %d\n",g,niter);
 +            }
 +            
 +            rm = 1.0/(pull->grp[g].invtm + pref->invtm);
 +            
 +            switch (pull->eGeom)
 +            {
 +            case epullgDIST:
 +                if (ref[0] <= 0)
 +                {
 +                    gmx_fatal(FARGS,"The pull constraint reference distance for group %d is <= 0 (%f)",g,ref[0]);
 +                }
 +                
 +                a = diprod(r_ij[g],r_ij[g]); 
 +                b = diprod(unc_ij,r_ij[g])*2;
 +                c = diprod(unc_ij,unc_ij) - dsqr(ref[0]);
 +                
 +                if (b < 0)
 +                {
 +                    q = -0.5*(b - sqrt(b*b - 4*a*c));
 +                    lambda = -q/a;
 +                }
 +                else
 +                {
 +                    q = -0.5*(b + sqrt(b*b - 4*a*c));
 +                    lambda = -c/q;
 +                }
 +                
 +                if (debug)
 +                {
 +                    fprintf(debug,
 +                            "Pull ax^2+bx+c=0: a=%e b=%e c=%e lambda=%e\n",
 +                            a,b,c,lambda);
 +                }
 +                
 +                /* The position corrections dr due to the constraints */
 +                dsvmul(-lambda*rm*pgrp->invtm, r_ij[g],  dr[g]);
 +                dsvmul( lambda*rm*pref->invtm, r_ij[g], ref_dr);
 +                break;
 +            case epullgDIR:
 +            case epullgDIRPBC:
 +            case epullgCYL:
 +                /* A 1-dimensional constraint along a vector */
 +                a = 0;
 +                for(m=0; m<DIM; m++)
 +                {
 +                    vec[m] = pgrp->vec[m];
 +                    a += unc_ij[m]*vec[m];
 +                }
 +                /* Select only the component along the vector */
 +                dsvmul(a,vec,unc_ij);
 +                lambda = a - ref[0];
 +                if (debug)
 +                {
 +                    fprintf(debug,"Pull inpr %e lambda: %e\n",a,lambda);
 +                }
 +                
 +                /* The position corrections dr due to the constraints */
 +                dsvmul(-lambda*rm*pull->grp[g].invtm, vec, dr[g]);
 +                dsvmul( lambda*rm*       pref->invtm, vec,ref_dr);
 +                break;
 +            case epullgPOS:
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (pull->dim[m])
 +                    {
 +                        lambda = r_ij[g][m] - ref[m];
 +                        /* The position corrections dr due to the constraints */
 +                        dr[g][m]  = -lambda*rm*pull->grp[g].invtm;
 +                        ref_dr[m] =  lambda*rm*pref->invtm;
 +                    }
 +                    else
 +                    {
 +                        dr[g][m]  = 0;
 +                        ref_dr[m] = 0;
 +                    }
 +                }
 +                break;
 +            }
 +            
 +            /* DEBUG */
 +            if (debug)
 +            {
 +                j = (PULL_CYL(pull) ? g : 0);
 +                get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[j],-1,tmp);
 +                get_pullgrps_dr(pull,pbc,g,t,dr[g]   ,ref_dr  ,-1,tmp3);
 +                fprintf(debug,
 +                        "Pull cur %8.5f %8.5f %8.5f j:%8.5f %8.5f %8.5f d: %8.5f\n",
 +                        rinew[g][0],rinew[g][1],rinew[g][2], 
 +                        rjnew[j][0],rjnew[j][1],rjnew[j][2], dnorm(tmp));
 +                if (pull->eGeom == epullgPOS)
 +                {
 +                    fprintf(debug,
 +                            "Pull ref %8.5f %8.5f %8.5f\n",
 +                            pgrp->vec[0],pgrp->vec[1],pgrp->vec[2]);
 +                }
 +                else
 +                {
 +                    fprintf(debug,
 +                            "Pull ref %8s %8s %8s   %8s %8s %8s d: %8.5f %8.5f %8.5f\n",
 +                            "","","","","","",ref[0],ref[1],ref[2]);
 +                }
 +                fprintf(debug,
 +                        "Pull cor %8.5f %8.5f %8.5f j:%8.5f %8.5f %8.5f d: %8.5f\n",
 +                        dr[g][0],dr[g][1],dr[g][2],
 +                        ref_dr[0],ref_dr[1],ref_dr[2],
 +                        dnorm(tmp3));
 +                fprintf(debug,
 +                        "Pull cor %10.7f %10.7f %10.7f\n",
 +                        dr[g][0],dr[g][1],dr[g][2]);
 +            } /* END DEBUG */
 +            
 +            /* Update the COMs with dr */
 +            dvec_inc(rinew[g],                     dr[g]);
 +            dvec_inc(rjnew[PULL_CYL(pull) ? g : 0],ref_dr);
 +        }
 +        
 +        /* Check if all constraints are fullfilled now */
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            pgrp = &pull->grp[g];
 +            
 +            get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[PULL_CYL(pull) ? g : 0],
 +                            -1,unc_ij);
 +            
 +            switch (pull->eGeom)
 +            {
 +            case epullgDIST:
 +                bConverged = fabs(dnorm(unc_ij) - ref[0]) < pull->constr_tol;
 +                break;
 +            case epullgDIR:
 +            case epullgDIRPBC:
 +            case epullgCYL:
 +                for(m=0; m<DIM; m++)
 +                {
 +                    vec[m] = pgrp->vec[m];
 +                }
 +                inpr = diprod(unc_ij,vec);
 +                dsvmul(inpr,vec,unc_ij);
 +                bConverged =
 +                    fabs(diprod(unc_ij,vec) - ref[0]) < pull->constr_tol;
 +                break;
 +            case epullgPOS:
 +                bConverged = TRUE;
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (pull->dim[m] && 
 +                        fabs(unc_ij[m] - ref[m]) >= pull->constr_tol)
 +                    {
 +                        bConverged = FALSE;
 +                    }
 +                }
 +                break;
 +            }
 +            
 +            if (!bConverged)
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug,"NOT CONVERGED YET: Group %d:"
 +                            "d_ref = %f %f %f, current d = %f\n",
 +                            g,ref[0],ref[1],ref[2],dnorm(unc_ij));
 +                }
 +
 +                bConverged_all = FALSE;
 +            }
 +        }
 +        
 +        niter++;
 +        /* if after all constraints are dealt with and bConverged is still TRUE
 +           we're finished, if not we do another iteration */
 +    }
 +    if (niter > max_iter)
 +    {
 +        gmx_fatal(FARGS,"Too many iterations for constraint run: %d",niter);
 +    }
 +    
 +    /* DONE ITERATING, NOW UPDATE COORDINATES AND CALC. CONSTRAINT FORCES */
 +    
 +    if (v)
 +    {
 +        invdt = 1/dt;
 +    }
 +    
 +    /* update the normal groups */
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        pgrp = &pull->grp[g];
 +        /* get the final dr and constraint force for group i */
 +        dvec_sub(rinew[g],pgrp->xp,dr[g]);
 +        /* select components of dr */
 +        for(m=0; m<DIM; m++)
 +        {
 +            dr[g][m] *= pull->dim[m];
 +        }
 +        dsvmul(1.0/(pgrp->invtm*dt*dt),dr[g],f);
 +        dvec_inc(pgrp->f,f);
 +        switch (pull->eGeom)
 +        {
 +        case epullgDIST:
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f_scal += r_ij[g][m]*f[m]/dnorm(r_ij[g]);
 +            }
 +            break;
 +        case epullgDIR:
 +        case epullgDIRPBC:
 +        case epullgCYL:
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f_scal += pgrp->vec[m]*f[m];
 +            }
 +            break;
 +        case epullgPOS:
 +            break;
 +        }
 +        
 +        if (vir && bMaster) {
 +            /* Add the pull contribution to the virial */
 +            for(j=0; j<DIM; j++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    vir[j][m] -= 0.5*f[j]*r_ij[g][m];
 +                }
 +            }
 +        }
 +        
 +        /* update the atom positions */
 +        copy_dvec(dr[g],tmp);
 +        for(j=0;j<pgrp->nat_loc;j++)
 +        {
 +            ii = pgrp->ind_loc[j];
 +            if (pgrp->weight_loc)
 +            {
 +                dsvmul(pgrp->wscale*pgrp->weight_loc[j],dr[g],tmp); 
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                x[ii][m] += tmp[m];
 +            }
 +            if (v)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    v[ii][m] += invdt*tmp[m];
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* update the reference groups */
 +    if (PULL_CYL(pull))
 +    {
 +        /* update the dynamic reference groups */
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            pdyna = &pull->dyna[g];
 +            dvec_sub(rjnew[g],pdyna->xp,ref_dr);
 +            /* select components of ref_dr */
 +            for(m=0; m<DIM; m++)
 +            {
 +                ref_dr[m] *= pull->dim[m];
 +            }
 +            
 +            for(j=0;j<pdyna->nat_loc;j++)
 +            {
 +                /* reset the atoms with dr, weighted by w_i */
 +                dsvmul(pdyna->wscale*pdyna->weight_loc[j],ref_dr,tmp); 
 +                ii = pdyna->ind_loc[j];
 +                for(m=0; m<DIM; m++)
 +                {
 +                    x[ii][m] += tmp[m];
 +                }
 +                if (v)
 +                {
 +                    for(m=0; m<DIM; m++)
 +                    {
 +                        v[ii][m] += invdt*tmp[m];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        pgrp = &pull->grp[0];
 +        /* update the reference group */
 +        dvec_sub(rjnew[0],pgrp->xp, ref_dr); 
 +        /* select components of ref_dr */
 +        for(m=0;m<DIM;m++)
 +        {
 +            ref_dr[m] *= pull->dim[m];
 +        }
 +        
 +        copy_dvec(ref_dr,tmp);
 +        for(j=0; j<pgrp->nat_loc;j++)
 +        {
 +            ii = pgrp->ind_loc[j];
 +            if (pgrp->weight_loc)
 +            {
 +                dsvmul(pgrp->wscale*pgrp->weight_loc[j],ref_dr,tmp); 
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                x[ii][m] += tmp[m];
 +            }
 +            if (v)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    v[ii][m] += invdt*tmp[m];
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* finished! I hope. Give back some memory */
 +    sfree(r_ij);
 +    sfree(rinew);
 +    sfree(rjnew);
 +    sfree(dr);
 +}
 +
 +/* Pulling with a harmonic umbrella potential or constant force */
 +static void do_pull_pot(int ePull,
 +                        t_pull *pull, t_pbc *pbc, double t, real lambda,
 +                        real *V, tensor vir, real *dVdl)
 +{
 +    int       g,j,m;
 +    dvec      dev;
 +    double    ndr,invdr;
 +    real      k,dkdl;
 +    t_pullgrp *pgrp;
 +    
 +    /* loop over the groups that are being pulled */
 +    *V    = 0;
 +    *dVdl = 0;
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        pgrp = &pull->grp[g];
 +        get_pullgrp_distance(pull,pbc,g,t,pgrp->dr,dev);
 +        
 +        k    = (1.0 - lambda)*pgrp->k + lambda*pgrp->kB;
 +        dkdl = pgrp->kB - pgrp->k;
 +        
 +        switch (pull->eGeom)
 +        {
 +        case epullgDIST:
 +            ndr   = dnorm(pgrp->dr);
 +            invdr = 1/ndr;
 +            if (ePull == epullUMBRELLA)
 +            {
 +                pgrp->f_scal  =       -k*dev[0];
 +                *V           += 0.5*   k*dsqr(dev[0]);
 +                *dVdl        += 0.5*dkdl*dsqr(dev[0]);
 +            }
 +            else
 +            {
 +                pgrp->f_scal  =   -k;
 +                *V           +=    k*ndr;
 +                *dVdl        += dkdl*ndr;
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f[m]    = pgrp->f_scal*pgrp->dr[m]*invdr;
 +            }
 +            break;
 +        case epullgDIR:
 +        case epullgDIRPBC:
 +        case epullgCYL:
 +            if (ePull == epullUMBRELLA)
 +            {
 +                pgrp->f_scal  =       -k*dev[0];
 +                *V           += 0.5*   k*dsqr(dev[0]);
 +                *dVdl        += 0.5*dkdl*dsqr(dev[0]);
 +            }
 +            else
 +            {
 +                ndr = 0;
 +                for(m=0; m<DIM; m++)
 +                {
 +                    ndr += pgrp->vec[m]*pgrp->dr[m];
 +                }
 +                pgrp->f_scal  =   -k;
 +                *V           +=    k*ndr;
 +                *dVdl        += dkdl*ndr;
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f[m]    = pgrp->f_scal*pgrp->vec[m];
 +            }
 +            break;
 +        case epullgPOS:
 +            for(m=0; m<DIM; m++)
 +            {
 +                if (ePull == epullUMBRELLA)
 +                {
 +                    pgrp->f[m]  =       -k*dev[m];
 +                    *V         += 0.5*   k*dsqr(dev[m]);
 +                    *dVdl      += 0.5*dkdl*dsqr(dev[m]);
 +                }
 +                else
 +                {
 +                    pgrp->f[m]  =   -k*pull->dim[m];
 +                    *V         +=    k*pgrp->dr[m]*pull->dim[m];
 +                    *dVdl      += dkdl*pgrp->dr[m]*pull->dim[m];
 +                }
 +            }
 +            break;
 +        }
 +        
 +        if (vir)
 +        {
 +            /* Add the pull contribution to the virial */
 +            for(j=0; j<DIM; j++)
 +            {
 +                for(m=0;m<DIM;m++)
 +                {
 +                    vir[j][m] -= 0.5*pgrp->f[j]*pgrp->dr[m];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +real pull_potential(int ePull,t_pull *pull, t_mdatoms *md, t_pbc *pbc,
 +                  t_commrec *cr, double t, real lambda,
 +                  rvec *x, rvec *f, tensor vir, real *dvdlambda)
 +{
 +  real V,dVdl;
 +
 +  pull_calc_coms(cr,pull,md,pbc,t,x,NULL);
 +
 +  do_pull_pot(ePull,pull,pbc,t,lambda,
 +            &V,pull->bVirial && MASTER(cr) ? vir : NULL,&dVdl);
 +
 +  /* Distribute forces over pulled groups */
 +  apply_forces(pull, md, DOMAINDECOMP(cr) ? cr->dd->ga2la : NULL, f);
 +
 +  if (MASTER(cr)) {
 +    *dvdlambda += dVdl;
 +  }
 +
 +  return (MASTER(cr) ? V : 0.0);
 +}
 +
 +void pull_constraint(t_pull *pull, t_mdatoms *md, t_pbc *pbc,
 +                   t_commrec *cr, double dt, double t,
 +                   rvec *x, rvec *xp, rvec *v, tensor vir)
 +{
 +  pull_calc_coms(cr,pull,md,pbc,t,x,xp);
 +
 +  do_constraint(pull,md,pbc,xp,v,pull->bVirial && MASTER(cr),vir,dt,t);
 +}
 +
 +static void make_local_pull_group(gmx_ga2la_t ga2la,
 +                                t_pullgrp *pg,int start,int end)
 +{
 +  int i,ii;
 +
 +  pg->nat_loc = 0;
 +  for(i=0; i<pg->nat; i++) {
 +    ii = pg->ind[i];
 +    if (ga2la) {
 +      if (!ga2la_get_home(ga2la,ii,&ii)) {
 +        ii = -1;
 +      }
 +    }
 +    if (ii >= start && ii < end) {
 +      /* This is a home atom, add it to the local pull group */
 +      if (pg->nat_loc >= pg->nalloc_loc) {
 +      pg->nalloc_loc = over_alloc_dd(pg->nat_loc+1);
 +      srenew(pg->ind_loc,pg->nalloc_loc);
 +      if (pg->epgrppbc == epgrppbcCOS || pg->weight) {
 +        srenew(pg->weight_loc,pg->nalloc_loc);
 +      }
 +      }
 +      pg->ind_loc[pg->nat_loc] = ii;
 +      if (pg->weight) {
 +        pg->weight_loc[pg->nat_loc] = pg->weight[i];
 +      }
 +      pg->nat_loc++;
 +    }
 +  }
 +}
 +
 +void dd_make_local_pull_groups(gmx_domdec_t *dd,t_pull *pull,t_mdatoms *md)
 +{
 +  gmx_ga2la_t ga2la;
 +  int g;
 +  
 +  if (dd) {
 +    ga2la = dd->ga2la;
 +  } else {
 +    ga2la = NULL;
 +  }
 +
 +  if (pull->grp[0].nat > 0)
 +    make_local_pull_group(ga2la,&pull->grp[0],md->start,md->start+md->homenr);
 +  for(g=1; g<1+pull->ngrp; g++)
 +    make_local_pull_group(ga2la,&pull->grp[g],md->start,md->start+md->homenr);
 +}
 +/* Set up the index bookkeeping and the mass/weight totals of pull group g.
 + *
 + * In a parallel run the local index and weight arrays of pg are reset,
 + * in a serial run they alias (or, for cosine weighting, are allocated next
 + * to) the global ones. The loop over the group atoms counts frozen pull
 + * dimensions and accumulates the total mass, the weighted mass and the
 + * squared-weighted mass. With free-energy perturbation the A and B masses
 + * are interpolated with lambda. With energy minimization or BD the masses
 + * are moved into pg->weight.
 + * gmx_fatal() is called when the weighted mass is zero. pg->invtm is set
 + * to 1.0 when nothing is frozen and to 0.0 (with wscale 1.0) otherwise.
 + *
 + * NOTE(review): in the parallel case without domain decomposition the atom
 + * loop stores into pg->ind_loc after this function has set it to NULL and
 + * no allocation is visible here - confirm this path is safe.
 + */
 +static void init_pull_group_index(FILE *fplog,t_commrec *cr,
-       m = (1 - ir->init_lambda)*atom->m + ir->init_lambda*atom->mB;
++                                  int start,int end,
++                                  int g,t_pullgrp *pg,ivec pulldims,
++                                  gmx_mtop_t *mtop,t_inputrec *ir, real lambda)
 +{
 +  int i,ii,d,nfrozen,ndim;
 +  real m,w,mbd;
 +  double tmass,wmass,wwmass;
 +  gmx_bool bDomDec;
 +  gmx_ga2la_t ga2la=NULL;
 +  gmx_groups_t *groups;
 +  t_atom *atom;
 +
 +  bDomDec = (cr && DOMAINDECOMP(cr));
 +  if (bDomDec) {
 +    ga2la = cr->dd->ga2la;
 +  }
 +
 +  if (EI_ENERGY_MINIMIZATION(ir->eI) || ir->eI == eiBD) {
 +    /* There are no masses in the integrator.
 +     * But we still want to have the correct mass-weighted COMs.
 +     * So we store the real masses in the weights.
 +     * We do not set nweight, so these weights do not end up in the tpx file.
 +     */
 +    if (pg->nweight == 0) {
 +      snew(pg->weight,pg->nat);
 +    }
 +  }
 +
 +  if (cr && PAR(cr)) {
 +    pg->nat_loc    = 0;
 +    pg->nalloc_loc = 0;
 +    pg->ind_loc    = NULL;
 +    pg->weight_loc = NULL;
 +  } else {
 +    pg->nat_loc = pg->nat;
 +    pg->ind_loc = pg->ind;
 +    if (pg->epgrppbc == epgrppbcCOS) {
 +      snew(pg->weight_loc,pg->nat);
 +    } else {
 +      pg->weight_loc = pg->weight;
 +    }
 +  }
 +
 +  groups = &mtop->groups;
 +
 +  nfrozen = 0;
 +  tmass  = 0;
 +  wmass  = 0;
 +  wwmass = 0;
 +  for(i=0; i<pg->nat; i++) {
 +    ii = pg->ind[i];
 +    gmx_mtop_atomnr_to_atom(mtop,ii,&atom);
 +    if (cr && PAR(cr) && !bDomDec && ii >= start && ii < end)
 +      pg->ind_loc[pg->nat_loc++] = ii;
 +    if (ir->opts.nFreeze) {
 +      for(d=0; d<DIM; d++)
 +      if (pulldims[d] && ir->opts.nFreeze[ggrpnr(groups,egcFREEZE,ii)][d])
 +        nfrozen++;
 +    }
 +    if (ir->efep == efepNO) {
 +      m = atom->m;
 +    } else {
-              gmx_mtop_t *mtop,t_commrec *cr,const output_env_t oenv,
++      m = (1 - lambda)*atom->m + lambda*atom->mB;
 +    }
 +    if (pg->nweight > 0) {
 +      w = pg->weight[i];
 +    } else {
 +      w = 1;
 +    }
 +    if (EI_ENERGY_MINIMIZATION(ir->eI)) {
 +      /* Move the mass to the weight */
 +      w *= m;
 +      m = 1;
 +      pg->weight[i] = w;
 +    } else if (ir->eI == eiBD) {
 +      if (ir->bd_fric) {
 +      mbd = ir->bd_fric*ir->delta_t;
 +      } else {
 +      if (groups->grpnr[egcTC] == NULL) {
 +        mbd = ir->delta_t/ir->opts.tau_t[0];
 +      } else {
 +        mbd = ir->delta_t/ir->opts.tau_t[groups->grpnr[egcTC][ii]];
 +      }
 +      }
 +      w *= m/mbd;
 +      m = mbd;
 +      pg->weight[i] = w;
 +    }
 +    tmass  += m;
 +    wmass  += m*w;
 +    wwmass += m*w*w;
 +  }
 +
 +  if (wmass == 0) {
 +    gmx_fatal(FARGS,"The total%s mass of pull group %d is zero",
 +            pg->weight ? " weighted" : "",g);
 +  }
 +  if (fplog) {
 +    fprintf(fplog,
 +          "Pull group %d: %5d atoms, mass %9.3f",g,pg->nat,tmass);
 +    if (pg->weight || EI_ENERGY_MINIMIZATION(ir->eI) || ir->eI == eiBD) {
 +      fprintf(fplog,", weighted mass %9.3f",wmass*wmass/wwmass);
 +    }
 +    if (pg->epgrppbc == epgrppbcCOS) {
 +      fprintf(fplog,", cosine weighting will be used");
 +    }
 +    fprintf(fplog,"\n");
 +  }
 +  
 +  if (nfrozen == 0) {
 +    /* A value > 0 signals not frozen, it is updated later */
 +    pg->invtm  = 1.0;
 +  } else {
 +    ndim = 0;
 +    for(d=0; d<DIM; d++)
 +      ndim += pulldims[d]*pg->nat;
 +    if (fplog && nfrozen > 0 && nfrozen < ndim) {
 +      fprintf(fplog,
 +            "\nWARNING: In pull group %d some, but not all of the degrees of freedom\n"
 +            "         that are subject to pulling are frozen.\n"
 +            "         For pulling the whole group will be frozen.\n\n",
 +            g);
 +    }
 +    pg->invtm  = 0.0;
 +    pg->wscale = 1.0;
 +  }
 +}
 +/* Initialize the pull data stored in ir->pull.
 + *
 + * Sets the number of periodic dimensions from ir->ePBC, logs the pull setup
 + * when fplog is given, and decides whether the pull forces contribute to
 + * the virial (never for direction_periodic geometry or when the environment
 + * variable GMX_NO_PULLVIR is set). For every group with atoms it selects
 + * the PBC treatment (reference atom when pbcatom >= 0, cosine weighting
 + * otherwise) and calls init_pull_group_index(); groups without atoms act as
 + * an absolute reference with zero inverse mass. When PULL_CYL(pull) holds
 + * the dynamic groups are allocated. The -px/-pf output files are opened
 + * only when bOutFile is set and the matching nstxout/nstfout is > 0.
 + */
 +void init_pull(FILE *fplog,t_inputrec *ir,int nfile,const t_filenm fnm[],
-             init_pull_group_index(fplog,cr,start,end,g,pgrp,pull->dim,mtop,ir);
++               gmx_mtop_t *mtop,t_commrec *cr,const output_env_t oenv, real lambda,
 +               gmx_bool bOutFile, unsigned long Flags)
 +{
 +    t_pull    *pull;
 +    t_pullgrp *pgrp;
 +    int       g,start=0,end=0,m;
 +    gmx_bool      bCite;
 +    
 +    pull = ir->pull;
 +    
 +    pull->ePBC = ir->ePBC;
 +    switch (pull->ePBC)
 +    {
 +    case epbcNONE: pull->npbcdim = 0; break;
 +    case epbcXY:   pull->npbcdim = 2; break;
 +    default:       pull->npbcdim = 3; break;
 +    }
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\nWill apply %s COM pulling in geometry '%s'\n",
 +                EPULLTYPE(ir->ePull),EPULLGEOM(pull->eGeom));
 +        if (pull->grp[0].nat > 0)
 +        {
 +            fprintf(fplog,"between a reference group and %d group%s\n",
 +                    pull->ngrp,pull->ngrp==1 ? "" : "s");
 +        }
 +        else
 +        {
 +            fprintf(fplog,"with an absolute reference on %d group%s\n",
 +                    pull->ngrp,pull->ngrp==1 ? "" : "s");
 +        }
 +        bCite = FALSE;
 +        for(g=0; g<pull->ngrp+1; g++)
 +        {
 +            if (pull->grp[g].nat > 1 &&
 +                pull->grp[g].pbcatom < 0)
 +            {
 +                /* We are using cosine weighting */
 +                fprintf(fplog,"Cosine weighting is used for group %d\n",g);
 +                bCite = TRUE;
 +            }
 +        }
 +        if (bCite)
 +        {
 +            please_cite(fplog,"Engin2010");
 +        }
 +    }
 +    
 +    /* We always add the virial contribution,
 +     * except for geometry = direction_periodic where this is impossible.
 +     */
 +    pull->bVirial = (pull->eGeom != epullgDIRPBC);
 +    if (getenv("GMX_NO_PULLVIR") != NULL)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found env. var., will not add the virial contribution of the COM pull forces\n");
 +        }
 +        pull->bVirial = FALSE;
 +    }
 +    
 +    if (cr && PARTDECOMP(cr))
 +    {
 +        pd_at_range(cr,&start,&end);
 +    }
 +    pull->rbuf=NULL;
 +    pull->dbuf=NULL;
 +    pull->dbuf_cyl=NULL;
 +    pull->bRefAt = FALSE;
 +    pull->cosdim = -1;
 +    for(g=0; g<pull->ngrp+1; g++)
 +    {
 +        pgrp = &pull->grp[g];
 +        pgrp->epgrppbc = epgrppbcNONE;
 +        if (pgrp->nat > 0)
 +        {
 +            /* Determine if we need to take PBC into account for calculating
 +             * the COM's of the pull groups.
 +             */
 +            for(m=0; m<pull->npbcdim; m++)
 +            {
 +                if (pull->dim[m] && pgrp->nat > 1)
 +                {
 +                    if (pgrp->pbcatom >= 0)
 +                    {
 +                        pgrp->epgrppbc = epgrppbcREFAT;
 +                        pull->bRefAt   = TRUE;
 +                    }
 +                    else
 +                    {
 +                        if (pgrp->weight)
 +                        {
 +                            gmx_fatal(FARGS,"Pull groups can not have relative weights and cosine weighting at same time");
 +                        }
 +                        pgrp->epgrppbc = epgrppbcCOS;
 +                        if (pull->cosdim >= 0 && pull->cosdim != m)
 +                        {
 +                            gmx_fatal(FARGS,"Can only use cosine weighting with pulling in one dimension (use mdp option pull_dim)");
 +                        }
 +                        pull->cosdim = m;
 +                    }
 +                }
 +            }
 +            /* Set the indices */
++            init_pull_group_index(fplog,cr,start,end,g,pgrp,pull->dim,mtop,ir,lambda);
 +            if (PULL_CYL(pull) && pgrp->invtm == 0)
 +            {
 +                gmx_fatal(FARGS,"Can not have frozen atoms in a cylinder pull group");
 +            }
 +        }
 +        else
 +        {
 +            /* Absolute reference, set the inverse mass to zero */
 +            pgrp->invtm  = 0;
 +            pgrp->wscale = 1;
 +        }
 +    }      
 +    
 +    /* if we use dynamic reference groups, do some initialising for them */
 +    if (PULL_CYL(pull))
 +    {
 +        if (pull->grp[0].nat == 0)
 +        {
 +            gmx_fatal(FARGS, "Dynamic reference groups are not supported when using absolute reference!\n");
 +        }
 +        snew(pull->dyna,pull->ngrp+1);
 +    }
 +    
 +    /* Only do I/O when we are doing dynamics and if we are the MASTER */
 +    pull->out_x = NULL;
 +    pull->out_f = NULL;
 +    if (bOutFile)
 +    {
 +        if (pull->nstxout > 0)
 +        {
 +            pull->out_x = open_pull_out(opt2fn("-px",nfile,fnm),pull,oenv,TRUE,Flags);
 +        }
 +        if (pull->nstfout > 0)
 +        {
 +            pull->out_f = open_pull_out(opt2fn("-pf",nfile,fnm),pull,oenv,
 +                                        FALSE,Flags);
 +        }
 +    }
 +}
 +
 +void finish_pull(FILE *fplog,t_pull *pull)
 +{
 +    if (pull->out_x)
 +    {
 +        gmx_fio_fclose(pull->out_x);
 +    }
 +    if (pull->out_f)
 +    {
 +        gmx_fio_fclose(pull->out_f);
 +    }
 +}
index ab2287dbe3491ae4d9d1353378d7c745a286fd9e,0000000000000000000000000000000000000000..f836e518c020ac2c1e62fd46b1c9f0197c65e2e1
mode 100644,000000..100644
--- /dev/null
@@@ -1,1050 -1,0 +1,1050 @@@
-                     real lambda,real *dvdlambda,t_nrnb *nrnb)
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2008, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + 
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "vec.h"
 +#include "txtdump.h"
 +#include "mdrun.h"
 +#include "partdec.h"
 +#include "mdatoms.h"
 +#include "vsite.h"
 +#include "network.h"
 +#include "names.h"
 +#include "constr.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "physics.h"
 +#include "copyrite.h"
 +#include "shellfc.h"
 +#include "mtop_util.h"
 +#include "chargegroup.h"
 +#include "macros.h"
 +
 +/* Data for one shell particle: its atom index, the nuclei it is bonded to,
 + * the summed force constant and the state of the position minimizer.
 + */
 +typedef struct {
 +  int     nnucl;              /* Number of nuclei bonded to the shell */
 +  atom_id shell;              /* The shell id                         */
 +  atom_id nucl1,nucl2,nucl3;  /* The nuclei connected to the shell    */
 +  /* gmx_bool    bInterCG; */       /* Coupled to nuclei outside cg?        */
 +  real    k;                  /* force constant                       */
 +  real    k_1;                        /* 1 over force constant                */
 +  rvec    xold;               /* Shell position at the previous minimizer call */
 +  rvec    fold;               /* Shell force at the previous minimizer call    */
 +  rvec    step;               /* Per-dimension minimizer step size             */
 +} t_shell;
 +/* Bookkeeping for shell particles and flexible constraints: the global
 + * shell list, the locally handled subset and the work arrays used by the
 + * iterative minimization.
 + */
 +typedef struct gmx_shellfc {
 +  int     nshell_gl;       /* The number of shells in the system       */
 +  t_shell *shell_gl;       /* All the shells (for DD only)             */
 +  int     *shell_index_gl; /* Global shell index (for DD only)         */
 +  gmx_bool    bInterCG;        /* Are there inter charge-group shells?     */
 +  int     nshell;          /* The number of local shells               */
 +  t_shell *shell;          /* The local shells                         */
 +  int     shell_nalloc;    /* The allocation size of shell             */
 +  gmx_bool    bPredict;        /* Predict shell positions                  */
 +  gmx_bool    bForceInit;      /* Force initialization of shell positions  */
 +  int     nflexcon;        /* The number of flexible constraints       */
 +  rvec    *x[2];           /* Array for iterative minimization         */
 +  rvec    *f[2];           /* Array for iterative minimization         */
 +  int     x_nalloc;        /* The allocation size of x and f           */
 +  rvec    *acc_dir;        /* Acceleration direction for flexcon       */
 +  rvec    *x_old;          /* Old coordinates for flexcon              */
 +  int     flex_nalloc;     /* The allocation size of acc_dir and x_old */
 +  rvec    *adir_xnold;     /* Work space for init_adir                 */
 +  rvec    *adir_xnew;      /* Work space for init_adir                 */
 +  int     adir_nalloc;     /* Work space for init_adir                 */
 +} t_gmx_shellfc;
 +
 +      
 +static void pr_shell(FILE *fplog,int ns,t_shell s[])
 +{
 +  int i;
 +  
 +  fprintf(fplog,"SHELL DATA\n");
 +  fprintf(fplog,"%5s  %8s  %5s  %5s  %5s\n",
 +        "Shell","Force k","Nucl1","Nucl2","Nucl3");
 +  for(i=0; (i<ns); i++) {
 +    fprintf(fplog,"%5d  %8.3f  %5d",s[i].shell,1.0/s[i].k_1,s[i].nucl1);
 +    if (s[i].nnucl == 2)
 +      fprintf(fplog,"  %5d\n",s[i].nucl2);
 +    else if (s[i].nnucl == 3)
 +      fprintf(fplog,"  %5d  %5d\n",s[i].nucl2,s[i].nucl3);
 +    else
 +      fprintf(fplog,"\n");
 +  }
 +}
 +
 +static void predict_shells(FILE *fplog,rvec x[],rvec v[],real dt,
 +                         int ns,t_shell s[],
 +                         real mass[],gmx_mtop_t *mtop,gmx_bool bInit)
 +{
 +  int  i,m,s1,n1,n2,n3;
 +  real dt_1,dt_2,dt_3,fudge,tm,m1,m2,m3;
 +  rvec *ptr;
 +  t_atom *atom;
 +  
 +  /* We introduce a fudge factor for performance reasons: with this choice
 +   * the initial force on the shells is about a factor of two lower than 
 +   * without
 +   */
 +  fudge = 1.0;
 +    
 +  if (bInit) {
 +    if (fplog)
 +      fprintf(fplog,"RELAX: Using prediction for initial shell placement\n");
 +    ptr  = x;
 +    dt_1 = 1;
 +  }
 +  else {
 +    ptr  = v;
 +    dt_1 = fudge*dt;
 +  }
 +    
 +  for(i=0; (i<ns); i++) {
 +    s1 = s[i].shell;
 +    if (bInit)
 +      clear_rvec(x[s1]);
 +    switch (s[i].nnucl) {
 +    case 1:
 +      n1 = s[i].nucl1;
 +      for(m=0; (m<DIM); m++)
 +      x[s1][m]+=ptr[n1][m]*dt_1;
 +      break;
 +    case 2:
 +      n1 = s[i].nucl1;
 +      n2 = s[i].nucl2;
 +      if (mass) {
 +      m1 = mass[n1];
 +      m2 = mass[n2];
 +      } else {
 +      /* Not the correct masses with FE, but it is just a prediction... */
 +      m1 = atom[n1].m;
 +      m2 = atom[n2].m;
 +      }
 +      tm = dt_1/(m1+m2);
 +      for(m=0; (m<DIM); m++)
 +      x[s1][m]+=(m1*ptr[n1][m]+m2*ptr[n2][m])*tm;
 +      break;
 +    case 3:
 +      n1 = s[i].nucl1;
 +      n2 = s[i].nucl2;
 +      n3 = s[i].nucl3;
 +      if (mass) {
 +      m1 = mass[n1];
 +      m2 = mass[n2];
 +      m3 = mass[n3];
 +      } else {
 +      /* Not the correct masses with FE, but it is just a prediction... */
 +      gmx_mtop_atomnr_to_atom(mtop,n1,&atom);
 +      m1 = atom->m;
 +      gmx_mtop_atomnr_to_atom(mtop,n2,&atom);
 +      m2 = atom->m;
 +      gmx_mtop_atomnr_to_atom(mtop,n3,&atom);
 +      m3 = atom->m;
 +      }
 +      tm = dt_1/(m1+m2+m3);
 +      for(m=0; (m<DIM); m++)
 +      x[s1][m]+=(m1*ptr[n1][m]+m2*ptr[n2][m]+m3*ptr[n3][m])*tm;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Shell %d has %d nuclei!",i,s[i].nnucl);
 +    }
 +  }
 +}
 +/* Build the shell / flexible-constraint data from the topology.
 + *
 + * Counts the particles of each type in mtop (and logs the counts when
 + * fplog is given). Returns NULL when there are neither shells nor flexible
 + * constraints, and a structure with only nflexcon set when there are no
 + * shells. Otherwise every bonded interaction of the types in bondtypes is
 + * scanned per molecule to find, for each shell, its nuclei and its summed
 + * force constant; gmx_fatal() is called on inconsistencies (more than three
 + * bonds per shell, qA != qB with polarization, a shell count mismatch).
 + * Shell prediction is enabled unless the environment variable
 + * GMX_NOPREDICT is set; when x is given the initial shell positions are
 + * predicted right away. Prediction during the run is switched off when a
 + * shell is bonded to a nucleus in another charge group.
 + */
 +gmx_shellfc_t init_shell_flexcon(FILE *fplog,
 +                               gmx_mtop_t *mtop,int nflexcon,
 +                               rvec *x)
 +{
 +  struct gmx_shellfc *shfc;
 +  t_shell     *shell;
 +  int         *shell_index=NULL,*at2cg;
 +  t_atom      *atom;
 +  int         n[eptNR],ns,nshell,nsi;
 +  int         i,j,nmol,type,mb,mt,a_offset,cg,mol,ftype,nra;
 +  real        qS,alpha;
 +  int         aS,aN=0; /* Shell and nucleus */
 +  int         bondtypes[] = { F_BONDS, F_HARMONIC, F_CUBICBONDS, F_POLARIZATION, F_ANHARM_POL, F_WATER_POL };
 +#define NBT asize(bondtypes)
 +  t_iatom     *ia;
 +  gmx_mtop_atomloop_block_t aloopb;
 +  gmx_mtop_atomloop_all_t aloop;
 +  gmx_ffparams_t *ffparams;
 +  gmx_molblock_t *molb;
 +  gmx_moltype_t *molt;
 +  t_block     *cgs;
 +
 +  /* Count number of shells, and find their indices */
 +  for(i=0; (i<eptNR); i++) {
 +    n[i] = 0;
 +  }
 +
 +  aloopb = gmx_mtop_atomloop_block_init(mtop);
 +  while (gmx_mtop_atomloop_block_next(aloopb,&atom,&nmol)) {
 +    n[atom->ptype] += nmol;
 +  }
 +
 +  if (fplog) {
 +    /* Print the number of each particle type */  
 +    for(i=0; (i<eptNR); i++) {
 +      if (n[i] != 0) {
 +      fprintf(fplog,"There are: %d %ss\n",n[i],ptype_str[i]);
 +      }
 +    }
 +  }
 +
 +  nshell = n[eptShell];
 +  
 +  if (nshell == 0 && nflexcon == 0) {
 +    return NULL;
 +  }
 +
 +  snew(shfc,1);
 +  shfc->nflexcon = nflexcon;
 +
 +  if (nshell == 0) {
 +    return shfc;
 +  }
 +
 +  /* We have shells: fill the shell data structure */
 +
 +  /* Global system sized array, this should be avoided */
 +  snew(shell_index,mtop->natoms);
 +
 +  aloop = gmx_mtop_atomloop_all_init(mtop);
 +  nshell = 0;
 +  while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +    if (atom->ptype == eptShell) {
 +      shell_index[i] = nshell++;
 +    }
 +  }
 +
 +  snew(shell,nshell);
 +  
 +  /* Initiate the shell structures */    
 +  for(i=0; (i<nshell); i++) {
 +    shell[i].shell = NO_ATID;
 +    shell[i].nnucl = 0;
 +    shell[i].nucl1 = NO_ATID;
 +    shell[i].nucl2 = NO_ATID;
 +    shell[i].nucl3 = NO_ATID;
 +    /* shell[i].bInterCG=FALSE; */
 +    shell[i].k_1   = 0;
 +    shell[i].k     = 0;
 +  }
 +
 +  ffparams = &mtop->ffparams;
 +
 +  /* Now fill the structures */
 +  shfc->bInterCG = FALSE;
 +  ns = 0;
 +  a_offset = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    molt = &mtop->moltype[molb->type];
 +
 +    cgs = &molt->cgs;
 +    snew(at2cg,molt->atoms.nr);
 +    for(cg=0; cg<cgs->nr; cg++) {
 +      for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++) {
 +      at2cg[i] = cg;
 +      }
 +    }
 +
 +    atom = molt->atoms.atom;
 +    for(mol=0; mol<molb->nmol; mol++) {
 +      for(j=0; (j<NBT); j++) {
 +      ia = molt->ilist[bondtypes[j]].iatoms;
 +      for(i=0; (i<molt->ilist[bondtypes[j]].nr); ) {
 +        type  = ia[0];
 +        ftype = ffparams->functype[type];
 +        nra   = interaction_function[ftype].nratoms;
 +        
 +        /* Check whether we have a bond with a shell */
 +        aS = NO_ATID;
 +        
 +        switch (bondtypes[j]) {
 +        case F_BONDS:
 +        case F_HARMONIC:
 +        case F_CUBICBONDS:
 +        case F_POLARIZATION:
 +        case F_ANHARM_POL:
 +          if (atom[ia[1]].ptype == eptShell) {
 +            aS = ia[1];
 +            aN = ia[2];
 +          }
 +          else if (atom[ia[2]].ptype == eptShell) {
 +            aS = ia[2];
 +            aN = ia[1];
 +          }
 +          break;
 +        case F_WATER_POL:
 +          aN    = ia[4];  /* Dummy */
 +          aS    = ia[5];  /* Shell */
 +          break;
 +        default:
 +          gmx_fatal(FARGS,"Death Horror: %s, %d",__FILE__,__LINE__);
 +        }
 +        
 +        if (aS != NO_ATID) {    
 +          qS = atom[aS].q;
 +          
 +          /* Check whether one of the particles is a shell... */
 +          nsi = shell_index[a_offset+aS];
 +          if ((nsi < 0) || (nsi >= nshell))
 +            gmx_fatal(FARGS,"nsi is %d should be within 0 - %d. aS = %d",
 +                      nsi,nshell,aS);
 +          if (shell[nsi].shell == NO_ATID) {
 +            shell[nsi].shell = a_offset + aS;
 +            ns ++;
 +          }
 +          else if (shell[nsi].shell != a_offset+aS)
 +            gmx_fatal(FARGS,"Weird stuff in %s, %d",__FILE__,__LINE__);
 +          
 +          if      (shell[nsi].nucl1 == NO_ATID) {
 +            shell[nsi].nucl1 = a_offset + aN;
 +          } else if (shell[nsi].nucl2 == NO_ATID) {
 +            shell[nsi].nucl2 = a_offset + aN;
 +          } else if (shell[nsi].nucl3 == NO_ATID) {
 +            shell[nsi].nucl3 = a_offset + aN;
 +          } else {
 +            if (fplog)
 +              pr_shell(fplog,ns,shell);
 +            gmx_fatal(FARGS,"Can not handle more than three bonds per shell\n");
 +          }
 +          if (at2cg[aS] != at2cg[aN]) {
 +            /* shell[nsi].bInterCG = TRUE; */
 +            shfc->bInterCG = TRUE;
 +          }
 +          
 +          switch (bondtypes[j]) {
 +          case F_BONDS:
 +          case F_HARMONIC:
 +            shell[nsi].k    += ffparams->iparams[type].harmonic.krA;
 +            break;
 +          case F_CUBICBONDS:
 +            shell[nsi].k    += ffparams->iparams[type].cubic.kb;
 +            break;
 +          case F_POLARIZATION:
 +          case F_ANHARM_POL:
 +            if (!gmx_within_tol(qS, atom[aS].qB, GMX_REAL_EPS*10))
 +              gmx_fatal(FARGS,"polarize can not be used with qA(%e) != qB(%e) for atom %d of molecule block %d", qS, atom[aS].qB, aS+1, mb+1);
 +            shell[nsi].k    += sqr(qS)*ONE_4PI_EPS0/
 +              ffparams->iparams[type].polarize.alpha;
 +            break;
 +          case F_WATER_POL:
 +            if (!gmx_within_tol(qS, atom[aS].qB, GMX_REAL_EPS*10))
 +              gmx_fatal(FARGS,"water_pol can not be used with qA(%e) != qB(%e) for atom %d of molecule block %d", qS, atom[aS].qB, aS+1, mb+1);
 +            alpha          = (ffparams->iparams[type].wpol.al_x+
 +                              ffparams->iparams[type].wpol.al_y+
 +                              ffparams->iparams[type].wpol.al_z)/3.0;
 +            shell[nsi].k  += sqr(qS)*ONE_4PI_EPS0/alpha;
 +            break;
 +          default:
 +            gmx_fatal(FARGS,"Death Horror: %s, %d",__FILE__,__LINE__);
 +          }
 +          shell[nsi].nnucl++;
 +        }
 +        ia += nra+1;
 +        i  += nra+1;
 +      }
 +      }
 +      a_offset += molt->atoms.nr;
 +    }
 +    /* Done with this molecule type */
 +    sfree(at2cg);
 +  }
 +  
 +  /* Verify whether it's all correct */
 +  if (ns != nshell)
 +    gmx_fatal(FARGS,"Something weird with shells. They may not be bonded to something");
 +  
 +  for(i=0; (i<ns); i++)
 +    shell[i].k_1 = 1.0/shell[i].k;
 +  
 +  if (debug)
 +    pr_shell(debug,ns,shell);
 +
 +  
 +  shfc->nshell_gl      = ns;
 +  shfc->shell_gl       = shell;
 +  shfc->shell_index_gl = shell_index;
 +
 +  shfc->bPredict   = (getenv("GMX_NOPREDICT") == NULL);
 +  shfc->bForceInit = FALSE;
 +  if (!shfc->bPredict) {
 +    if (fplog)
 +      fprintf(fplog,"\nWill never predict shell positions\n");
 +  } else {
 +    shfc->bForceInit = (getenv("GMX_FORCEINIT") != NULL);
 +    if (shfc->bForceInit && fplog)
 +      fprintf(fplog,"\nWill always initiate shell positions\n");
 +  }
 +
 +  if (shfc->bPredict) {
 +    if (x) {
 +      predict_shells(fplog,x,NULL,0,shfc->nshell_gl,shfc->shell_gl,
 +                   NULL,mtop,TRUE);
 +    }
 +
 +    if (shfc->bInterCG) {
 +      if (fplog)
 +      fprintf(fplog,"\nNOTE: there all shells that are connected to particles outside thier own charge group, will not predict shells positions during the run\n\n");
 +      shfc->bPredict = FALSE;
 +    }
 +  }
 +
 +  return shfc;
 +}
 +
 +void make_local_shells(t_commrec *cr,t_mdatoms *md,
 +                     struct gmx_shellfc *shfc)
 +{
 +  t_shell *shell;
 +  int a0,a1,*ind,nshell,i;
 +  gmx_domdec_t *dd=NULL;
 +
 +  if (PAR(cr)) {
 +    if (DOMAINDECOMP(cr)) {
 +      dd = cr->dd;
 +      a0 = 0;
 +      a1 = dd->nat_home;
 +    } else {
 +      pd_at_range(cr,&a0,&a1);
 +    }
 +  } else {
 +    /* Single node: we need all shells, just copy the pointer */
 +    shfc->nshell = shfc->nshell_gl;
 +    shfc->shell  = shfc->shell_gl;
 +    
 +    return;
 +  }
 +
 +  ind = shfc->shell_index_gl;
 +
 +  nshell = 0;
 +  shell  = shfc->shell; 
 +  for(i=a0; i<a1; i++) {
 +    if (md->ptype[i] == eptShell) {
 +      if (nshell+1 > shfc->shell_nalloc) {
 +      shfc->shell_nalloc = over_alloc_dd(nshell+1);
 +      srenew(shell,shfc->shell_nalloc);
 +      }
 +      if (dd) {
 +      shell[nshell] = shfc->shell_gl[ind[dd->gatindex[i]]];
 +      } else {
 +      shell[nshell] = shfc->shell_gl[ind[i]];
 +      }
 +      /* With inter-cg shells we can no do shell prediction,
 +       * so we do not need the nuclei numbers.
 +       */
 +      if (!shfc->bInterCG) {
 +      shell[nshell].nucl1   = i + shell[nshell].nucl1 - shell[nshell].shell;
 +      if (shell[nshell].nnucl > 1)
 +        shell[nshell].nucl2 = i + shell[nshell].nucl2 - shell[nshell].shell;
 +      if (shell[nshell].nnucl > 2)
 +        shell[nshell].nucl3 = i + shell[nshell].nucl3 - shell[nshell].shell;
 +      }
 +      shell[nshell].shell = i;
 +      nshell++;
 +    }
 +  }
 +
 +  shfc->nshell = nshell;
 +  shfc->shell  = shell;
 +}
 +
 +static void do_1pos(rvec xnew,rvec xold,rvec f,real step)
 +{
 +  real xo,yo,zo;
 +  real dx,dy,dz;
 +  
 +  xo=xold[XX];
 +  yo=xold[YY];
 +  zo=xold[ZZ];
 +
 +  dx=f[XX]*step;
 +  dy=f[YY]*step;
 +  dz=f[ZZ]*step;
 +
 +  xnew[XX]=xo+dx;
 +  xnew[YY]=yo+dy;
 +  xnew[ZZ]=zo+dz;
 +}
 +
 +static void do_1pos3(rvec xnew,rvec xold,rvec f,rvec step)
 +{
 +  real xo,yo,zo;
 +  real dx,dy,dz;
 +  
 +  xo=xold[XX];
 +  yo=xold[YY];
 +  zo=xold[ZZ];
 +
 +  dx=f[XX]*step[XX];
 +  dy=f[YY]*step[YY];
 +  dz=f[ZZ]*step[ZZ];
 +
 +  xnew[XX]=xo+dx;
 +  xnew[YY]=yo+dy;
 +  xnew[ZZ]=zo+dz;
 +}
 +
 +static void directional_sd(FILE *log,rvec xold[],rvec xnew[],rvec acc_dir[],
 +                         int start,int homenr,real step)
 +{
 +  int  i;
 +
 +  for(i=start; i<homenr; i++)
 +    do_1pos(xnew[i],xold[i],acc_dir[i],step);
 +}
 +/* Steepest-descent style position update for the ns shells in s.
 + * On the first iteration (count == 1) the per-dimension step of each shell
 + * is set to k_1. On later iterations the step is rescaled using the change
 + * in position (dx) and force (df) since the previous call. The current
 + * position and force are then stored in xold/fold and xnew[shell] is set to
 + * xcur[shell] + f[shell]*step per dimension.
 + * The log argument is not used in this function.
 + */
 +static void shell_pos_sd(FILE *log,rvec xcur[],rvec xnew[],rvec f[],
 +                       int ns,t_shell s[],int count)
 +{
 +    const real step_scale_min = 0.8,
 +        step_scale_increment = 0.2,
 +        step_scale_max = 1.2,
 +        step_scale_multiple = (step_scale_max - step_scale_min) / step_scale_increment;
 +  int  i,shell,d;
 +  real dx,df,k_est;
 +#ifdef PRINT_STEP  
 +  real step_min,step_max;
 +
 +  step_min = 1e30;
 +  step_max = 0;
 +#endif
 +  for(i=0; (i<ns); i++) {
 +    shell = s[i].shell;
 +    if (count == 1) {
 +      for(d=0; d<DIM; d++) {
 +      s[i].step[d] = s[i].k_1;
 +#ifdef PRINT_STEP
 +      step_min = min(step_min,s[i].step[d]);
 +      step_max = max(step_max,s[i].step[d]);
 +#endif
 +      }
 +    } else {
 +      for(d=0; d<DIM; d++) {
 +      dx = xcur[shell][d] - s[i].xold[d];
 +      df =    f[shell][d] - s[i].fold[d];
 +    /* -dx/df gets used to generate an interpolated value, but would
 +     * cause a NaN if df were binary-equal to zero. Values close to
 +     * zero won't cause problems (because of the min() and max()), so
 +     * just testing for binary inequality is OK. */
 +    if (0.0 != df)
 +    {
 +        k_est = -dx/df;
 +        /* Scale the step size by a factor interpolated from
 +         * step_scale_min to step_scale_max, as k_est goes from 0 to
 +         * step_scale_multiple * s[i].step[d] */
 +        s[i].step[d] =
 +            step_scale_min * s[i].step[d] +
 +            step_scale_increment * min(step_scale_multiple * s[i].step[d], max(k_est, 0));
 +    }
 +    else
 +    {
 +        /* Here 0 == df */
 +        if (gmx_numzero(dx)) /* 0 == dx */
 +        {
 +            /* Likely this will never happen, but if it does just
 +             * don't scale the step. */
 +        }
 +        else /* 0 != dx */
 +        {
 +            s[i].step[d] *= step_scale_max;
 +        }
 +    }
 +#ifdef PRINT_STEP
 +      step_min = min(step_min,s[i].step[d]);
 +      step_max = max(step_max,s[i].step[d]);
 +#endif
 +      }
 +    }
 +    copy_rvec(xcur[shell],s[i].xold);
 +    copy_rvec(f[shell],   s[i].fold);
 +
 +    do_1pos3(xnew[shell],xcur[shell],f[shell],s[i].step);
 +
 +    if (gmx_debug_at) {
 +      fprintf(debug,"shell[%d] = %d\n",i,shell);
 +      pr_rvec(debug,0,"fshell",f[shell],DIM,TRUE);
 +      pr_rvec(debug,0,"xold",xcur[shell],DIM,TRUE);
 +      pr_rvec(debug,0,"step",s[i].step,DIM,TRUE);
 +      pr_rvec(debug,0,"xnew",xnew[shell],DIM,TRUE);
 +    }
 +  }
 +#ifdef PRINT_STEP
 +  printf("step %.3e %.3e\n",step_min,step_max);
 +#endif
 +}
 +
 +static void decrease_step_size(int nshell,t_shell s[])
 +{
 +  int i;
 +  
 +  for(i=0; i<nshell; i++)
 +    svmul(0.8,s[i].step,s[i].step);
 +}
 +
 +static void print_epot(FILE *fp,gmx_large_int_t mdstep,int count,real epot,real df,
 +                     int ndir,real sf_dir)
 +{
 +  char buf[22];
 +
 +  fprintf(fp,"MDStep=%5s/%2d EPot: %12.8e, rmsF: %6.2e",
 +        gmx_step_str(mdstep,buf),count,epot,df);
 +  if (ndir)
 +    fprintf(fp,", dir. rmsF: %6.2e\n",sqrt(sf_dir/ndir));
 +  else
 +    fprintf(fp,"\n");
 +}
 +
 +
 +static real rms_force(t_commrec *cr,rvec f[],int ns,t_shell s[],
 +                    int ndir,real *sf_dir,real *Epot)
 +{
 +  int  i,shell,ntot;
 +  double buf[4];
 +
 +  buf[0] = *sf_dir;
 +  for(i=0; i<ns; i++) {
 +    shell = s[i].shell;
 +    buf[0]  += norm2(f[shell]);
 +  }
 +  ntot = ns;
 +
 +  if (PAR(cr)) {
 +    buf[1] = ntot;
 +    buf[2] = *sf_dir;
 +    buf[3] = *Epot;
 +    gmx_sumd(4,buf,cr);
 +    ntot = (int)(buf[1] + 0.5);
 +    *sf_dir = buf[2];
 +    *Epot   = buf[3];
 +  }
 +  ntot += ndir;
 +
 +  return (ntot ? sqrt(buf[0]/ntot) : 0);
 +}
 +
 +static void check_pbc(FILE *fp,rvec x[],int shell)
 +{
 +  int m,now;
 +  
 +  now = shell-4;
 +  for(m=0; (m<DIM); m++)
 +    if (fabs(x[shell][m]-x[now][m]) > 0.3) {
 +      pr_rvecs(fp,0,"SHELL-X",x+now,5);
 +      break;
 +    }
 +}
 +
 +static void dump_shells(FILE *fp,rvec x[],rvec f[],real ftol,int ns,t_shell s[])
 +{
 +  int  i,shell;
 +  real ft2,ff2;
 +  
 +  ft2 = sqr(ftol);
 +  
 +  for(i=0; (i<ns); i++) {
 +    shell = s[i].shell;
 +    ff2   = iprod(f[shell],f[shell]);
 +    if (ff2 > ft2)
 +      fprintf(fp,"SHELL %5d, force %10.5f  %10.5f  %10.5f, |f| %10.5f\n",
 +            shell,f[shell][XX],f[shell][YY],f[shell][ZZ],sqrt(ff2));
 +    check_pbc(fp,x,shell);
 +  }
 +}
 +
 +static void init_adir(FILE *log,gmx_shellfc_t shfc,
 +                    gmx_constr_t constr,t_idef *idef,t_inputrec *ir,
 +                    t_commrec *cr,int dd_ac1,
 +                    gmx_large_int_t step,t_mdatoms *md,int start,int end,
 +                    rvec *x_old,rvec *x_init,rvec *x,
 +                    rvec *f,rvec *acc_dir,matrix box,
-           lambda,dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
++                    real *lambda,real *dvdlambda,t_nrnb *nrnb)
 +{
 +  rvec   *xnold,*xnew;
 +  double w_dt;
 +  int    gf,ga,gt;
 +  real   dt,scale;
 +  int    n,d; 
 +  unsigned short *ptype;
 +  rvec   p,dx;
 +  
 +  if (DOMAINDECOMP(cr))
 +    n = dd_ac1;
 +  else
 +    n = end - start;
 +  if (n > shfc->adir_nalloc) {
 +    shfc->adir_nalloc = over_alloc_dd(n);
 +    srenew(shfc->adir_xnold,shfc->adir_nalloc);
 +    srenew(shfc->adir_xnew ,shfc->adir_nalloc);
 +  }
 +  xnold = shfc->adir_xnold;
 +  xnew  = shfc->adir_xnew;
 +    
 +  ptype = md->ptype;
 +
 +  dt = ir->delta_t;
 +
 +  /* Does NOT work with freeze or acceleration groups (yet) */
 +  for (n=start; n<end; n++) {  
 +    w_dt = md->invmass[n]*dt;
 +    
 +    for (d=0; d<DIM; d++) {
 +      if ((ptype[n] != eptVSite) && (ptype[n] != eptShell)) {
 +      xnold[n-start][d] = x[n][d] - (x_init[n][d] - x_old[n][d]);
 +      xnew[n-start][d] = 2*x[n][d] - x_old[n][d] + f[n][d]*w_dt*dt;
 +      } else {
 +      xnold[n-start][d] = x[n][d];
 +      xnew[n-start][d] = x[n][d];
 +      }
 +    }
 +  }
 +  constrain(log,FALSE,FALSE,constr,idef,ir,NULL,cr,step,0,md,
 +          x,xnold-start,NULL,box,
-           lambda,dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
++          lambda[efptBONDED],&(dvdlambda[efptBONDED]),NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +  constrain(log,FALSE,FALSE,constr,idef,ir,NULL,cr,step,0,md,
 +          x,xnew-start,NULL,box,
-           lambda,dvdlambda,NULL,NULL,nrnb,econqDeriv_FlexCon,FALSE,0,0); 
++          lambda[efptBONDED],&(dvdlambda[efptBONDED]),NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +
 +  /* Set xnew to minus the acceleration */
 +  for (n=start; n<end; n++) {
 +    for(d=0; d<DIM; d++)
 +      xnew[n-start][d] =
 +      -(2*x[n][d]-xnold[n-start][d]-xnew[n-start][d])/sqr(dt)
 +      - f[n][d]*md->invmass[n];
 +    clear_rvec(acc_dir[n]);
 +  }
 +
 +  /* Project the acceleration on the old bond directions */
 +  constrain(log,FALSE,FALSE,constr,idef,ir,NULL,cr,step,0,md,
 +          x_old,xnew-start,acc_dir,box,
++          lambda[efptBONDED],&(dvdlambda[efptBONDED]),NULL,NULL,nrnb,econqDeriv_FlexCon,FALSE,0,0);
 +}
 +
 +int relax_shell_flexcon(FILE *fplog,t_commrec *cr,gmx_bool bVerbose,
 +                      gmx_large_int_t mdstep,t_inputrec *inputrec,
 +                      gmx_bool bDoNS,int force_flags,
 +                      gmx_bool bStopCM,
 +                      gmx_localtop_t *top,
 +                      gmx_mtop_t* mtop,
 +                      gmx_constr_t constr,
 +                      gmx_enerdata_t *enerd,t_fcdata *fcd,
 +                      t_state *state,rvec f[],
 +                      tensor force_vir,
 +                      t_mdatoms *md,
 +                      t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                      t_graph *graph,
 +                      gmx_groups_t *groups,
 +                      struct gmx_shellfc *shfc,
 +                      t_forcerec *fr,
 +                      gmx_bool bBornRadii,
 +                      double t,rvec mu_tot,
 +                      int natoms,gmx_bool *bConverged,
 +                      gmx_vsite_t *vsite,
 +                      FILE *fp_field)
 +{
 +  int    nshell;
 +  t_shell *shell;
 +  t_idef *idef;
 +  rvec   *pos[2],*force[2],*acc_dir=NULL,*x_old=NULL;
 +  real   Epot[2],df[2];
 +  rvec   dx;
 +  real   sf_dir,invdt;
 +  real   ftol,xiH,xiS,dum=0;
 +  char   sbuf[22];
 +  gmx_bool   bCont,bInit;
 +  int    nat,dd_ac0,dd_ac1=0,i;
 +  int    start=md->start,homenr=md->homenr,end=start+homenr,cg0,cg1;
 +  int    nflexcon,g,number_steps,d,Min=0,count=0;
 +#define  Try (1-Min)             /* At start Try = 1 */
 +
 +  bCont        = (mdstep == inputrec->init_step) && inputrec->bContinuation;
 +  bInit        = (mdstep == inputrec->init_step) || shfc->bForceInit;
 +  ftol         = inputrec->em_tol;
 +  number_steps = inputrec->niter;
 +  nshell       = shfc->nshell;
 +  shell        = shfc->shell;
 +  nflexcon     = shfc->nflexcon;
 +
 +  idef = &top->idef;
 +
 +  if (DOMAINDECOMP(cr)) {
 +    nat = dd_natoms_vsite(cr->dd);
 +    if (nflexcon > 0) {
 +      dd_get_constraint_range(cr->dd,&dd_ac0,&dd_ac1);
 +      nat = max(nat,dd_ac1);
 +    }
 +  } else {
 +    nat = state->natoms;
 +  }
 +
 +  if (nat > shfc->x_nalloc) {
 +    /* Allocate local arrays */
 +    shfc->x_nalloc = over_alloc_dd(nat);
 +    for(i=0; (i<2); i++) {
 +      srenew(shfc->x[i],shfc->x_nalloc);
 +      srenew(shfc->f[i],shfc->x_nalloc);
 +    }
 +  }
 +  for(i=0; (i<2); i++) {
 +    pos[i]   = shfc->x[i];
 +    force[i] = shfc->f[i];
 +  }
 +     
 +  /* With particle decomposition this code only works
 +   * when all particles involved with each shell are in the same cg.
 +   */
 +
 +  if (bDoNS && inputrec->ePBC != epbcNONE && !DOMAINDECOMP(cr)) {
 +    /* This is the only time where the coordinates are used
 +     * before do_force is called, which normally puts all
 +     * charge groups in the box.
 +     */
 +    if (PARTDECOMP(cr)) {
 +      pd_cg_range(cr,&cg0,&cg1);
 +    } else {
 +      cg0 = 0;
 +      cg1 = top->cgs.nr;
 +    }
 +    put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,state->box,
 +                           &(top->cgs),state->x,fr->cg_cm);
 +    if (graph)
 +      mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
 +  }
 +
 +  /* After this all coordinate arrays will contain whole molecules */
 +  if (graph)
 +    shift_self(graph,state->box,state->x);
 +
 +  if (nflexcon) {
 +    if (nat > shfc->flex_nalloc) {
 +      shfc->flex_nalloc = over_alloc_dd(nat);
 +      srenew(shfc->acc_dir,shfc->flex_nalloc);
 +      srenew(shfc->x_old,shfc->flex_nalloc);
 +    }
 +    acc_dir = shfc->acc_dir;
 +    x_old   = shfc->x_old;
 +    for(i=0; i<homenr; i++) {
 +      for(d=0; d<DIM; d++)
 +        shfc->x_old[i][d] =
 +        state->x[start+i][d] - state->v[start+i][d]*inputrec->delta_t;
 +    }
 +  }
 +
 +  /* Do a prediction of the shell positions */
 +  if (shfc->bPredict && !bCont) {
 +    predict_shells(fplog,state->x,state->v,inputrec->delta_t,nshell,shell,
 +                 md->massT,NULL,bInit);
 +  }
 +
 +  /* do_force expects the charge groups to be in the box */
 +  if (graph)
 +    unshift_self(graph,state->box,state->x);
 +
 +  /* Calculate the forces first time around */
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"x b4 do_force",state->x + start,homenr);
 +  }
 +  do_force(fplog,cr,inputrec,mdstep,nrnb,wcycle,top,mtop,groups,
 +         state->box,state->x,&state->hist,
 +         force[Min],force_vir,md,enerd,fcd,
 +         state->lambda,graph,
 +         fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
 +         (bDoNS ? GMX_FORCE_NS : 0) | force_flags);
 +
 +  sf_dir = 0;
 +  if (nflexcon) {
 +    init_adir(fplog,shfc,
 +            constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
 +            shfc->x_old-start,state->x,state->x,force[Min],
 +            shfc->acc_dir-start,state->box,state->lambda,&dum,nrnb);
 +
 +    for(i=start; i<end; i++)
 +      sf_dir += md->massT[i]*norm2(shfc->acc_dir[i-start]);
 +  }
 +
 +  Epot[Min] = enerd->term[F_EPOT];
 +
 +  df[Min]=rms_force(cr,shfc->f[Min],nshell,shell,nflexcon,&sf_dir,&Epot[Min]);
 +  df[Try]=0;
 +  if (debug) {
 +    fprintf(debug,"df = %g  %g\n",df[Min],df[Try]);
 +  }
 +
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"force0",force[Min],md->nr);
 +  }
 +
 +  if (nshell+nflexcon > 0) {
 +    /* Copy x to pos[Min] & pos[Try]: during minimization only the
 +     * shell positions are updated, therefore the other particles must
 +     * be set here.
 +     */
 +    memcpy(pos[Min],state->x,nat*sizeof(state->x[0]));
 +    memcpy(pos[Try],state->x,nat*sizeof(state->x[0]));
 +  }
 +  
 +  if (bVerbose && MASTER(cr))
 +    print_epot(stdout,mdstep,0,Epot[Min],df[Min],nflexcon,sf_dir);
 +
 +  if (debug) {
 +    fprintf(debug,"%17s: %14.10e\n",
 +          interaction_function[F_EKIN].longname,enerd->term[F_EKIN]);
 +    fprintf(debug,"%17s: %14.10e\n",
 +          interaction_function[F_EPOT].longname,enerd->term[F_EPOT]);
 +    fprintf(debug,"%17s: %14.10e\n",
 +          interaction_function[F_ETOT].longname,enerd->term[F_ETOT]);
 +    fprintf(debug,"SHELLSTEP %s\n",gmx_step_str(mdstep,sbuf));
 +  }
 +  
 +  /* First check whether we should do shells, or whether the force is 
 +   * low enough even without minimization.
 +   */
 +  *bConverged = (df[Min] < ftol);
 +  
 +  for(count=1; (!(*bConverged) && (count < number_steps)); count++) {
 +    if (vsite)
 +      construct_vsites(fplog,vsite,pos[Min],nrnb,inputrec->delta_t,state->v,
 +                     idef->iparams,idef->il,
 +                     fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +     
 +    if (nflexcon) {
 +      init_adir(fplog,shfc,
 +              constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
 +              x_old-start,state->x,pos[Min],force[Min],acc_dir-start,
 +              state->box,state->lambda,&dum,nrnb);
 +      
 +      directional_sd(fplog,pos[Min],pos[Try],acc_dir-start,start,end,
 +                   fr->fc_stepsize);
 +    }
 +    
 +    /* New positions, Steepest descent */
 +    shell_pos_sd(fplog,pos[Min],pos[Try],force[Min],nshell,shell,count); 
 +
 +    /* do_force expects the charge groups to be in the box */
 +    if (graph)
 +      unshift_self(graph,state->box,pos[Try]);
 +
 +    if (gmx_debug_at) {
 +      pr_rvecs(debug,0,"RELAX: pos[Min]  ",pos[Min] + start,homenr);
 +      pr_rvecs(debug,0,"RELAX: pos[Try]  ",pos[Try] + start,homenr);
 +    }
 +    /* Try the new positions */
 +    do_force(fplog,cr,inputrec,1,nrnb,wcycle,
 +           top,mtop,groups,state->box,pos[Try],&state->hist,
 +           force[Try],force_vir,
 +           md,enerd,fcd,state->lambda,graph,
 +           fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
 +           force_flags);
 +    
 +    if (gmx_debug_at) {
 +      pr_rvecs(debug,0,"RELAX: force[Min]",force[Min] + start,homenr);
 +      pr_rvecs(debug,0,"RELAX: force[Try]",force[Try] + start,homenr);
 +    }
 +    sf_dir = 0;
 +    if (nflexcon) {
 +      init_adir(fplog,shfc,
 +              constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
 +              x_old-start,state->x,pos[Try],force[Try],acc_dir-start,
 +              state->box,state->lambda,&dum,nrnb);
 +
 +      for(i=start; i<end; i++)
 +      sf_dir += md->massT[i]*norm2(acc_dir[i-start]);
 +    }
 +
 +    Epot[Try] = enerd->term[F_EPOT]; 
 +    
 +    df[Try]=rms_force(cr,force[Try],nshell,shell,nflexcon,&sf_dir,&Epot[Try]);
 +
 +    if (debug)
 +      fprintf(debug,"df = %g  %g\n",df[Min],df[Try]);
 +
 +    if (debug) {
 +      if (gmx_debug_at)
 +      pr_rvecs(debug,0,"F na do_force",force[Try] + start,homenr);
 +      if (gmx_debug_at) {
 +      fprintf(debug,"SHELL ITER %d\n",count);
 +      dump_shells(debug,pos[Try],force[Try],ftol,nshell,shell);
 +      }
 +    }
 +
 +    if (bVerbose && MASTER(cr))
 +      print_epot(stdout,mdstep,count,Epot[Try],df[Try],nflexcon,sf_dir);
 +      
 +    *bConverged = (df[Try] < ftol);
 +    
 +    if ((df[Try] < df[Min])) {
 +      if (debug)
 +      fprintf(debug,"Swapping Min and Try\n");
 +      if (nflexcon) {
 +      /* Correct the velocities for the flexible constraints */
 +      invdt = 1/inputrec->delta_t;
 +      for(i=start; i<end; i++) {
 +        for(d=0; d<DIM; d++)
 +          state->v[i][d] += (pos[Try][i][d] - pos[Min][i][d])*invdt;
 +      }
 +      }
 +      Min  = Try;
 +    } else {
 +      decrease_step_size(nshell,shell);
 +    }
 +  }
 +  if (MASTER(cr) && !(*bConverged)) {
 +    /* Note that the energies and virial are incorrect when not converged */
 +    if (fplog)
 +      fprintf(fplog,
 +            "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
 +            gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
 +    fprintf(stderr,
 +          "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
 +          gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
 +  }
 +
 +  /* Copy back the coordinates and the forces */
 +  memcpy(state->x,pos[Min],nat*sizeof(state->x[0]));
 +  memcpy(f,force[Min],nat*sizeof(f[0]));
 +
 +  return count; 
 +}
 +
index 4285ffd58497b649e5a2401214c1abba2ab89f4d,0000000000000000000000000000000000000000..0f5380dc1bf0cb3b26416d87eee9b650a70eb6b7
mode 100644,000000..100644
--- /dev/null
@@@ -1,1559 -1,0 +1,1638 @@@
-  * 
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
-  * 
++ *
 + *                This source code is part of
-  * 
++ *
 + *                 G   R   O   M   A   C   S
-  * 
++ *
 + *          GROningen MAchine for Chemical Simulations
-  * 
++ *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
-  * 
++ *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
-  * 
++ *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
-  * 
++ *
 + * For more info, check our website at http://www.gromacs.org
-     
++ *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#include<catamount/dclock.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include <time.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
 +#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "copyrite.h"
 +#include "pull_rotation.h"
++#include "gmx_random.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_wallcycle.h"
 +#include "genborn.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include "adress.h"
 +#include "qmmm.h"
 +
 +#if 0
 +typedef struct gmx_timeprint {
-       
++
 +} t_gmx_timeprint;
 +#endif
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +double
 +gmx_gettime()
 +{
 +#ifdef HAVE_GETTIMEOFDAY
 +      struct timeval t;
 +      double seconds;
-       
++
 +      gettimeofday(&t,NULL);
-       
++
 +      seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
-       
++
 +      return seconds;
 +#else
 +      double  seconds;
-       
++
 +      seconds = time(NULL);
- void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,   
++
 +      return seconds;
 +#endif
 +}
 +
 +
 +#define difftime(end,start) ((double)(end)-(double)(start))
 +
-     
++void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,
 +                t_inputrec *ir, t_commrec *cr)
 +{
 +    time_t finish;
 +    char   timebuf[STRLEN];
 +    double dt;
 +    char buf[48];
-         
++
 +#ifndef GMX_THREAD_MPI
 +    if (!PAR(cr))
 +#endif
 +    {
 +        fprintf(out,"\r");
 +    }
 +    fprintf(out,"step %s",gmx_step_str(step,buf));
 +    if ((step >= ir->nstlist))
 +    {
 +        if ((ir->nstlist == 0) || ((step % ir->nstlist) == 0))
 +        {
 +            /* We have done a full cycle; let's update time_per_step */
 +            runtime->last = gmx_gettime();
 +            dt = difftime(runtime->last,runtime->real);
 +            runtime->time_per_step = dt/(step - ir->init_step + 1);
 +        }
 +        dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
-             {    
++
 +        if (ir->nsteps >= 0)
 +        {
 +            if (dt >= 300)
- #ifdef NO_CLOCK 
++            {
 +                finish = (time_t) (runtime->last + dt);
 +                gmx_ctime_r(&finish,timebuf,STRLEN);
 +                sprintf(buf,"%s",timebuf);
 +                buf[strlen(buf)-1]='\0';
 +                fprintf(out,", will finish %s",buf);
 +            }
 +            else
 +                fprintf(out,", remaining runtime: %5d s          ",(int)dt);
 +        }
 +        else
 +        {
 +            fprintf(out," performance: %.1f ns/day    ",
 +                    ir->delta_t/1000*24*60*60/runtime->time_per_step);
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        fprintf(out,"\n");
 +    }
 +#endif
 +
 +    fflush(out);
 +}
 +
-     
++#ifdef NO_CLOCK
 +#define clock() -1
 +#endif
 +
 +static double set_proctime(gmx_runtime_t *runtime)
 +{
 +    double diff;
 +#ifdef GMX_CRAY_XT3
 +    double prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = dclock();
-     
++
 +    diff = runtime->proc - prev;
 +#else
 +    clock_t prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = clock();
 +
 +    diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
 +#endif
 +    if (diff < 0)
 +    {
 +        /* The counter has probably looped, ignore this data */
 +        diff = 0;
 +    }
 +
 +    return diff;
 +}
 +
 +void runtime_start(gmx_runtime_t *runtime)
 +{
 +    runtime->real = gmx_gettime();
 +    runtime->proc          = 0;
 +    set_proctime(runtime);
 +    runtime->realtime      = 0;
 +    runtime->proctime      = 0;
 +    runtime->last          = 0;
 +    runtime->time_per_step = 0;
 +}
 +
 +void runtime_end(gmx_runtime_t *runtime)
 +{
 +    double now;
-     
++
 +    now = gmx_gettime();
-   
++
 +    runtime->proctime += set_proctime(runtime);
 +    runtime->realtime  = now - runtime->real;
 +    runtime->real      = now;
 +}
 +
 +void runtime_upd_proc(gmx_runtime_t *runtime)
 +{
 +    runtime->proctime += set_proctime(runtime);
 +}
 +
 +void print_date_and_time(FILE *fplog,int nodeid,const char *title,
 +                         const gmx_runtime_t *runtime)
 +{
 +    int i;
 +    char timebuf[STRLEN];
 +    char time_string[STRLEN];
 +    time_t tmptime;
 +
 +    if (fplog)
 +    {
 +        if (runtime != NULL)
 +        {
 +            tmptime = (time_t) runtime->real;
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        else
 +        {
 +            tmptime = (time_t) gmx_gettime();
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        for(i=0; timebuf[i]>=' '; i++)
 +        {
 +            time_string[i]=timebuf[i];
 +        }
 +        time_string[i]='\0';
 +
 +        fprintf(fplog,"%s on node %d %s\n",title,nodeid,time_string);
 +    }
 +}
 +
 +static void sum_forces(int start,int end,rvec f[],rvec flr[])
 +{
 +  int i;
- /* 
++
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"fsr",f+start,end-start);
 +    pr_rvecs(debug,0,"flr",flr+start,end-start);
 +  }
 +  for(i=start; (i<end); i++)
 +    rvec_inc(f[i],flr[i]);
 +}
 +
-  * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e 
++/*
 + * calc_f_el calculates forces due to an electric field.
 + *
-  * Et[] contains the parameters for the time dependent 
-  * part of the field (not yet used). 
++ * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
 + *
-  * now. 
++ * Et[] contains the parameters for the time dependent
++ * part of the field (not yet used).
 + * Ex[] contains the parameters for
 + * the spatial dependent part of the field. You can have cool periodic
 + * fields in principle, but only a constant field is supported
-     
++ * now.
 + * The function should return the energy due to the electric field
 + * (if any) but for now returns 0.
 + *
 + * WARNING:
 + * There can be problems with the virial.
 + * Since the field is not self-consistent this is unavoidable.
 + * For neutral molecules the virial is correct within this approximation.
 + * For neutral systems with many charged molecules the error is small.
 + * But for systems with a net charge or a few charged molecules
 + * the error can be significant when the field is high.
 + * Solution: implement a self-consistent electric field into PME.
 + */
 +static void calc_f_el(FILE *fp,int  start,int homenr,
 +                      real charge[],rvec x[],rvec f[],
 +                      t_cosines Ex[],t_cosines Et[],double t)
 +{
 +    rvec Ext;
 +    real t0;
 +    int  i,m;
-   
-   /* Calculate partial virial, for local atoms only, based on short range. 
-    * Total virial is computed in global_stat, called from do_md 
++
 +    for(m=0; (m<DIM); m++)
 +    {
 +        if (Et[m].n > 0)
 +        {
 +            if (Et[m].n == 3)
 +            {
 +                t0 = Et[m].a[1];
 +                Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
 +            }
 +            else
 +            {
 +                Ext[m] = cos(Et[m].a[0]*t);
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 1.0;
 +        }
 +        if (Ex[m].n > 0)
 +        {
 +            /* Convert the field strength from V/nm to MD-units */
 +            Ext[m] *= Ex[m].a[0]*FIELDFAC;
 +            for(i=start; (i<start+homenr); i++)
 +                f[i][m] += charge[i]*Ext[m];
 +        }
 +        else
 +        {
 +            Ext[m] = 0;
 +        }
 +    }
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"%10g  %10g  %10g  %10g #FIELD\n",t,
 +                Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
 +    }
 +}
 +
 +static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
 +                      tensor vir_part,t_graph *graph,matrix box,
 +                      t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
 +{
 +  int i,j;
 +  tensor virtest;
 +
 +  /* The short-range virial from surrounding boxes */
 +  clear_mat(vir_part);
 +  calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
-               real lambda,t_graph *graph,
++
++  /* Calculate partial virial, for local atoms only, based on short range.
++   * Total virial is computed in global_stat, called from do_md
 +   */
 +  f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,homenr);
 +
 +  /* Add position restraint contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][i] += fr->vir_diag_posres[i];
 +  }
 +
 +  /* Add wall contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][ZZ] += fr->vir_wall_z[i];
 +  }
 +
 +  if (debug)
 +    pr_rvecs(debug,0,"vir_part",vir_part,DIM);
 +}
 +
 +static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
 +                             gmx_large_int_t step,real pforce,rvec *x,rvec *f)
 +{
 +  int  i;
 +  real pf2,fn2;
 +  char buf[STEPSTRSIZE];
 +
 +  pf2 = sqr(pforce);
 +  for(i=md->start; i<md->start+md->homenr; i++) {
 +    fn2 = norm2(f[i]);
 +    /* We also catch NAN, if the compiler does not optimize this away. */
 +    if (fn2 >= pf2 || fn2 != fn2) {
 +      fprintf(fp,"step %s  atom %6d  x %8.3f %8.3f %8.3f  force %12.5e\n",
 +            gmx_step_str(step,buf),
 +            ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
 +    }
 +  }
 +}
 +
 +void do_force(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
-     double mu[2*DIM]; 
++              real *lambda,t_graph *graph,
 +              t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int    cg0,cg1,i,j;
 +    int    start,homenr;
-     real   e,v,dvdl;
++    double mu[2*DIM];
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
 +    gmx_bool   bDoLongRange,bDoForces,bSepLRF;
 +    gmx_bool   bDoAdressWF;
 +    matrix boxs;
-   
++    real   e,v,dvdlambda[efptNR];
++    real   dvdl_dum,lambda_dum;
 +    t_pbc  pbc;
 +    float  cycles_ppdpme,cycles_pme,cycles_seppme,cycles_force;
-     bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE); 
++
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        pd_cg_range(cr,&cg0,&cg1);
 +    }
 +    else
 +    {
 +        cg0 = 0;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            cg1 = cr->dd->ncg_tot;
 +        }
 +        else
 +        {
 +            cg1 = top->cgs.nr;
 +        }
 +        if (fr->n_tpi > 0)
 +        {
 +            cg1--;
 +        }
 +    }
 +
 +    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
-         
-         /* Calculate total (local) dipole moment in a temporary common array. 
++    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
 +    bFillGrid     = (bNS && bStateChanged);
 +    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
 +    bDoForces     = (flags & GMX_FORCE_FORCES);
 +    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
 +    /* should probably move this to the forcerec since it doesn't change */
 +    bDoAdressWF   = ((fr->adress_type!=eAdressOff));
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
-   
-   if (fr->ePBC != epbcNONE) { 
++
++        /* Calculate total (local) dipole moment in a temporary common array.
 +         * This makes it possible to sum them over nodes faster.
 +         */
 +        calc_mu(start,homenr,
 +                x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                mu,mu+DIM);
 +    }
-     
-     if (bCalcCGCM) { 
++
++  if (fr->ePBC != epbcNONE) {
 +    /* Compute shift vectors every step,
 +     * because of pressure coupling or box deformation!
 +     */
 +    if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +      calc_shifts(box,fr->shift_vec);
-     } 
++
++    if (bCalcCGCM) {
 +      put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
 +                             &(top->cgs),x,fr->cg_cm);
 +      inc_nrnb(nrnb,eNR_CGCM,homenr);
 +      inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
-   } 
++    }
 +    else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +      unshift_self(graph,box,x);
 +    }
-   
++  }
 +  else if (bCalcCGCM) {
 +    calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
 +    inc_nrnb(nrnb,eNR_CGCM,homenr);
 +  }
-      */    
++
 +  if (bCalcCGCM) {
 +    if (PAR(cr)) {
 +      move_cgcm(fplog,cr,fr->cg_cm);
 +    }
 +    if (gmx_debug_at)
 +      pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
 +  }
 +
 +#ifdef GMX_MPI
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Send particle coordinates to the pme nodes.
 +     * Since this is only implemented for domain decomposition
 +     * and domain decomposition does not use the graph,
 +     * we do not need to worry about shifting.
-                    mdatoms->nChargePerturbed,lambda,
++     */
 +
 +    wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +    bBS = (inputrec->nwall == 2);
 +    if (bBS) {
 +      copy_mat(box,boxs);
 +      svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +    }
 +
 +    gmx_pme_send_x(cr,bBS ? boxs : box,x,
-                 (1.0 - lambda)*fr->mu_tot[0][j] + lambda*fr->mu_tot[1][j];
++                   mdatoms->nChargePerturbed,lambda[efptCOUL],
 +                   ( flags & GMX_FORCE_VIRIAL),step);
 +
 +    wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +  }
 +#endif /* GMX_MPI */
 +
 +    /* Communicate coordinates and sum dipole if necessary */
 +    if (PAR(cr))
 +    {
 +        wallcycle_start(wcycle,ewcMOVEX);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_move_x(cr->dd,box,x);
 +        }
 +        else
 +        {
 +            move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
 +        }
 +        /* When we don't need the total dipole we sum it in global_stat */
 +        if (bStateChanged && NEED_MUTOT(*inputrec))
 +        {
 +            gmx_sumd(2*DIM,mu,cr);
 +        }
 +        wallcycle_stop(wcycle,ewcMOVEX);
 +    }
 +    if (bStateChanged)
 +    {
 +
 +        /* update adress weight beforehand */
 +        if(bDoAdressWF)
 +        {
 +            /* need pbc for adress weight calculation with pbc_dx */
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +            if(fr->adress_site == eAdressSITEcog)
 +            {
 +                update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEcom)
 +            {
 +                update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEatomatom){
 +                update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else
 +            {
 +                update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                           inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +        }
 +
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0;j<DIM;j++)
 +            {
 +                fr->mu_tot[i][j] = mu[i*DIM + j];
 +            }
 +        }
 +    }
 +    if (fr->efep == efepNO)
 +    {
 +        copy_rvec(fr->mu_tot[0],mu_tot);
 +    }
 +    else
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            mu_tot[j] =
-         
++                (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (bNS)
 +    {
 +        wallcycle_start(wcycle,ewcNS);
-         dvdl = 0; 
++
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
 +        /* Reset long range forces if necessary */
 +        if (fr->bTwinRange)
 +        {
 +            /* Reset the (long-range) forces if necessary */
 +            clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
 +        }
 +
 +        /* Do the actual neighbour searching and if twin range electrostatics
 +         * also do the calculation of long range forces and energies.
 +         */
-            cr,nrnb,lambda,&dvdl,&enerd->grpp,bFillGrid,
++        for (i=0;i<efptNR;i++) {dvdlambda[i] = 0;}
 +        ns(fplog,fr,x,box,
 +           groups,&(inputrec->opts),top,mdatoms,
-             fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdl);
++           cr,nrnb,lambda,dvdlambda,&enerd->grpp,bFillGrid,
 +           bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
 +        if (bSepDVDL)
 +        {
-         enerd->dvdl_lin += dvdl;
-         
++            fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdlambda);
 +        }
-       
-     if (inputrec->implicit_solvent && bNS) 
++        enerd->dvdl_lin[efptVDW] += dvdlambda[efptVDW];
++        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
++
 +        wallcycle_stop(wcycle,ewcNS);
 +    }
-       
++
++    if (inputrec->implicit_solvent && bNS)
 +    {
 +        make_gb_nblist(cr,inputrec->gb_algorithm,inputrec->rlist,
 +                       x,box,fr,&top->idef,graph,fr->born);
 +    }
-     
++
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
-         if (fr->bF_NoVirSum) 
++
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle,ewcROT);
 +        do_rotation(cr,inputrec,box,x,t,step,wcycle,bNS);
 +        wallcycle_stop(wcycle,ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_force_lowlevel.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
-                    inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl,
++        if (fr->bF_NoVirSum)
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        if (bSepLRF)
 +        {
 +            /* Add the long range forces to the short range forces */
 +            for(i=0; i<fr->natoms_force_constr; i++)
 +            {
 +                copy_rvec(fr->f_twin[i],f[i]);
 +            }
 +        }
 +        else if (!(fr->bTwinRange && bNS))
 +        {
 +            /* Clear the short-range forces */
 +            clear_rvecs(fr->natoms_force_constr,f);
 +        }
 +
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        /* Position restraints always require full pbc. Check if we already did it for Adress */
 +        if(!(bStateChanged && bDoAdressWF))
 +        {
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +        }
 +        v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +                   top->idef.iparams_posres,
 +                   (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
-                     interaction_function[F_POSRES].longname,v,dvdl);
++                   inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda[efptRESTRAINT],&(dvdlambda[efptRESTRAINT]),
 +                   fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,
-          * grompp checks for this.
++                    interaction_function[F_POSRES].longname,v,dvdlambda);
 +        }
 +        enerd->term[F_POSRES] += v;
 +        /* This linear lambda dependence assumption is only correct
 +         * when only k depends on lambda,
 +         * not when the reference position depends on lambda.
-         enerd->dvdl_lin += dvdl;
++         * grompp checks for this.  (verify this is still the case?)
 +         */
-     }
++        enerd->dvdl_nonlin[efptRESTRAINT] += dvdlambda[efptRESTRAINT]; /* if just the force constant changes, this is linear,
++                                                                          but we can't be sure w/o additional checking that is
++                                                                          hard to do at this level of code. Otherwise,
++                                                                          the dvdl is not differentiable */
 +        inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
-     /* Compute the bonded and non-bonded energies and optionally forces */    
++        if ((inputrec->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
++        {
++            for(i=0; i<enerd->n_lambda; i++)
++            {
++                lambda_dum = (i==0 ? lambda[efptRESTRAINT] : inputrec->fepvals->all_lambda[efptRESTRAINT][i-1]);
++                v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
++                           top->idef.iparams_posres,
++                           (const rvec*)x,NULL,NULL,
++                           inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda_dum,&dvdl_dum,
++                           fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
++                enerd->enerpart_lambda[i] += v;
++            }
++        }
++   }
 +
-                       lambda,graph,&(top->excls),fr->mu_tot,
++    /* Compute the bonded and non-bonded energies and optionally forces */
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
 +                      x,hist,f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
-     
++                      inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
 +                      flags,&cycles_pme);
-     
++
 +    cycles_force = wallcycle_stop(wcycle,ewcFORCE);
-       
++
 +    if (ed)
 +    {
 +        do_flood(fplog,cr,x,f,ed,box,step);
 +    }
-     
++
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
-         
++
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
 +        {
 +            /* Compute thermodynamic force in hybrid AdResS region */
 +            adress_thermo_force(start,homenr,&(top->cgs),x,fr->f_novirsum,fr,mdatoms,
 +                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
-         
++
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            else
 +            {
 +                pd_move_f(cr,f,nrnb);
 +                if (bSepLRF)
 +                {
 +                    pd_move_f(cr,fr->f_twin,nrnb);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirsum=f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
 +                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
-         dvdl = 0; 
++
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    enerd->term[F_COM_PULL] = 0;
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        /* Calculate the center of mass forces, this requires communication,
 +         * which is why pull_potential is called close to other communication.
 +         * The virial contribution is calculated directly,
 +         * which is why we call pull_potential after calc_virial.
 +         */
 +        set_pbc(&pbc,inputrec->ePBC,box);
-                            cr,t,lambda,x,f,vir_force,&dvdl);
++        dvdlambda[efptRESTRAINT] = 0;
 +        enerd->term[F_COM_PULL] +=
 +            pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
-             fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
++                           cr,t,lambda[efptRESTRAINT],x,f,vir_force,&(dvdlambda[efptRESTRAINT]));
 +        if (bSepDVDL)
 +        {
-         enerd->dvdl_lin += dvdl;
++            fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdlambda[efptRESTRAINT]);
 +        }
-     
++        enerd->dvdl_lin[efptRESTRAINT] += dvdlambda[efptRESTRAINT];
 +    }
-         /* In case of node-splitting, the PP nodes receive the long-range 
++
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle,ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr,step,t);
 +        wallcycle_stop(wcycle,ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
 +        dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
 +
-          */    
++        /* In case of node-splitting, the PP nodes receive the long-range
 +         * forces, virial and energy from the PME nodes here.
-         dvdl = 0;
-         gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
++         */
 +        wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
-             fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
++        dvdlambda[efptCOUL] = 0;
++        gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdlambda[efptCOUL],
 +                          &cycles_seppme);
 +        if (bSepDVDL)
 +        {
-         enerd->dvdl_lin += dvdl;
++            fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdlambda[efptCOUL]);
 +        }
 +        enerd->term[F_COUL_RECIP] += e;
-             /* Spread the mesh force on virtual sites to the other particles... 
++        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
 +        }
 +        wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
 +    }
 +
 +    if (bDoForces && fr->bF_NoVirSum)
 +    {
 +        if (vsite)
 +        {
-     
++            /* Spread the mesh force on virtual sites to the other particles...
 +             * This is parallelized. MPI communication is performed
 +             * if the constructing atoms aren't local.
 +             */
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,
 +                           (flags & GMX_FORCE_VIRIAL),fr->vir_el_recip,
 +                           nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +        }
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Now add the forces, this is local */
 +            if (fr->bDomDec)
 +            {
 +                sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
 +            }
 +            else
 +            {
 +                sum_forces(start,start+homenr,f,fr->f_novirsum);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force,fr->vir_el_recip,vir_force);
 +            }
 +            if (debug)
 +            {
 +                pr_rvecs(debug,0,"vir_force",vir_force,DIM);
 +            }
 +        }
 +    }
-     
++
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(inputrec->opts),enerd);
-     real   dvdlambda;
++
 +    if (fr->print_force >= 0 && bDoForces)
 +    {
 +        print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
 +    }
 +}
 +
 +void do_constrain_first(FILE *fplog,gmx_constr_t constr,
 +                        t_inputrec *ir,t_mdatoms *md,
 +                        t_state *state,rvec *f,
 +                        t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir)
 +{
 +    int    i,m,start,end;
 +    gmx_large_int_t step;
 +    real   dt=ir->delta_t;
-     
++    real   dvdl_dum;
 +    rvec   *savex;
-     
++
 +    snew(savex,state->natoms);
 +
 +    start = md->start;
 +    end   = md->homenr + start;
-     dvdlambda = 0;
-     
++
 +    if (debug)
 +        fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
 +                start,md->homenr,end);
 +    /* Do a first constrain to reset particles... */
 +    step = ir->init_step;
 +    if (fplog)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(fplog,"\nConstraining the starting coordinates (step %s)\n",
 +                gmx_step_str(step,buf));
 +    }
-               state->box,state->lambda,&dvdlambda,
++    dvdl_dum = 0;
++
 +    /* constrain the current position */
 +    constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +              ir,NULL,cr,step,0,md,
 +              state->x,state->x,NULL,
-     if (EI_VV(ir->eI)) 
++              state->box,state->lambda[efptBONDED],&dvdl_dum,
 +              NULL,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
-                   state->box,state->lambda,&dvdlambda,
++    if (EI_VV(ir->eI))
 +    {
 +        /* constrain the initial velocity, and save it */
 +        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
 +        /* might not yet treat veta correctly */
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,0,md,
 +                  state->x,state->v,state->v,
-         for(i=start; (i<end); i++) 
++                  state->box,state->lambda[efptBONDED],&dvdl_dum,
 +                  NULL,NULL,nrnb,econqVeloc,ir->epc==epcMTTK,state->veta,state->veta);
 +    }
 +    /* constrain the initial velocities at t-dt/2 */
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->eI!=eiVV)
 +    {
-             for(m=0; (m<DIM); m++) 
++        for(i=start; (i<end); i++)
 +        {
-     /* Shake the positions at t=-dt with the positions at t=0                        
-      * as reference coordinates.                                                     
++            for(m=0; (m<DIM); m++)
 +            {
 +                /* Reverse the velocity */
 +                state->v[i][m] = -state->v[i][m];
 +                /* Store the position at t-dt in savex */
 +                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
 +            }
 +        }
-         dvdlambda = 0;
++    /* Shake the positions at t=-dt with the positions at t=0
++     * as reference coordinates.
 +         */
 +        if (fplog)
 +        {
 +            char buf[STEPSTRSIZE];
 +            fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %s)\n",
 +                    gmx_step_str(step,buf));
 +        }
-                   state->box,state->lambda,&dvdlambda,
++        dvdl_dum = 0;
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,-1,md,
 +                  state->x,savex,NULL,
-         
++                  state->box,state->lambda[efptBONDED],&dvdl_dum,
 +                  state->v,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
-     
++
 +        for(i=start; i<end; i++) {
 +            for(m=0; m<DIM; m++) {
 +                /* Re-reverse the velocities */
 +                state->v[i][m] = -state->v[i][m];
 +            }
 +        }
 +    }
-   real   scale,*vdwtab; 
 +    sfree(savex);
 +}
 +
 +void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
 +{
 +  double eners[2],virs[2],enersum,virsum,y0,f,g,h;
 +  double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
 +  double invscale,invscale2,invscale3;
 +  int    ri0,ri1,ri,i,offstart,offset;
-       
-       invscale = 1.0/(scale);  
++  real   scale,*vdwtab;
 +
 +  fr->enershiftsix = 0;
 +  fr->enershifttwelve = 0;
 +  fr->enerdiffsix = 0;
 +  fr->enerdifftwelve = 0;
 +  fr->virdiffsix = 0;
 +  fr->virdifftwelve = 0;
 +
 +  if (eDispCorr != edispcNO) {
 +    for(i=0; i<2; i++) {
 +      eners[i] = 0;
 +      virs[i]  = 0;
 +    }
 +    if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT)) {
 +      if (fr->rvdw_switch == 0)
 +      gmx_fatal(FARGS,
 +                "With dispersion correction rvdw-switch can not be zero "
 +                "for vdw-type = %s",evdw_names[fr->vdwtype]);
 +
 +      scale  = fr->nblists[0].tab.scale;
 +      vdwtab = fr->nblists[0].vdwtab;
 +
 +      /* Round the cut-offs to exact table values for precision */
 +      ri0 = floor(fr->rvdw_switch*scale);
 +      ri1 = ceil(fr->rvdw*scale);
 +      r0  = ri0/scale;
 +      r1  = ri1/scale;
 +      rc3 = r0*r0*r0;
 +      rc9  = rc3*rc3*rc3;
 +
 +      if (fr->vdwtype == evdwSHIFT) {
 +      /* Determine the constant energy shift below rvdw_switch */
 +      fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
 +      fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
 +      }
 +      /* Add the constant part from 0 to rvdw_switch.
 +       * This integration from 0 to rvdw_switch overcounts the number
 +       * of interactions by 1, as it also counts the self interaction.
 +       * We will correct for this later.
 +       */
 +      eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
 +      eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
-       
++
++      invscale = 1.0/(scale);
 +      invscale2 = invscale*invscale;
 +      invscale3 = invscale*invscale2;
 +
 +      /* following summation derived from cubic spline definition,
 +      Numerical Recipes in C, second edition, p. 113-116.  Exact
 +      for the cubic spline.  We first calculate the negative of
 +      the energy from rvdw to rvdw_switch, assuming that g(r)=1,
 +      and then add the more standard, abrupt cutoff correction to
 +      that result, yielding the long-range correction for a
 +      switched function.  We perform both the pressure and energy
 +      loops at the same time for simplicity, as the computational
 +      cost is low. */
-           
++
 +      for (i=0;i<2;i++) {
 +        enersum = 0.0; virsum = 0.0;
 +        if (i==0)
 +        offstart = 0;
 +      else
 +        offstart = 4;
 +      for (ri=ri0; ri<ri1; ri++) {
 +          r = ri*invscale;
 +          ea = invscale3;
 +          eb = 2.0*invscale2*r;
 +          ec = invscale*r*r;
-           
++
 +          pa = invscale3;
 +          pb = 3.0*invscale2*r;
 +          pc = 3.0*invscale*r*r;
 +          pd = r*r*r;
-         
++
 +          /* this "8" is from the packing in the vdwtab array - perhaps
 +          should be #define'ed? */
 +          offset = 8*ri + offstart;
 +          y0 = vdwtab[offset];
 +          f = vdwtab[offset+1];
 +          g = vdwtab[offset+2];
 +          h = vdwtab[offset+3];
-             g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);  
-           virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) + 
++
 +          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
-         
++            g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
++          virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) +
 +            2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
-         virsum  *= 4.0*M_PI; 
++
 +        }
 +        enersum *= 4.0*M_PI;
-     } 
++        virsum  *= 4.0*M_PI;
 +        eners[i] -= enersum;
 +        virs[i]  -= virsum;
 +      }
 +
 +      /* now add the correction for rvdw_switch to infinity */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
-     
++    }
 +    else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER)) {
 +      if (fr->vdwtype == evdwUSER && fplog)
 +      fprintf(fplog,
 +              "WARNING: using dispersion correction with user tables\n");
 +      rc3  = fr->rvdw*fr->rvdw*fr->rvdw;
 +      rc9  = rc3*rc3*rc3;
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } else {
 +      gmx_fatal(FARGS,
 +              "Dispersion correction is not implemented for vdw-type = %s",
 +              evdw_names[fr->vdwtype]);
 +    }
 +    fr->enerdiffsix    = eners[0];
 +    fr->enerdifftwelve = eners[1];
 +    /* The 0.5 is due to the Gromacs definition of the virial */
 +    fr->virdiffsix     = 0.5*virs[0];
 +    fr->virdifftwelve  = 0.5*virs[1];
 +  }
 +}
 +
 +void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
 +                   gmx_large_int_t step,int natoms,
 +                   matrix box,real lambda,tensor pres,tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr)
 +{
 +    gmx_bool bCorrAll,bCorrPres;
 +    real dvdlambda,invvol,dens,ninter,avcsix,avctwelve,enerdiff,svir=0,spres=0;
 +    int  m;
-     
++
 +    *prescorr = 0;
 +    *enercorr = 0;
 +    *dvdlcorr = 0;
-     
++
 +    clear_mat(virial);
 +    clear_mat(pres);
-         
++
 +    if (ir->eDispCorr != edispcNO) {
 +        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
 +                     ir->eDispCorr == edispcAllEnerPres);
-         if (fr->n_tpi) 
++
 +        invvol = 1/det(box);
-         } 
-         else 
++        if (fr->n_tpi)
 +        {
 +            /* Only correct for the interactions with the inserted molecule */
 +            dens = (natoms - fr->n_tpi)*invvol;
 +            ninter = fr->n_tpi;
-         
-         if (ir->efep == efepNO) 
++        }
++        else
 +        {
 +            dens = natoms*invvol;
 +            ninter = 0.5*natoms;
 +        }
-         } 
-         else 
++
++        if (ir->efep == efepNO)
 +        {
 +            avcsix    = fr->avcsix[0];
 +            avctwelve = fr->avctwelve[0];
-         
++        }
++        else
 +        {
 +            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
 +            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
 +        }
-         if (ir->efep != efepNO) 
++
 +        enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
 +        *enercorr += avcsix*enerdiff;
 +        dvdlambda = 0.0;
-         if (bCorrAll) 
++        if (ir->efep != efepNO)
 +        {
 +            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
 +        }
-             if (fr->efep != efepNO) 
++        if (bCorrAll)
 +        {
 +            enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
 +            *enercorr += avctwelve*enerdiff;
-         
-         if (bCorrPres) 
++            if (fr->efep != efepNO)
 +            {
 +                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
 +            }
 +        }
-             
++
++        if (bCorrPres)
 +        {
 +            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
 +            if (ir->eDispCorr == edispcAllEnerPres)
 +            {
 +                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
 +            }
 +            /* The factor 2 is because of the Gromacs virial definition */
 +            spres = -2.0*invvol*svir*PRESFAC;
-         
++
 +            for(m=0; m<DIM; m++) {
 +                virial[m][m] += svir;
 +                pres[m][m] += spres;
 +            }
 +            *prescorr += spres;
 +        }
-             if (bCorrPres) 
++
 +        /* Can't currently control when it prints, for now, just print when debugging */
 +        if (debug)
 +        {
 +            if (bCorrAll) {
 +                fprintf(debug,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                        avcsix,avctwelve);
 +            }
-         
++            if (bCorrPres)
 +            {
 +                fprintf(debug,
 +                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
 +                        *enercorr,spres,svir);
 +            }
 +            else
 +            {
 +                fprintf(debug,"Long Range LJ corr.: Epot %10g\n",*enercorr);
 +            }
 +        }
-         if (fr->efep != efepNO) 
++
 +        if (fr->bSepDVDL && do_per_step(step,ir->nstlog))
 +        {
 +            fprintf(fplog,sepdvdlformat,"Dispersion correction",
 +                    *enercorr,dvdlambda);
 +        }
-     if (molb->natoms_mol == 1 || 
++        if (fr->efep != efepNO)
 +        {
 +            *dvdlcorr += dvdlambda;
 +        }
 +    }
 +}
 +
 +void do_pbc_first(FILE *fplog,matrix box,t_forcerec *fr,
 +                t_graph *graph,rvec x[])
 +{ /* Computes the shift vectors for box into fr->shift_vec and, when a
 +   * graph is supplied, determines the graph shifts (mk_mshift) and
 +   * applies them to the coordinates x in place (shift_self).
 +   *
 +   * fplog may be NULL, in which case the two progress messages are
 +   * skipped. graph may be NULL, in which case only the shift vectors
 +   * are computed and x is left untouched.
 +   */
 +  if (fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +  calc_shifts(box,fr->shift_vec);
 +  if (graph) {
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at) /* extra graph dump only when gmx_debug_at is set */
 +      p_graph(debug,"do_pbc_first 1",graph);
 +    shift_self(graph,box,x);
 +    /* By doing an extra mk_mshift the molecules that are broken
 +     * because they were e.g. imported from another software
 +     * will be made whole again. Such are the healing powers
 +     * of GROMACS.
 +     */
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 2",graph);
 +  }
 +  if (fplog)
 +    fprintf(fplog,"Done rmpbc\n");
 +}
 +
 +static void low_do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +                          gmx_mtop_t *mtop,rvec x[],
 +                          gmx_bool bFirst)
 +{ /* Shared worker for do_pbc_first_mtop (bFirst TRUE) and do_pbc_mtop
 +   * (bFirst FALSE). Walks over every molecule block of mtop and, for
 +   * each molecule in the block, computes graph shifts (mk_mshift) and
 +   * applies them to that molecule's coordinates in x in place
 +   * (shift_self).
 +   *
 +   * Blocks whose molecules have a single atom are skipped; when bFirst
 +   * is FALSE, blocks whose molecule type has a single charge group are
 +   * skipped as well. The "Removing pbc first time" message is written
 +   * only when bFirst is TRUE and fplog is non-NULL.
 +   *
 +   * One t_graph is allocated up front, rebuilt per molecule block from
 +   * the molecule type's interaction lists, released with done_graph
 +   * after each block, and freed at the end.
 +   */
 +  t_graph *graph;
 +  int mb,as,mol;
 +  gmx_molblock_t *molb;
 +
 +  if (bFirst && fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +
 +  snew(graph,1);
 +  as = 0; /* running index into x of the first atom of the current molecule */
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
-       
++    if (molb->natoms_mol == 1 ||
 +      (!bFirst && mtop->moltype[molb->type].cgs.nr == 1)) {
 +      /* Just one atom or charge group in the molecule, no PBC required */
 +      as += molb->nmol*molb->natoms_mol;
 +    } else {
 +      /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
 +      mk_graph_ilist(NULL,mtop->moltype[molb->type].ilist,
 +                   0,molb->natoms_mol,FALSE,FALSE,graph);
-       
++
 +      for(mol=0; mol<molb->nmol; mol++) {
 +      mk_mshift(fplog,graph,ePBC,box,x+as);
-       
++
 +      shift_self(graph,box,x+as);
 +      /* The molecule is whole now.
 +       * We don't need the second mk_mshift call as in do_pbc_first,
 +       * since we no longer need this graph.
 +       */
- #endif  
++
 +      as += molb->natoms_mol; /* advance to the next molecule of this block */
 +      }
 +      done_graph(graph);
 +    }
 +  }
 +  sfree(graph);
 +}
 +
 +void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
 +                     gmx_mtop_t *mtop,rvec x[])
 +{ /* First-time variant: forwards to low_do_pbc_mtop with bFirst = TRUE,
 +   * which logs "Removing pbc first time" when fplog is non-NULL and
 +   * skips only single-atom molecules.
 +   */
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,TRUE);
 +}
 +
 +void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +               gmx_mtop_t *mtop,rvec x[])
 +{ /* Repeat variant: forwards to low_do_pbc_mtop with bFirst = FALSE,
 +   * which writes no start message and additionally skips molecule
 +   * types that consist of a single charge group.
 +   */
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,FALSE);
 +}
 +
 +void finish_run(FILE *fplog,t_commrec *cr,const char *confout,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
 +                gmx_runtime_t *runtime,
 +                gmx_bool bWriteStat)
 +{ /* End-of-run reporting. In order:
 +   *  1. sums the wall-cycle counters into cycles (wallcycle_sum);
 +   *  2. obtains the operation counts to report: with more than one node
 +   *     and GMX_MPI defined, nrnb->n is summed over the simulation
 +   *     communicator into a freshly allocated nrnb_tot on the master
 +   *     rank; with a single node nrnb itself is used;
 +   *  3. on SIMMASTER, prints the flop accounting (print_flop), which
 +   *     fills nbfs and mflop, and frees the temporary nrnb_tot;
 +   *  4. prints domain decomposition statistics on PP ranks when domain
 +   *     decomposition is active;
 +   *  5. with GMX_MPI and particle decomposition, gathers every rank's
 +   *     counters on the master and prints the load (pr_load);
 +   *  6. on SIMMASTER, prints the wall-cycle table and the performance
 +   *     summary: to fplog when it is non-NULL, and to stderr when
 +   *     bWriteStat is set.
 +   *
 +   * confout and the locals i and j are not used in this function.
 +   *
 +   * NOTE(review): on ranks other than SIMMASTER nrnb_tot is still NULL
 +   * when nrnb_tot->n is passed to MPI_Reduce. The receive buffer is only
 +   * significant at the root, but the expression is formed from a null
 +   * pointer - confirm this is acceptable for the t_nrnb layout.
 +   */
 +  int    i,j;
 +  t_nrnb *nrnb_tot=NULL;
 +  real   delta_t;
 +  double nbfs,mflop;
 +  double cycles[ewcNR];
 +
 +  wallcycle_sum(cr,wcycle,cycles);
 +
 +  if (cr->nnodes > 1) {
 +    if (SIMMASTER(cr))
 +      snew(nrnb_tot,1);
 +#ifdef GMX_MPI
 +    MPI_Reduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
 +               MASTERRANK(cr),cr->mpi_comm_mysim);
-     
++#endif
 +  } else {
 +    nrnb_tot = nrnb; /* single node: report the local counters directly */
 +  }
- #endif  
++
 +  if (SIMMASTER(cr)) {
 +    print_flop(fplog,nrnb_tot,&nbfs,&mflop);
 +    if (cr->nnodes > 1) {
 +      sfree(nrnb_tot); /* only allocated above in the multi-node case */
 +    }
 +  }
 +
 +  if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
 +    print_dd_statistics(cr,inputrec,fplog);
 +  }
 +
 +#ifdef GMX_MPI
 +    if (PARTDECOMP(cr))
 +    {
 +        if (MASTER(cr))
 +        {
 +            t_nrnb     *nrnb_all;
 +            int        s;
 +            MPI_Status stat;
 +
 +            /* Slot 0 holds the master's own counters, slot s those of rank s */
 +            snew(nrnb_all,cr->nnodes);
 +            nrnb_all[0] = *nrnb;
 +            for(s=1; s<cr->nnodes; s++)
 +            {
 +                MPI_Recv(nrnb_all[s].n,eNRNB,MPI_DOUBLE,s,0,
 +                         cr->mpi_comm_mysim,&stat);
 +            }
 +            pr_load(fplog,cr,nrnb_all);
 +            sfree(nrnb_all);
 +        }
 +        else
 +        {
 +            MPI_Send(nrnb->n,eNRNB,MPI_DOUBLE,MASTERRANK(cr),0,
 +                     cr->mpi_comm_mysim);
 +        }
 +    }
-     
++#endif
 +
 +  if (SIMMASTER(cr)) {
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
 +                    wcycle,cycles);
 +
 +    /* Non-dynamical integrators are reported with a zero time step */
 +    if (EI_DYNAMICS(inputrec->eI)) {
 +      delta_t = inputrec->delta_t;
 +    } else {
 +      delta_t = 0;
 +    }
-              real *lambda,double *lam0,
++
 +    if (fplog) {
 +        print_perf(fplog,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +    if (bWriteStat) {
 +        print_perf(stderr,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +
 +    /* Disabled older reporting code, kept for reference:
 +    runtime=inputrec->nsteps*inputrec->delta_t;
 +    if (bWriteStat) {
 +      if (cr->nnodes == 1)
 +      fprintf(stderr,"\n\n");
 +      print_perf(stderr,nodetime,realtime,runtime,&ntot,
 +               cr->nnodes-cr->npmenodes,FALSE);
 +    }
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);
 +    print_perf(fplog,nodetime,realtime,runtime,&ntot,cr->nnodes-cr->npmenodes,
 +             TRUE);
 +    if (PARTDECOMP(cr))
 +      pr_load(fplog,cr,nrnb_all);
 +    if (cr->nnodes > 1)
 +      sfree(nrnb_all);
 +    */
 +  }
 +}
 +
++extern void initialize_lambdas(FILE *fplog,t_inputrec *ir,int *fep_state,real *lambda,double *lam0)
++{ /* Fills the efptNR-component lambda vector with its initial values
++   * and mirrors it into lam0 when lam0 is non-NULL.
++   *
++   *  - No free-energy perturbation and no simulated tempering: every
++   *    component is set to 0 and the function returns immediately,
++   *    without touching *fep_state and without writing to the log.
++   *  - Otherwise *fep_state is set to fep->init_fep_state. Each
++   *    component becomes fep->init_lambda when that value is >= 0,
++   *    else the all_lambda entry of that component for the selected
++   *    state. With simulated tempering, every positive
++   *    ir->opts.ref_t[] is overwritten with the temperature of the
++   *    selected state, so ir is modified.
++   *
++   * In the second case the resulting vector is written to fplog when
++   * fplog is non-NULL.
++   *
++   * NOTE(review): *fep_state is used as an index into all_lambda and
++   * simtempvals->temperatures without a range check here - presumably
++   * validated before this call; confirm.
++   */
++    /* this function works, but could probably use a logic rewrite to keep all the different
++       types of efep straight. */
++
++    int i;
++    t_lambda *fep = ir->fepvals;
++
++    if ((ir->efep==efepNO) && (ir->bSimTemp == FALSE)) {
++        for (i=0;i<efptNR;i++)  {
++            lambda[i] = 0.0;
++            if (lam0)
++            {
++                lam0[i] = 0.0;
++            }
++        }
++        return;
++    } else {
++        *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
++                                             if checkpoint is set -- a kludge is in for now
++                                             to prevent this.*/
++        for (i=0;i<efptNR;i++)
++        {
++            /* overwrite lambda state with init_lambda for now for backwards compatibility */
++            if (fep->init_lambda>=0) /* if it's -1, it was never initialized */
++            {
++                lambda[i] = fep->init_lambda;
++                if (lam0) {
++                    lam0[i] = lambda[i];
++                }
++            }
++            else
++            {
++                lambda[i] = fep->all_lambda[i][*fep_state];
++                if (lam0) {
++                    lam0[i] = lambda[i];
++                }
++            }
++        }
++        if (ir->bSimTemp) {
++            /* need to rescale control temperatures to match current state */
++            for (i=0;i<ir->opts.ngtc;i++) {
++                if (ir->opts.ref_t[i] > 0) {
++                    ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
++                }
++            }
++        }
++    }
++
++    /* Send to the log the information on the current lambdas */
++    if (fplog != NULL)
++    {
++        fprintf(fplog,"Initial vector of lambda components:[ ");
++        for (i=0;i<efptNR;i++)
++        {
++            fprintf(fplog,"%10.4f ",lambda[i]);
++        }
++        fprintf(fplog,"]\n");
++    }
++    return;
++}
++
++
 +void init_md(FILE *fplog,
 +             t_commrec *cr,t_inputrec *ir,const output_env_t oenv,
 +             double *t,double *t0,
-       
++             real *lambda, int *fep_state, double *lam0,
 +             t_nrnb *nrnb,gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin,
 +             tensor force_vir,tensor shake_vir,rvec mu_tot,
 +             gmx_bool *bSimAnn,t_vcm **vcm, t_state *state, unsigned long Flags)
 +{ /* One-time setup before the MD loop. In order:
 +   *  - sets *t and *t0 to ir->init_t;
 +   *  - sets *bSimAnn to TRUE when any temperature-coupling group has
 +   *    annealing enabled and, if so, updates the annealing target
 +   *    temperatures for time ir->init_t;
 +   *  - initializes lambda, *fep_state and lam0 through
 +   *    initialize_lambdas (lambda, fep_state and lam0 are passed
 +   *    through unchanged);
 +   *  - creates the update object in *upd when upd is non-NULL and the
 +   *    center-of-mass motion data in *vcm when vcm is non-NULL;
 +   *  - for dynamical integrators that are not appending to existing
 +   *    files, requests the citation for Berendsen or v-rescale
 +   *    temperature coupling when that coupling is selected;
 +   *  - resets the operation counters in nrnb;
 +   *  - unless nfile is -1, opens the output files into *outf and sets
 +   *    up the energy bookkeeping in *mdebin (no energy file handle is
 +   *    passed to it when appending);
 +   *  - requests the AdResS citations when ir->bAdress is set;
 +   *  - clears force_vir, shake_vir and mu_tot.
 +   *
 +   * The state parameter and the locals j, n, tmpt and mod are not used
 +   * in this function.
 +   */
 +    int  i,j,n;
 +    real tmpt,mod;
-     if (ir->efep != efepNO)
-     {
-         *lam0 = ir->init_lambda;
-         *lambda = *lam0 + ir->init_step*ir->delta_lambda;
-     }
-     else
-     {
-         *lambda = *lam0   = 0.0;
-     } 
++
 +    /* Initial values */
 +    *t = *t0       = ir->init_t;
-     
 +
 +    *bSimAnn=FALSE;
 +    for(i=0;i<ir->opts.ngtc;i++)
 +    {
 +        /* set bSimAnn if any group is being annealed */
 +        if(ir->opts.annealing[i]!=eannNO)
 +        {
 +            *bSimAnn = TRUE;
 +        }
 +    }
 +    if (*bSimAnn)
 +    {
 +        update_annealing_target_temp(&(ir->opts),ir->init_t);
 +    }
-     
++
++    /* Initialize lambda variables */
++    initialize_lambdas(fplog,ir,fep_state,lambda,lam0);
++
 +    if (upd)
 +    {
 +        *upd = init_update(fplog,ir);
 +    }
-     
++
++
 +    if (vcm != NULL)
 +    {
 +        *vcm = init_vcm(fplog,&mtop->groups,ir);
 +    }
-     
++
 +    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
 +    {
 +        if (ir->etc == etcBERENDSEN)
 +        {
 +            please_cite(fplog,"Berendsen84a");
 +        }
 +        if (ir->etc == etcVRESCALE)
 +        {
 +            please_cite(fplog,"Bussi2007a");
 +        }
 +    }
-     
++
 +    init_nrnb(nrnb);
-     
++
 +    if (nfile != -1) /* nfile == -1 means: do not open any output files */
 +    {
 +        *outf = init_mdoutf(nfile,fnm,Flags,cr,ir,oenv);
 +
 +        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : (*outf)->fp_ene,
 +                              mtop,ir, (*outf)->fp_dhdl);
 +    }
-     /* Initiate variables */  
++
 +    if (ir->bAdress)
 +    {
 +      please_cite(fplog,"Fritsch12");
 +      please_cite(fplog,"Junghans10");
 +    }
-     
++    /* Initiate variables */
 +    clear_mat(force_vir);
 +    clear_mat(shake_vir);
 +    clear_rvec(mu_tot);
++
 +    debug_gmx();
 +}
 +
++
++
++
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 89517cdb1044197244af0c87a58c043d167eecbb,0000000000000000000000000000000000000000..4a346186f777b5dd4f941225d221946f9002d60e
mode 100644,000000..100644
--- /dev/null
@@@ -1,1872 -1,0 +1,1989 @@@
- #include "dihre.h"
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
++#include "txtdump.h"
 +#include "string2.h"
 +#include "membed.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
-              int repl_ex_nst,int repl_ex_seed,gmx_membed_t membed,
 +double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,t_forcerec *fr,
-     double     t,t0,lam0;
-     gmx_bool       bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres;
-     gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
++             int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
-     gmx_bool       bDoDHDL=FALSE;
-     gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
++    double     t,t0,lam0[efptNR];
++    gmx_bool   bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres,bEnergyHere;
++    gmx_bool   bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
 +               bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
 +               bBornRadii,bStartingFromCpt;
-     gmx_bool       bMasterState;
++    gmx_bool   bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
++    gmx_bool   do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
 +               bForceUpdate=FALSE,bCPT;
 +    int        mdof_flags;
++    gmx_bool   bMasterState;
 +    int        force_flags,cglo_flags;
 +    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
 +    int        i,m;
 +    t_trxstatus *status;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    t_state    *bufstate=NULL;   
 +    matrix     *scale_tot,pcoupl_mu,M,ebox;
 +    gmx_nlheur_t nlh;
 +    t_trxframe rerun_fr;
 +    gmx_repl_ex_t repl_ex=NULL;
 +    int        nchkpt=1;
 +    gmx_localtop_t *top;      
 +    t_mdebin *mdebin=NULL;
++    df_history_t df_history;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
-     bIterations = ((IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
-     bTrotter = (bVV && (IR_NPT_TROTTER(ir) || (IR_NVT_TROTTER(ir))));        
++    gmx_rng_t mcrng=NULL;
 +    gmx_bool        bFFscan;
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_shellfc_t shellfc;
 +    int         count,nconverged=0;
 +    real        timestep=0;
 +    double      tcount=0;
 +    gmx_bool        bIonize=FALSE;
 +    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
 +    gmx_bool        bAppend;
 +    gmx_bool        bResetCountersHalfMaxH=FALSE;
 +    gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
 +    real        mu_aver=0,dvdl;
 +    int         a0,a1,gnx=0,ii;
 +    atom_id     *grpindex=NULL;
 +    char        *grpname;
 +    t_coupl_rec *tcr=NULL;
 +    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
 +    matrix      boxcopy={{0}},lastbox;
 +      tensor      tmpvir;
 +      real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
 +      real        vetanew = 0;
++    int         lamnew=0;
++    /* for FEP */
++    int         fep_state=0;
++    int         nstfep;
++    real        rate;
 +    double      cycles;
 +      real        saved_conserved_quantity = 0;
 +    real        last_ekin = 0;
 +      int         iter_i;
 +      t_extmass   MassQ;
 +    int         **trotter_seq; 
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t iterate;
 +    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
 +                                          simulation stops. If equal to zero, don't
 +                                          communicate any more between multisims.*/
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control 
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf,top_global->natoms);
 +    }
 +    /* all the iteratative cases - only if there are constraints */ 
-     init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
++    bIterations = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
++    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +    
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
-     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
++    init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
++            &(state_global->fep_state),lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
-     update_mdatoms(mdatoms,state->lambda);
++    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
++                  enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f,top_global->natoms);
 +    }
 +
++    /* lambda Monte carlo random number generator  */
++    if (ir->bExpanded)
++    {
++        mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
++    }
++    /* copy the state into df_history */
++    copy_df_history(&df_history,&state_global->dfhist);
++
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */    
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global,n_flexible_constraints(constr),
 +                                 (ir->bContinuation || 
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    if (DOMAINDECOMP(cr)) {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state,1);
 +        dd_init_local_state(cr->dd,state_global,state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout) {
 +            snew(f_global,state_global->natoms);
 +        }
 +    } else {
 +        if (PAR(cr)) {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +            pd_at_range(cr,&a0,&a1);
 +        } else {
 +            top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        state = partdec_init_local_state(cr,state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +        if (vsite) {
 +            set_vsite_top(vsite,top,mdatoms,cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
 +            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +
 +        if (shellfc) {
 +            make_local_shells(cr,mdatoms,shellfc);
 +        }
 +
 +        if (ir->pull && PAR(cr)) {
 +            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            state,&f,mdatoms,top,fr,
 +                            vsite,shellfc,constr,
 +                            nrnb,wcycle,FALSE);
 +    }
 +
-     if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) {
++    update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +
 +    if (opt2bSet("-cpi",nfile,fnm))
 +    {
 +        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
 +    }
 +    else
 +    {
 +        bStateFromCP = FALSE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (bStateFromCP)
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if ( Flags & MD_APPENDFILES )
 +            {
 +                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    } 
 +
-                                         repl_ex_nst,repl_ex_seed);
++    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) 
++    {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd,state);
 +    }
 +
++    if (state->flags & (1<<estMC_RNG))
++    {
++        set_mc_state(mcrng,state);
++    }
++
 +    /* Initialize constraints */
 +    if (constr) {
 +        if (!DOMAINDECOMP(cr))
 +            set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    /* Check whether we have to GCT stuff */
 +    bTCR = ftp2bSet(efGCT,nfile,fnm);
 +    if (bTCR) {
 +        if (MASTER(cr)) {
 +            fprintf(stderr,"Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex,gnx);
 +        for(i=0; (i<gnx); i++) {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "repl_ex_nst",&repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
++    {
 +        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
-   
++                                        repl_ex_nst,repl_ex_nex,repl_ex_seed); 
++    }
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
-         if (ir->efep != efepNO)
++
++    /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
++    nstfep = ir->fepvals->nstdhdl;
++    if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
++    {
++        nstfep = ir->expandedvals->nstexpanded;
++    }
++    if (repl_ex_nst > 0 && repl_ex_nst > nstfep)
++    {
++        nstfep = repl_ex_nst;
++    }
++
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
 +                  | (bVV ? CGLO_PRESSURE:0)
 +                  | (bVV ? CGLO_CONSTRAINT:0)
 +                  | (bRerunMD ? CGLO_RERUNMD:0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
 +    
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                    NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                    constr,NULL,FALSE,state->box,
 +                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
 +    if (ir->eI == eiVVAK) {
 +        /* a second call to get the half step temperature initialized as well */ 
 +        /* we do the same call as above, but turn the pressure off -- internally to 
 +           compute_globals, this is recognized as a velocity verlet half-step 
 +           kinetic energy calculation.  This minimized excess variables, but 
 +           perhaps loses some logic?*/
 +        
 +        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                        NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                        constr,NULL,FALSE,state->box,
 +                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                        cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +    
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT)) 
 +    {
 +        for(i=0; (i<ir->opts.ngtc); i++) 
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
 +        } 
 +    }
 +    if (ir->eI != eiVV) 
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +    
 +    /* if using an iterative algorithm, we need to create a working directory for the state. */
 +    if (bIterations) 
 +    {
 +            bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan) 
 +    {
 +        snew(xcopy,state->natoms);
 +        snew(vcopy,state->natoms);
 +        copy_rvecn(state->x,xcopy,0,state->natoms);
 +        copy_rvecn(state->v,vcopy,0,state->natoms);
 +        copy_mat(state->box,boxcopy);
 +    } 
 +    
 +    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
 +    
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr,FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,"Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr,"starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf,"%s","infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                        gmx_step_str(ir->init_step,sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr,"%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps,sbuf),tbuf);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
++    {
 +        fprintf(fplog,"\n");
++    }
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret=fcCheckPointParallel( cr->nodeid,
 +                                    NULL,0);
 +    if ( chkpt_ret == 0 ) 
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps 
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv,&status,
 +                                             opt2fn("-rerun",nfile,fnm),
 +                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms,top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box,fr->shift_vec);
 +        }
 +    }
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !bStateFromCP;
 +    bInitStep = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep    = FALSE;
 +    bSumEkinhOld = FALSE;
 +    bExchanged   = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh,bGStatEveryStep,step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
 +
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        if (bRerunMD) {
 +            if (rerun_fr.bStep) {
 +                step = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime) {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        } 
 +        else 
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t = t0 + step*ir->delta_t;
 +        }
 +
-             if (bRerunMD && rerun_fr.bLambda && (ir->delta_lambda!=0))
-             {
-                 state_global->lambda = rerun_fr.lambda;
-             }
-             else
-             {
-                 state_global->lambda = lam0 + step*ir->delta_lambda;
-             }
-             state->lambda = state_global->lambda;
-             bDoDHDL = do_per_step(step,ir->nstdhdl);
++        if (ir->efep != efepNO || ir->bSimTemp)
 +        {
-             bNS = (bFirstStep || bExchanged || bNStList ||
++            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
++               requiring different logic. */
++            
++            set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
++            bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
++            bDoFEP  = (do_per_step(step,nstfep) && (ir->efep != efepNO));
++            bDoExpanded  = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
 +        }
 +
 +        if (bSimAnn) 
 +        {
 +            update_annealing_target_temp(&(ir->opts),t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for(i=0; i<state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box,state_global->box);
 +            copy_mat(state_global->box,state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* Following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for(ii=0; (ii<state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii],state->x[ii]);
 +                copy_rvec(vcopy[ii],state->v[ii]);
 +            }
 +            copy_mat(boxcopy,state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +            
-                 set_nlistheuristics(&nlh,bFirstStep || bExchanged,step);
++            bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
-             print_ebin_header(fplog,step,t,state->lambda);
++                set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
 +            }
 +        } 
 +
 +        /* check whether we should stop because another simulation has 
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
 +                 (multisim_nsteps != ir->nsteps) )  
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr, 
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep=TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii=bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
 +        {
 +            bBornRadii=TRUE;
 +        }
 +        
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                  (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog,step,state->box,graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd,state,state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle,ewcDOMDEC);
 +                dd_partition_system(fplog,step,cr,
 +                                    bMasterState,nstglobalcomm,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,do_verbose);
 +                wallcycle_stop(wcycle,ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
-             update_mdatoms(mdatoms,state->lambda); 
++            print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
-         bNstEner = do_per_step(step,ir->nstcalcenergy);
++            update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +        }
 +
 +        if (bRerunMD && rerun_fr.bV)
 +        {
 +            
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                            constr,NULL,FALSE,state->box,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +        
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
 +                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
 +        }
 +        
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile,fnm,fr,
 +                                  mdatoms->nr,state->x,state->box))
 +            {
 +                gmx_finalize_par();
 +
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
-              (ir->epc != epcNO && do_per_step(step,ir->nstpcouple)));
++
++        if (EI_VV(ir->eI) && (!bInitStep)) {  /* for vv, the first half actually corresponds to the last step */
++            bNstEner = do_per_step(step-1,ir->nstcalcenergy);
++        } else {
++            bNstEner = do_per_step(step,ir->nstcalcenergy);
++        }
 +        bCalcEnerPres =
 +            (bNstEner ||
-                        (bDoDHDL ? GMX_FORCE_DHDL : 0)
++             (ir->epc > epcNO && do_per_step(step,ir->nstpcouple)));
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcEnerPres || bStopCM ||
 +                  do_per_step(step,nstglobalcomm) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcEnerPres = TRUE;
 +            bGStat        = TRUE;
 +        }
 +        
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +            );
 +        
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       (bNStList ? GMX_FORCE_DOLR : 0) |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcEnerPres ? GMX_FORCE_VIRIAL : 0) |
-         
++                       (bDoFEP ? GMX_FORCE_DHDL : 0)
 +            );
 +        
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
 +                                      ir,bNS,force_flags,
 +                                      bStopCM,top,top_global,
 +                                      constr,enerd,fcd,
 +                                      state,f,force_vir,mdatoms,
 +                                      nrnb,wcycle,graph,groups,
 +                                      shellfc,fr,bBornRadii,t,mu_tot,
 +                                      state->natoms,&bConverged,vsite,
 +                                      outf->fp_field);
 +            tcount+=count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallellized as well, and does communication too. 
 +             * Check comments in sim_util.c
 +             */
-                 
 +            do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
 +                     state->box,state->x,&state->hist,
 +                     f,force_vir,mdatoms,enerd,fcd,
 +                     state->lambda,graph,
 +                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +        
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
 +                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
 +        }
 +        
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
 +            fprintf(fplog,"Done init_coupling\n"); 
 +            fflush(fplog);
 +        }
 +        
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI==eiVV && bInitStep) 
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the 
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
 +            } else {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
 +            }
 +
 +            update_coords(fplog,step,ir,mdatoms,state,
 +                          f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
 +                          cr,nrnb,constr,&top->idef);
 +            
 +            if (bIterations)
 +            {
 +                gmx_iterate_init(&iterate,bIterations && !bInitStep);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +            
 +            /* save the state */
 +            if (bIterations && iterate.bIterate) { 
 +                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +            }
 +            
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || (bIterations && iterate.bIterate))
 +            {
 +                if (bIterations && iterate.bIterate) 
 +                {
 +                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +                    if (bFirstIterate && bTrotter) 
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +                        
 +                        veta_save = state->veta;
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
 +                        vetanew = state->veta;
 +                        state->veta = veta_save;
 +                    } 
 +                } 
 +                
 +                bOK = TRUE;
 +                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
 +                    dvdl = 0;
 +                    
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,shake_vir,NULL,
 +                                       cr,nrnb,wcycle,upd,constr,
 +                                       bInitStep,TRUE,bCalcEnerPres,vetanew);
 +                    
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                    }
 +                    
 +                } 
 +                else if (graph)
 +                { /* Need to unshift here if a do_force has been
 +                     called in the previous step */
 +                    unshift_self(graph,state->box,state->x);
 +                }
-                 /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
-                     /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
++
 +                
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
-             enerd->term[F_DHDL_CON] += dvdl;
++                /* bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
++                /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
++                if (bNstEner && ir->eI==eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
++                {
++                    bSumEkinhOld = TRUE;
++                }
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,NULL,FALSE,state->box,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | CGLO_ENERGY 
 +                                | (bStopCM ? CGLO_STOPCM : 0)
 +                                | (bTemp ? CGLO_TEMPERATURE:0) 
 +                                | (bPres ? CGLO_PRESSURE : 0) 
 +                                | (bPres ? CGLO_CONSTRAINT : 0)
 +                                | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_SCALEEKIN 
 +                    );
 +                /* explanation of above: 
 +                   a) We compute Ekin at the full time step
 +                   if 1) we are using the AveVel Ekin, and it's not the
 +                   initial step, or 2) if we are using AveEkin, but need the full
 +                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                   b) If we are using EkinAveEkin for the kinetic energy for the temperture control, we still feed in 
 +                   EkinAveVel because it's needed for the pressure */
 +                
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep) 
 +                {
 +                    if (bTrotter)
 +                    {
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
 +                    } 
 +                    else 
 +                    {
 +                        update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    }
 +                }
 +                
 +                if (bIterations &&
 +                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                                   state->veta,&vetanew)) 
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep) {
++                enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
 +                copy_mat(shake_vir,state->svir_prev);
 +                copy_mat(force_vir,state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI==eiVV) {
 +                copy_rvecn(cbuf,state->v,0,state->natoms);
 +            }
 +            
 +            if (fr->bSepDVDL && fplog && do_log) 
 +            {
 +                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +            }
-     
++            enerd->term[F_DVDL_BONDED] += dvdl;
 +        }
-                 last_ekin = enerd->term[F_EKIN]; /* does this get preserved through checkpointing? */
++
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV) {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
 +            if (ir->eI==eiVV) 
 +            {
-         
++                last_ekin = enerd->term[F_EKIN];
 +            }
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
++            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
++            sum_dhdl(enerd,state->lambda,ir->fepvals);
 +        }
 +        
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
++        if (bDoExpanded) {
++            /* perform extended ensemble sampling in lambda - we don't
++               actually move to the new state before outputting
++               statistics, but if performing simulated tempering, we
++               do update the velocities and the tau_t. */
++
++            lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
++        }
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +        
 +        /* Now we have the energies and forces corresponding to the 
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
 +        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
 +        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
 +        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
 +        if (bCPT) { mdof_flags |= MDOF_CPT; };
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +            fcReportProgress( ir->nsteps, step );
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +            fcRequestCheckPoint();
 +#endif
 +        
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            if (bCPT)
 +            {
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd,state);
 +                }
++                if (state->flags  & (1<<estMC_RNG))
++                {
++                    get_mc_state(mcrng,state);
++                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate,ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist,mdebin);
++                    if (ir->efep!=efepNO || ir->bSimTemp) 
++                    {
++                        state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
++                                                                       structured so this isn't necessary.
++                                                                       Note this reassignment is only necessary
++                                                                       for single threads.*/
++                        copy_df_history(&state_global->dfhist,&df_history);
++                    }
 +                }
 +            }
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols &&
 +                    DOMAINDECOMP(cr))
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +        
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV) 
 +        {
 +            copy_mat(state->svir_prev,shake_vir);
 +            copy_mat(state->fvir_prev,force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +        
++        /* Determine the pressure:
++         * always when we want exact averages in the energy file,
++         * at ns steps when we have pressure coupling,
++         * otherwise only at energy output steps (set below).
++         */
++
++        
++        bNstEner = (bGStatEveryStep || do_per_step(step,ir->nstcalcenergy));
++        bCalcEnerPres = bNstEner;
++
++        /* Do we need global communication ? */
++        bGStat = (bGStatEveryStep || bStopCM || bNS ||
++                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
++
++        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
++
++        if (do_ene || do_log)
++        {
++            bCalcEnerPres = TRUE;
++            bGStat        = TRUE;
++        }
++
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
-   
 +        /* Check whether everything is still all right */    
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this is just to make gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.sig[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.sig[eglsSTOPCOND]=-1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
 +
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that also the bonded interaction count check is not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
 +                                                     nlh.scale_tot,state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 || 
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
-                 if (ir->eI==eiVVAK) 
++
++
++        /* at the start of step, randomize the velocities */
++        if (ETC_ANDERSEN(ir->etc) && EI_VV(ir->eI))
++        {
++            gmx_bool bDoAndersenConstr;
++            bDoAndersenConstr = update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr);
++            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
++            if (bDoAndersenConstr)
++            {
++                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
++                                   &top->idef,tmp_vir,NULL,
++                                   cr,nrnb,wcycle,upd,constr,
++                                   bInitStep,TRUE,FALSE,vetanew);
++            }
++        }
++
 +        if (bIterations)
 +        {
 +            gmx_iterate_init(&iterate,bIterations);
 +        }
 +    
 +        /* for iterations, we save these vectors, as we will be redoing the calculations */
 +        if (bIterations && iterate.bIterate) 
 +        {
 +            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +        }
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || (bIterations && iterate.bIterate))
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */    
 +            if (bIterations) 
 +            { 
 +                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +            
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box,lastbox);
 +
 +            bOK = TRUE;
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle,ewcUPDATE);
 +                dvdl = 0;
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter) 
 +                {
 +                    if (bIterations && iterate.bIterate) 
 +                    {
 +                        if (bFirstIterate) 
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else 
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir,scalevir,shake_vir); 
 +                        m_add(force_vir,shake_vir,total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
 +                /* We can only do Berendsen coupling after we have summed
 +                 * the kinetic energy or virial. Since this happens
 +                 * in global_state after update, we should only do it at
 +                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                 */
 +                }
 +                else 
 +                {
 +                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
 +                                   upd,bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    /* velocity half-step update */
 +                    update_coords(fplog,step,ir,mdatoms,state,f,
 +                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
 +                                  cr,nrnb,constr,&top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    copy_rvecn(state->x,cbuf,0,state->natoms);
 +                }
 +                
 +                update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                   &top->idef,shake_vir,force_vir,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,FALSE,bCalcEnerPres,state->veta);  
 +                
-                                     cglo_flags | CGLO_TEMPERATURE    
++                if (ir->eI==eiVVAK)
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                    constr,NULL,FALSE,lastbox,
 +                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
-                     fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
++                                    cglo_flags | CGLO_TEMPERATURE
 +                        );
 +                    wallcycle_start(wcycle,ewcUPDATE);
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf,state->x,0,state->natoms);
 +
 +                    update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                    wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure. 
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,tmp_vir,force_vir,
 +                                       cr,nrnb,wcycle,upd,NULL,
 +                                       bInitStep,FALSE,bCalcEnerPres,
 +                                       state->veta);  
 +                }
 +                if (!bOK && !bFFscan) 
 +                {
 +                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                }
 +                
 +                if (fr->bSepDVDL && fplog && do_log) 
 +                {
-                 enerd->term[F_DHDL_CON] += dvdl;
++                    fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
 +                }
-                             | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
++                enerd->term[F_DVDL_BONDED] += dvdl;
 +            } 
 +            else if (graph) 
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph,state->box,state->x);
 +            }
 +
 +            if (vsite != NULL) 
 +            {
 +                wallcycle_start(wcycle,ewcVSITECONSTR);
 +                if (graph != NULL) 
 +                {
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                
 +                if (graph != NULL) 
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                wallcycle_stop(wcycle,ewcVSITECONSTR);
 +            }
 +            
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                gs.sig[eglsNABNSB] = nlh.nabnsb;
 +            }
++            bEnergyHere = (!EI_VV(ir->eI) || (EI_VV(ir->eI) && bRerunMD)); /* this is not quite working for vv and rerun! fails for running rerun on multiple threads. This is caught in runner.c. */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                            constr,
 +                            bFirstIterate ? &gs : NULL, 
 +                            (step_rel % gs.nstms == 0) && 
 +                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
 +                            lastbox,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            cglo_flags 
 +                            | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0) 
 +                            | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
-                             | CGLO_CONSTRAINT 
++                            | (bEnergyHere || bRerunMD ? CGLO_PRESSURE : 0) 
 +                            | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
 +                            | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
-         
-         /* sum up the foreign energy and dhdl terms */
-         sum_dhdl(enerd,state->lambda,ir);
++                            | CGLO_CONSTRAINT
 +                );
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                nlh.nabnsb = gs.set[eglsNABNSB];
 +                gs.set[eglsNABNSB] = 0;
 +            }
 +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +        
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properties,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (bIterations && 
 +                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                               trace(shake_vir),&tracevir)) 
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
++        /* only add constraint dvdl after constraints */
++        enerd->term[F_DVDL_BONDED] += dvdl;
++        if (!bVV)
++        {
++            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
++            sum_dhdl(enerd,state->lambda,ir->fepvals);
++        }
 +        update_box(fplog,step,ir,mdatoms,state,graph,f,
 +                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
 +        
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +    
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc==NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
 +                                 f,NULL,xcopy,
 +                                 &(top_global->mols),mdatoms->massT,pres))
 +            {
 +                gmx_finalize_par();
 +
 +                fprintf(stderr,"\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,                                                            
 +             * so signal that we still have to do it.                                                
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +        
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies. 
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
 +                        ir,MASTER(cr),
 +                        mdatoms,&(top->idef),mu_aver,
 +                        top_global->mols.nr,cr,
 +                        state->box,total_vir,pres,
 +                        mu_tot,state->x,f,bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
-                     upd_mdebin(mdebin,bDoDHDL, TRUE,
-                                t,mdatoms->tmass,enerd,state,lastbox,
 +
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI==eiVV) 
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +        
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else 
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize) 
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max) 
 +            {
 +                fprintf(stderr,"Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep) 
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +        
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr,do_or;
 +            
++            if (fplog && do_log && bDoExpanded)
++            {
++                /* only needed if doing expanded ensemble */
++                PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
++                                          &df_history,state->fep_state,ir->nstlog,step);
++            }
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
 +            {
 +                if (bNstEner)
 +                {
++                    upd_mdebin(mdebin,bDoDHDL,TRUE,
++                               t,mdatoms->tmass,enerd,state,
++                               ir->fepvals,ir->expandedvals,lastbox,
 +                               shake_vir,force_vir,total_vir,pres,
 +                               ekind,mu_tot,constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +                
 +                do_dr  = do_per_step(step,ir->nstdisreout);
 +                do_or  = do_per_step(step,ir->nstorireout);
 +                
 +                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
 +                           step,t,
 +                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull,step,t);
 +            }
 +            
 +            if (do_per_step(step,ir->nstlog))
 +            {
 +                if(fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
-                                           state_global,enerd->term,
++        if (bDoExpanded)
++        {
++            /* Have to do this part after outputting the logfile and the edr file */
++            state->fep_state = lamnew;
++            for (i=0;i<efptNR;i++)
++            {
++                state->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
++            }
++        }
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
 +        {
 +            if (shellfc) 
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step,repl_ex_nst)) 
 +        {
 +            bExchanged = replica_exchange(fplog,cr,repl_ex,
++                                          state_global,enerd,
 +                                          state,step,t);
 +
 +            if (bExchanged && DOMAINDECOMP(cr)) 
 +            {
 +                dd_partition_system(fplog,step,cr,TRUE,1,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,FALSE);
 +            }
 +        }
 +        
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres,state->pres_prev);
 +        }
 +        
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed!=NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel,membed,state_global->x);
 +        }
 +
 +        if (bRerunMD) 
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +            }
 +        }
 +        
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +        
 +        cycles = wallcycle_stop(wcycle,ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles,ddCyclStep);
 +        }
 +        
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime);
 +            wcycle_set_reset_counters(wcycle,-1);
 +            /* Correct max_hours for the elapsed time */
 +            max_hours -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +    
 +    /* Stop the time */
 +    runtime_end(runtime);
 +    
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_finish(cr);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
 +    }
 +    
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +    
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog,repl_ex);
 +    }
 +    
 +    runtime->nsteps_done = step_rel;
 +    
 +    return 0;
 +}
index e16f809e2705130a37467669517217506ca193b8,0000000000000000000000000000000000000000..b29bd0e2f75dcc54d28bbc8120fb3c173938beb9
mode 100644,000000..100644
--- /dev/null
@@@ -1,572 -1,0 +1,573 @@@
- #include "dihre.h"
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2010, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <signal.h>
 +#include <stdlib.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
-                     int repl_ex_nst,int repl_ex_seed,
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "genborn.h"
 +#include "string2.h"
 +#include "copyrite.h"
 +#include "membed.h"
 +
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +/* include even when OpenMM not used to force compilation of do_md_openmm */
 +#include "openmm_wrapper.h"
 +
 +double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +                    const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                    int nstglobalcomm,
 +                    gmx_vsite_t *vsite,gmx_constr_t constr,
 +                    int stepout,t_inputrec *ir,
 +                    gmx_mtop_t *top_global,
 +                    t_fcdata *fcd,
 +                    t_state *state_global,
 +                    t_mdatoms *mdatoms,
 +                    t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                    gmx_edsam_t ed,t_forcerec *fr,
-     init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
++                    int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                    gmx_membed_t membed,
 +                    real cpt_period,real max_hours,
 +                    const char *deviceOptions,
 +                    unsigned long Flags,
 +                    gmx_runtime_t *runtime)
 +{   /* MD driver that advances the system through openmm_take_one_step().
 +     * Sets up energy, kinetic-energy and output bookkeeping (init_md,
 +     * init_enerdata, init_ekindata), parses deviceOptions and calls
 +     * openmm_init(), then loops: per step it decides on log/energy/
 +     * trajectory/checkpoint output, copies the state back with
 +     * openmm_copy_state() when output is due, handles stop conditions and
 +     * the max_hours / cpt_period timers, and finally takes one OpenMM step.
 +     * After the loop it prints energy averages, calls openmm_cleanup() and
 +     * done_mdoutf(), and stores the step count in runtime->nsteps_done.
 +     * Always returns 0. */
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
 +    double     t,t0,lam0;
 +    gmx_bool       bSimAnn,
 +    bFirstStep,bStateFromTPX,bLastStep,bStartingFromCpt;
 +    gmx_bool       bInitStep=TRUE;
 +    gmx_bool       do_ene,do_log, do_verbose,
 +    bX,bV,bF,bCPT;
 +    tensor     force_vir,shake_vir,total_vir,pres;
 +    int        i,m;
 +    int        mdof_flags;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    int        nchkpt=1;
 +    gmx_localtop_t *top;
 +    t_mdebin *mdebin;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
 +
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_bool        bAppend;
 +    int         a0,a1;
 +    matrix      lastbox; /* NOTE(review): never assigned in this function, yet passed to upd_mdebin() below */
 +    real        reset_counters=0,reset_counters_now=0;
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; 
 +
 +    const char *ommOptions = NULL;
 +    void   *openmmData;
 +
 +#ifdef GMX_DOUBLE
 +    /* Checks in cmake should prevent the compilation in double precision
 +     * with OpenMM, but just to be sure we check here.
 +     */
 +    gmx_fatal(FARGS,"Compilation was performed in double precision, but OpenMM only supports single precision. If you want to use to OpenMM, compile in single precision.");
 +#endif
 +
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
-     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
++    init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
++            &(state_global->fep_state),&lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
-     update_mdatoms(mdatoms,state->lambda);
++    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
++                  enerd);
 +    snew(f,top_global->natoms);
 +
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +    a0 = 0;
 +    a1 = top_global->natoms;
 +
 +    state = partdec_init_local_state(cr,state_global);
 +    f_global = f;
 +
 +    atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +    if (vsite)
 +    {
 +        set_vsite_top(vsite,top,mdatoms,cr);
 +    }
 +
 +    if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +    {
 +        graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +    }
 +
-             print_ebin_header(fplog,step,t,state->lambda);
++    update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +
 +    if (deviceOptions[0]=='\0')
 +    {
 +        /* empty options, which should default to OpenMM in this build */
 +        ommOptions=deviceOptions;
 +    }
 +    else
 +    {
 +        if (gmx_strncasecmp(deviceOptions,"OpenMM",6)!=0)
 +        {
 +            gmx_fatal(FARGS, "This Gromacs version currently only works with OpenMM. Use -device \"OpenMM:<options>\"");
 +        }
 +        else
 +        {
 +            ommOptions=strchr(deviceOptions,':'); /* stays NULL when no ':' follows the "OpenMM" prefix */
 +            if (NULL!=ommOptions)
 +            {
 +                /* Increase the pointer to skip the colon */
 +                ommOptions++;
 +            }
 +        }
 +    }
 +
 +    openmmData = openmm_init(fplog, ommOptions, ir, top_global, top, mdatoms, fr, state);
 +    please_cite(fplog,"Friedrichs2009");
 +
 +    if (MASTER(cr))
 +    {
 +        /* Update mdebin with energy history if appending to output files */
 +        if ( Flags & MD_APPENDFILES )
 +        {
 +            restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +        }
 +        /* Set the initial energy history in state to zero by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    }
 +
 +    if (constr)
 +    {
 +        set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    if (!ir->bContinuation)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for (m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +
 +    if (MASTER(cr))
 +    {
 +        char tbuf[20];
 +        fprintf(stderr,"starting mdrun '%s'\n",
 +                *(top_global->name));
 +        if (ir->nsteps >= 0)
 +        {
 +            sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t); /* NOTE(review): unbounded sprintf into tbuf[20]; assumes the formatted end time fits - confirm */
 +        }
 +        else
 +        {
 +            sprintf(tbuf,"%s","infinite");
 +        }
 +        if (ir->init_step > 0)
 +        {
 +            fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                    gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                    gmx_step_str(ir->init_step,sbuf2),
 +                    ir->init_step*ir->delta_t);
 +        }
 +        else
 +        {
 +            fprintf(stderr,"%s steps, %s ps.\n",
 +                    gmx_step_str(ir->nsteps,sbuf),tbuf);
 +        }
 +    }
 +
 +    fprintf(fplog,"\n"); /* NOTE(review): fplog is not NULL-checked here, unlike the guarded write a few lines below */
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
 +        fprintf(fplog,"\n");
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps
 +     *
 +     ************************************************************/
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
 +    bInitStep = bFirstStep && bStateFromTPX;
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    while (!bLastStep)
 +    {
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        bLastStep = (step_rel == ir->nsteps);
 +        t = t0 + step*ir->delta_t;
 +
 +        if (gs.set[eglsSTOPCOND] != 0)
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                     (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (MASTER(cr) && do_log)
 +        {
-                        t,mdatoms->tmass,enerd,state,lastbox,
++            print_ebin_header(fplog,step,t,state->lambda[efptFEP]);
 +        }
 +
 +        clear_mat(force_vir);
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either when we signalled through gs (in OpenMM NS works different),
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step.
 +         */
 +        bCPT = ((gs.set[eglsCHKPT] ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step );
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Now we have the energies and forces corresponding to the
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout))
 +        {
 +            mdof_flags |= MDOF_X;
 +        }
 +        if (do_per_step(step,ir->nstvout))
 +        {
 +            mdof_flags |= MDOF_V;
 +        }
 +        if (do_per_step(step,ir->nstfout))
 +        {
 +            mdof_flags |= MDOF_F;
 +        }
 +        if (do_per_step(step,ir->nstxtcout))
 +        {
 +            mdof_flags |= MDOF_XTC;
 +        }
 +        if (bCPT)
 +        {
 +            mdof_flags |= MDOF_CPT;
 +        };
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (mdof_flags != 0 || do_ene || do_log)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            bF = (mdof_flags & MDOF_F);
 +            bX = (mdof_flags & (MDOF_X | MDOF_XTC | MDOF_CPT));
 +            bV = (mdof_flags & (MDOF_V | MDOF_CPT));
 +
 +            openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene); /* only requested here, i.e. on steps that produce output */
 +
 +            upd_mdebin(mdebin,FALSE,TRUE,
++                       t,mdatoms->tmass,enerd,state,ir->fepvals,ir->expandedvals,lastbox,
 +                       shake_vir,force_vir,total_vir,pres,
 +                       ekind,mu_tot,constr);
 +            print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,do_log?fplog:NULL,
 +                       step,t,
 +                       eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                    (Flags & MD_CONFOUT) && MASTER(cr))
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +           /* This just makes gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            /* NOTE: this only works for serial code. For code that allows
 +               MPI nodes to propagate their condition, see kernel/md.c*/
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.set[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.set[eglsSTOPCOND]=1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : ""); /* NOTE(review): tests gs.sig, while both branches above assign gs.set (to 1) - confirm intended */
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.set[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
 +
 +        /* checkpoints */
 +        if (MASTER(cr) && (cpt_period >= 0 &&
 +                           (cpt_period == 0 ||
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +                gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.set[eglsCHKPT] = 1;
 +        }
 +
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep)
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +
 +        if (do_per_step(step,ir->nstlog))
 +        {
 +            if (fflush(fplog) != 0) /* NOTE(review): fplog is used unguarded here; other writes in this function test it for NULL first */
 +            {
 +                gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +            }
 +        }
 +
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
 +        {
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +        step++;
 +        step_rel++;
 +
 +        openmm_take_one_step(openmmData); /* called after the counters are advanced, including on the iteration where bLastStep is set */
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +
 +    /* Stop the time */
 +    runtime_end(runtime);
 +
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    openmm_cleanup(fplog, openmmData);
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    runtime->nsteps_done = step_rel;
 +
 +    return 0;
 +}
Simple merge
Simple merge
Simple merge
Simple merge
index 1025372f6a0c184ae10b30c34b434690f6988f42,0000000000000000000000000000000000000000..ad22f7194eed216326ae0bcff151167c7116d007
mode 100644,000000..100644
--- /dev/null
@@@ -1,982 -1,0 +1,984 @@@
- #include "dihre.h"
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#ifdef __linux
 +#define _GNU_SOURCE
 +#include <sched.h>
 +#include <sys/syscall.h>
 +#endif
 +#include <signal.h>
 +#include <stdlib.h>
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "statutil.h"
 +#include "mdrun.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "names.h"
 +#include "disre.h"
 +#include "orires.h"
-                       mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_seed, mc.pforce, 
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "pull_rotation.h"
 +#include "membed.h"
 +#include "macros.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#ifdef GMX_OPENMM
 +#include "md_openmm.h"
 +#endif
 +
 +#ifdef GMX_OPENMP
 +#include <omp.h>
 +#endif
 +
 +
 +typedef struct { /* one table entry: pointer to an integrator routine of type gmx_integrator_t */
 +    gmx_integrator_t *func;
 +} gmx_intp_t;
 +
 +/* The array should match the eI array in include/types/enums.h
 + *
 + * Table of eiNR integrator entry points. When GMX_OPENMM is defined every
 + * entry is do_md_openmm; otherwise the entries select do_md, do_steep,
 + * do_cg, do_nm, do_lbfgs or do_tpi. Presumably indexed by the eI integrator
 + * enum value - the lookup itself is outside the visible code.
 + */
 +#ifdef GMX_OPENMM  /* FIXME do_md_openmm needs fixing */
 +const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm},{do_md_openmm}};
 +#else
 +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
 +#endif
 +
 +gmx_large_int_t     deform_init_init_step_tpx; /* file-scope, externally visible; presumably the initial step from the tpx used for box deformation - TODO confirm */
 +matrix              deform_init_box_tpx; /* presumably the matching initial box from the tpx - TODO confirm */
 +#ifdef GMX_THREAD_MPI
 +tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER; /* statically initialised; presumably guards the two deform_init_* variables above - TODO confirm */
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +struct mdrunner_arglist /* Bundle of the mdrunner() arguments, filled by
 +                           mdrunner_start_threads() and handed as a single
 +                           void pointer to mdrunner_start_fn(). Field names
 +                           mirror the mdrunner() parameter names. */
 +{
 +    FILE *fplog;
 +    t_commrec *cr;
 +    int nfile;
 +    const t_filenm *fnm;
 +    output_env_t oenv;
 +    gmx_bool bVerbose;
 +    gmx_bool bCompact;
 +    int nstglobalcomm;
 +    ivec ddxyz;
 +    int dd_node_order;
 +    real rdd;
 +    real rconstr;
 +    const char *dddlb_opt;
 +    real dlb_scale;
 +    const char *ddcsx;
 +    const char *ddcsy;
 +    const char *ddcsz;
 +    int nstepout;
 +    int resetstep;
 +    int nmultisim;
 +    int repl_ex_nst;
++    int repl_ex_nex;
 +    int repl_ex_seed;
 +    real pforce;
 +    real cpt_period;
 +    real max_hours;
 +    const char *deviceOptions;
 +    unsigned long Flags;
 +    int ret; /* return value of mdrunner(), written by mdrunner_start_fn() */
 +};
 +
 +
 +/* The function used for spawning threads. Extracts the mdrunner() 
 +   arguments from its one argument and calls mdrunner(), after making
 +   a commrec.
 +
 +   arg points to a struct mdrunner_arglist. The list is copied by value,
 +   the file-name table is duplicated with dup_tfn(), and a commrec is
 +   obtained from init_par_threads(). Only the thread for which MASTER(cr)
 +   holds passes on the log file pointer; every other thread calls
 +   mdrunner() with fplog==NULL. cr->nnodes is passed as the requested
 +   thread count. The result of mdrunner() is written to the ret field of
 +   the arglist that arg points to (not to the local copy). */
 +static void mdrunner_start_fn(void *arg)
 +{
 +    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
 +    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure 
 +                                        that it's thread-local. This doesn't
 +                                        copy pointed-to items, of course,
 +                                        but those are all const. */
 +    t_commrec *cr;  /* we need a local version of this */
 +    FILE *fplog=NULL;
 +    t_filenm *fnm;
 +
 +    fnm = dup_tfn(mc.nfile, mc.fnm); /* NOTE(review): the duplicate is not released in this function */
 +
 +    cr = init_par_threads(mc.cr);
 +
 +    if (MASTER(cr))
 +    {
 +        fplog=mc.fplog;
 +    }
 +
 +    mda->ret=mdrunner(cr->nnodes, fplog, cr, mc.nfile, fnm, mc.oenv, 
 +                      mc.bVerbose, mc.bCompact, mc.nstglobalcomm, 
 +                      mc.ddxyz, mc.dd_node_order, mc.rdd,
 +                      mc.rconstr, mc.dddlb_opt, mc.dlb_scale, 
 +                      mc.ddcsx, mc.ddcsy, mc.ddcsz, mc.nstepout, mc.resetstep, 
-               int repl_ex_seed, real pforce,real cpt_period, real max_hours, 
++                      mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
 +                      mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
 +}
 +
 +/* called by mdrunner() to start a specific number of threads (including 
 +   the main thread) for thread-parallel runs. This in turn calls mdrunner()
 +   for each thread. 
 +   All options besides nthreads are the same as for mdrunner().
 +
 +   Returns:
 +     - cr unchanged when nthreads < 2 (nothing is started);
 +     - NULL when tMPI_Init_fn() does not return TMPI_SUCCESS;
 +     - otherwise a new commrec obtained from init_par_threads(cr).
 +   The argument list and the duplicated file-name table allocated here are
 +   intentionally not freed (see the comment in the body). */
 +static t_commrec *mdrunner_start_threads(int nthreads, 
 +              FILE *fplog,t_commrec *cr,int nfile, 
 +              const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +              gmx_bool bCompact, int nstglobalcomm,
 +              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +              const char *dddlb_opt,real dlb_scale,
 +              const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +              int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
-              int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
++              int repl_ex_nex, int repl_ex_seed, real pforce,real cpt_period, real max_hours,
 +              const char *deviceOptions, unsigned long Flags)
 +{
 +    int ret;
 +    struct mdrunner_arglist *mda;
 +    t_commrec *crn; /* the new commrec */
 +    t_filenm *fnmn;
 +
 +    /* first check whether we even need to start tMPI */
 +    if (nthreads<2)
 +        return cr;
 +
 +    /* a few small, one-time, almost unavoidable memory leaks: */
 +    snew(mda,1);
 +    fnmn=dup_tfn(nfile, fnm);
 +
 +    /* fill the data structure to pass as void pointer to thread start fn */
 +    mda->fplog=fplog;
 +    mda->cr=cr;
 +    mda->nfile=nfile;
 +    mda->fnm=fnmn;
 +    mda->oenv=oenv;
 +    mda->bVerbose=bVerbose;
 +    mda->bCompact=bCompact;
 +    mda->nstglobalcomm=nstglobalcomm;
 +    mda->ddxyz[XX]=ddxyz[XX]; /* ddxyz is copied element by element rather than by pointer */
 +    mda->ddxyz[YY]=ddxyz[YY];
 +    mda->ddxyz[ZZ]=ddxyz[ZZ];
 +    mda->dd_node_order=dd_node_order;
 +    mda->rdd=rdd;
 +    mda->rconstr=rconstr;
 +    mda->dddlb_opt=dddlb_opt;
 +    mda->dlb_scale=dlb_scale;
 +    mda->ddcsx=ddcsx;
 +    mda->ddcsy=ddcsy;
 +    mda->ddcsz=ddcsz;
 +    mda->nstepout=nstepout;
 +    mda->resetstep=resetstep;
 +    mda->nmultisim=nmultisim;
 +    mda->repl_ex_nst=repl_ex_nst;
++    mda->repl_ex_nex=repl_ex_nex;
 +    mda->repl_ex_seed=repl_ex_seed;
 +    mda->pforce=pforce;
 +    mda->cpt_period=cpt_period;
 +    mda->max_hours=max_hours;
 +    mda->deviceOptions=deviceOptions;
 +    mda->Flags=Flags;
 +
 +    fprintf(stderr, "Starting %d threads\n",nthreads);
 +    fflush(stderr);
 +    /* now spawn new threads that start mdrunner_start_fn(), while 
 +       the main thread returns */
 +    ret=tMPI_Init_fn(TRUE, nthreads, mdrunner_start_fn, (void*)(mda) );
 +    if (ret!=TMPI_SUCCESS)
 +        return NULL;
 +
 +    /* make a new comm_rec to reflect the new situation */
 +    crn=init_par_threads(cr);
 +    return crn;
 +}
 +
 +
 +/* Get the number of threads to use for thread-MPI based on how many
 + * were requested, which algorithms we're using,
 + * and how many particles there are.
 + *
 + * When nthreads_requested < 1 the count is read from the GMX_MAX_THREADS
 + * environment variable if it is set (fatal error when it parses to a value
 + * below 1), otherwise from tMPI_Thread_get_hw_number(). The count is forced
 + * to 1 for eiLBFGS or eelEWALD. For an automatically chosen count that
 + * leaves fewer than min_atoms_per_thread atoms per thread, the count is
 + * lowered to natoms/min_atoms_per_thread (at least 1) and then rounded
 + * down to a multiple of 4 or of 2 as described in the body.
 + * An explicitly requested count (>= 1) is only ever changed by the
 + * LBFGS/Ewald rule.
 + *
 + * NOTE(review): if tMPI_Thread_get_hw_number() can return 0, the
 + * mtop->natoms/nthreads test below divides by zero - confirm its contract.
 + */
 +static int get_nthreads_mpi(int nthreads_requested, t_inputrec *inputrec,
 +                            gmx_mtop_t *mtop)
 +{
 +    int nthreads,nthreads_new;
 +    int min_atoms_per_thread;
 +    char *env;
 +
 +    nthreads = nthreads_requested;
 +
 +    /* determine # of hardware threads. */
 +    if (nthreads_requested < 1)
 +    {
 +        if ((env = getenv("GMX_MAX_THREADS")) != NULL)
 +        {
 +            nthreads = 0; /* stays 0 when sscanf cannot parse a number, which triggers the fatal error below */
 +            sscanf(env,"%d",&nthreads);
 +            if (nthreads < 1)
 +            {
 +                gmx_fatal(FARGS,"GMX_MAX_THREADS (%d) should be larger than 0",
 +                          nthreads);
 +            }
 +        }
 +        else
 +        {
 +            nthreads = tMPI_Thread_get_hw_number();
 +        }
 +    }
 +
 +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
 +    {
 +        /* Steps are divided over the nodes iso splitting the atoms */
 +        min_atoms_per_thread = 0; /* with 0 the atoms-per-thread test below cannot be true for non-negative natoms */
 +    }
 +    else
 +    {
 +        min_atoms_per_thread = MIN_ATOMS_PER_THREAD;
 +    }
 +
 +    /* Check if an algorithm does not support parallel simulation.  */
 +    if (nthreads != 1 && 
 +        ( inputrec->eI == eiLBFGS ||
 +          inputrec->coulombtype == eelEWALD ) )
 +    {
 +        fprintf(stderr,"\nThe integration or electrostatics algorithm doesn't support parallel runs. Not starting any threads.\n");
 +        nthreads = 1;
 +    }
 +    else if (nthreads_requested < 1 &&
 +             mtop->natoms/nthreads < min_atoms_per_thread)
 +    {
 +        /* the thread number was chosen automatically, but there are too many
 +           threads (too few atoms per thread) */
 +        nthreads_new = max(1,mtop->natoms/min_atoms_per_thread);
 +
 +        if (nthreads_new > 8 || (nthreads == 8 && nthreads_new > 4))
 +        {
 +            /* Use only multiples of 4 above 8 threads
 +             * or with an 8-core processor
 +             * (to avoid 6 threads on 8 core processors with 4 real cores).
 +             */
 +            nthreads_new = (nthreads_new/4)*4;
 +        }
 +        else if (nthreads_new > 4)
 +        {
 +            /* Avoid 5 or 7 threads */
 +            nthreads_new = (nthreads_new/2)*2;
 +        }
 +
 +        nthreads = nthreads_new;
 +
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
 +        fprintf(stderr,"      only starting %d threads.\n",nthreads);
 +        fprintf(stderr,"      You can use the -nt option to optimize the number of threads.\n\n");
 +    }
 +    return nthreads;
 +}
 +#endif
 +
 +
 +int mdrunner(int nthreads_requested, FILE *fplog,t_commrec *cr,int nfile,
 +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +             gmx_bool bCompact, int nstglobalcomm,
 +             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +             const char *dddlb_opt,real dlb_scale,
 +             const char *ddcsx,const char *ddcsy,const char *ddcsz,
-                                       repl_ex_nst, repl_ex_seed, pforce, 
++             int nstepout,int resetstep,int nmultisim, int repl_ex_nst, int repl_ex_nex,
 +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
 +             const char *deviceOptions, unsigned long Flags)
 +{   /* mdrunner: set-up and tear-down around a single integrator call.
 +     * The master reads the run input and, with GMX_THREAD_MPI, may start
 +     * threads; the input is then broadcast when PAR(cr) holds. After that
 +     * the restraints, checkpoint state, decomposition, force record and
 +     * PME are initialised. Nodes with DUTY_PP call
 +     * integrator[inputrec->eI].func, all other nodes call gmx_pmeonly.
 +     * Returns gmx_get_stop_condition() cast to int.
 +     * Several consistency checks below call gmx_fatal. */
 +    double     nodetime=0,realtime; /* NOTE(review): not referenced again in this function */
 +    t_inputrec *inputrec;
 +    t_state    *state=NULL;
 +    matrix     box;
 +    gmx_ddbox_t ddbox={0};
 +    int        npme_major,npme_minor;
 +    real       tmpr1,tmpr2; /* NOTE(review): not referenced again in this function */
 +    t_nrnb     *nrnb;
 +    gmx_mtop_t *mtop=NULL;
 +    t_mdatoms  *mdatoms=NULL;
 +    t_forcerec *fr=NULL;
 +    t_fcdata   *fcd=NULL;
 +    real       ewaldcoeff=0;
 +    gmx_pme_t  *pmedata=NULL;
 +    gmx_vsite_t *vsite=NULL;
 +    gmx_constr_t constr; /* only assigned in the DUTY_PP branch (init_constraints) */
 +    int        i,m,nChargePerturbed=-1,status,nalloc; /* NOTE(review): i, m and nalloc are not referenced again in this function */
 +    char       *gro; /* NOTE(review): not referenced again in this function */
 +    gmx_wallcycle_t wcycle;
 +    gmx_bool       bReadRNG,bReadEkin; /* outputs of load_checkpoint */
 +    int        list; /* NOTE(review): not referenced again in this function */
 +    gmx_runtime_t runtime;
 +    int        rc;
 +    gmx_large_int_t reset_counters;
 +    gmx_edsam_t ed=NULL;
 +    t_commrec   *cr_old=cr; /* cr as passed in; handed to mdrunner_start_threads */
 +    int         nthreads_mpi=1;
 +    int         nthreads_pme=1; /* may be replaced from GMX_PME_NTHREADS below (GMX_OPENMP builds) */
 +    gmx_membed_t membed=NULL;
 +
 +    /* CAUTION: threads may be started later on in this function, so
 +       cr doesn't reflect the final parallel state right now */
 +    snew(inputrec,1);
 +    snew(mtop,1);
 +
 +    if (bVerbose && SIMMASTER(cr))
 +    {
 +        fprintf(stderr,"Getting Loaded...\n");
 +    }
 +    
 +    if (Flags & MD_APPENDFILES) 
 +    {
 +        fplog = NULL; /* &fplog is passed to load_checkpoint and gmx_log_open further down */
 +    }
 +
 +    snew(state,1);
 +    if (MASTER(cr)) 
 +    {
 +        /* Read (nearly) all data required for the simulation */
 +        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
 +
 +        /* NOW the threads will be started: */
 +#ifdef GMX_THREAD_MPI
 +        nthreads_mpi = get_nthreads_mpi(nthreads_requested, inputrec, mtop);
 +
 +        if (nthreads_mpi > 1)
 +        {
 +            /* now start the threads. */
 +            cr=mdrunner_start_threads(nthreads_mpi, fplog, cr_old, nfile, fnm,
 +                                      oenv, bVerbose, bCompact, nstglobalcomm, 
 +                                      ddxyz, dd_node_order, rdd, rconstr, 
 +                                      dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
 +                                      nstepout, resetstep, nmultisim, 
-         /* Dihedral Restraints */
-         if (gmx_mtop_ftype_count(mtop,F_DIHRES) > 0)
-         {
-             init_dihres(fplog,mtop,inputrec,fcd);
-         }
++                                      repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
 +                                      cpt_period, max_hours, deviceOptions, 
 +                                      Flags);
 +            /* the main thread continues here with a new cr. We don't deallocate
 +               the old cr because other threads may still be reading it. */
 +            if (cr == NULL)
 +            {
 +                gmx_comm("Failed to spawn threads");
 +            }
 +        }
 +#endif
 +    }
 +    /* END OF CAUTION: cr is now reliable */
 +
 +    /* g_membed initialisation *
 +     * Because we change the mtop, init_membed is called before the init_parallel *
 +     * (in case we ever want to make it run in parallel) */
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"Initializing membed");
 +        }
 +        membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* now broadcast everything to the non-master nodes/threads: */
 +        init_parallel(fplog, cr, inputrec, mtop);
 +    }
 +    if (fplog != NULL)
 +    {
 +        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
 +    }
 +
 +    /* now make sure the state is initialized and propagated */
 +    set_state_entries(state,inputrec,cr->nnodes);
 +
++    /* remove when vv and rerun works correctly! */
++    if (PAR(cr) && EI_VV(inputrec->eI) && ((Flags & MD_RERUN) || (Flags & MD_RERUN_VSITE)))
++    {
++        gmx_fatal(FARGS,
++                  "Currently can't do velocity verlet with rerun in parallel.");
++    }
++
 +    /* A parallel command line option consistency check that we can
 +       only do after any threads have started. */
 +    if (!PAR(cr) &&
 +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
 +    {
 +        gmx_fatal(FARGS,
 +                  "The -dd or -npme option request a parallel simulation, "
 +#ifndef GMX_MPI
 +                  "but mdrun was compiled without threads or MPI enabled"
 +#else
 +#ifdef GMX_THREAD_MPI
 +                  "but the number of threads (option -nt) is 1"
 +#else
 +                  "but mdrun was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec" 
 +#endif
 +#endif
 +            );
 +    }
 +
 +    if ((Flags & MD_RERUN) &&
 +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 +    {
 +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 +    }
 +
 +    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog))
 +    {
 +        /* All-vs-all loops do not work with domain decomposition */
 +        Flags |= MD_PARTDEC;
 +    }
 +
 +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
 +    {
 +        if (cr->npmenodes > 0)
 +        {
 +            if (!EEL_PME(inputrec->coulombtype))
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but the system does not use PME electrostatics");
 +            }
 +            if (Flags & MD_PARTDEC)
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
 +            }
 +        }
 +
 +        cr->npmenodes = 0;
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
 +#endif
 +
 +    /* NMR restraints must be initialized before load_checkpoint,
 +     * since with time averaging the history is added to t_state.
 +     * For proper consistency check we therefore need to extend
 +     * t_state here.
 +     * So the PME-only nodes (if present) will also initialize
 +     * the distance restraints.
 +     */
 +    snew(fcd,1);
 +
 +    /* This needs to be called before read_checkpoint to extend the state */
 +    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
 +
 +    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
 +    {
 +        if (PAR(cr) && !(Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
 +        }
 +        /* Orientation restraints */
 +        if (MASTER(cr))
 +        {
 +            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
 +                        state);
 +        }
 +    }
 +
 +    if (DEFORM(*inputrec))
 +    {
 +        /* Store the deform reference box before reading the checkpoint */
 +        if (SIMMASTER(cr))
 +        {
 +            copy_mat(state->box,box);
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(box),box,cr);
 +        }
 +        /* Because we do not have the update struct available yet
 +         * in which the reference values should be stored,
 +         * we store them temporarily in static variables.
 +         * This should be thread safe, since they are only written once
 +         * and with identical values.
 +         */
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        deform_init_init_step_tpx = inputrec->init_step;
 +        copy_mat(box,deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    if (opt2bSet("-cpi",nfile,fnm)) 
 +    {
 +        /* Check if checkpoint file exists before doing continuation.
 +         * This way we can use identical input options for the first and subsequent runs...
 +         */
 +        if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
 +        {
 +            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
 +                            cr,Flags & MD_PARTDEC,ddxyz,
 +                            inputrec,state,&bReadRNG,&bReadEkin,
 +                            (Flags & MD_APPENDFILES),
 +                            (Flags & MD_APPENDFILESSET));
 +            
 +            if (bReadRNG)
 +            {
 +                Flags |= MD_READ_RNG;
 +            }
 +            if (bReadEkin)
 +            {
 +                Flags |= MD_READ_EKIN;
 +            }
 +        }
 +    }
 +
 +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI only the master node/thread exists in mdrun.c,
 +         * therefore non-master nodes need to open the "seppot" log file here.
 +         */
 +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
 +#endif
 +        )
 +    {
 +        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
 +                             Flags,&fplog);
 +    }
 +
 +    if (SIMMASTER(cr)) 
 +    {
 +        copy_mat(state->box,box); /* taken again after the optional load_checkpoint above, which receives state */
 +    }
 +
 +    if (PAR(cr)) 
 +    {
 +        gmx_bcast(sizeof(box),box,cr);
 +    }
 +
 +    /* Essential dynamics */
 +    if (opt2bSet("-ei",nfile,fnm))
 +    {
 +        /* Open input and output files, allocate space for ED data structure */
 +        ed = ed_open(nfile,fnm,Flags,cr);
 +    }
 +
 +    if (bVerbose && SIMMASTER(cr))
 +    {
 +        fprintf(stderr,"Loaded with Money\n\n");
 +    }
 +
 +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
 +                     EI_TPI(inputrec->eI) ||
 +                     inputrec->eI == eiNM))
 +    {
 +        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
 +                                           dddlb_opt,dlb_scale,
 +                                           ddcsx,ddcsy,ddcsz,
 +                                           mtop,inputrec,
 +                                           box,state->x,
 +                                           &ddbox,&npme_major,&npme_minor);
 +
 +        make_dd_communicators(fplog,cr,dd_node_order);
 +
 +        /* Set overallocation to avoid frequent reallocation of arrays */
 +        set_over_alloc_dd(TRUE);
 +    }
 +    else
 +    {
 +        /* PME, if used, is done on all nodes with 1D decomposition */
 +        cr->npmenodes = 0;
 +        cr->duty = (DUTY_PP | DUTY_PME);
 +        npme_major = 1;
 +        npme_minor = 1;
 +        if (!EI_TPI(inputrec->eI))
 +        {
 +            npme_major = cr->nnodes;
 +        }
 +        
 +        if (inputrec->ePBC == epbcSCREW)
 +        {
 +            gmx_fatal(FARGS,
 +                      "pbc=%s is only implemented with domain decomposition",
 +                      epbc_names[inputrec->ePBC]);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* After possible communicator splitting in make_dd_communicators.
 +         * we can set up the intra/inter node communication.
 +         */
 +        gmx_setup_nodecomm(fplog,cr);
 +    }
 +
 +    /* get number of OpenMP/PME threads
 +     * env variable should be read only on one node to make sure it is identical everywhere */
 +#ifdef GMX_OPENMP
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (MASTER(cr))
 +        {
 +            char *ptr;
 +            if ((ptr=getenv("GMX_PME_NTHREADS")) != NULL)
 +            {
 +                sscanf(ptr,"%d",&nthreads_pme); /* NOTE(review): return value unchecked; unparsable text leaves nthreads_pme at its initial 1, and values < 1 are not rejected here */
 +            }
 +            if (fplog != NULL && nthreads_pme > 1)
 +            {
 +                fprintf(fplog,"Using %d threads for PME\n",nthreads_pme);
 +            }
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast_sim(sizeof(nthreads_pme),&nthreads_pme,cr);
 +        }
 +    }
 +#endif
 +
 +    wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pme);
 +    if (PAR(cr))
 +    {
 +        /* Master synchronizes its value of reset_counters with all nodes 
 +         * including PME only nodes */
 +        reset_counters = wcycle_get_reset_counters(wcycle);
 +        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
 +        wcycle_set_reset_counters(wcycle, reset_counters);
 +    }
 +
 +
 +    snew(nrnb,1);
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* For domain decomposition we allocate dynamically
 +         * in dd_partition_system.
 +         */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            bcast_state_setup(cr,state);
 +        }
 +        else
 +        {
 +            if (PAR(cr))
 +            {
 +                bcast_state(cr,state,TRUE);
 +            }
 +        }
 +
-             init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv,
 +        /* Initiate forcerecord */
 +        fr = mk_forcerec();
 +        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +                      opt2fn("-table",nfile,fnm),
 +                      opt2fn("-tabletf",nfile,fnm),
 +                      opt2fn("-tablep",nfile,fnm),
 +                      opt2fn("-tableb",nfile,fnm),FALSE,pforce);
 +
 +        /* version for PCA_NOT_READ_NODE (see md.c) */
 +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +          "nofile","nofile","nofile","nofile",FALSE,pforce);
 +          */        
 +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
 +
 +        /* Initialize QM-MM */
 +        if(fr->bQMMM)
 +        {
 +            init_QMMMrec(cr,box,mtop,inputrec,fr);
 +        }
 +
 +        /* Initialize the mdatoms structure.
 +         * mdatoms is not filled with atom data,
 +         * as this can not be done now with domain decomposition.
 +         */
 +        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
 +
 +        /* Initialize the virtual site communication */
 +        vsite = init_vsite(mtop,cr);
 +
 +        calc_shifts(box,fr->shift_vec);
 +
 +        /* With periodic molecules the charge groups should be whole at start up
 +         * and the virtual sites should not be far from their proper positions.
 +         */
 +        if (!inputrec->bContinuation && MASTER(cr) &&
 +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        {
 +            /* Make molecules whole at start of run */
 +            if (fr->ePBC != epbcNONE)
 +            {
 +                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
 +            }
 +            if (vsite)
 +            {
 +                /* Correct initial vsite positions are required
 +                 * for the initial distribution in the domain decomposition
 +                 * and for the initial shell prediction.
 +                 */
 +                construct_vsites_mtop(fplog,vsite,mtop,state->x);
 +            }
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            ewaldcoeff = fr->ewaldcoeff;
 +            pmedata = &fr->pmedata; /* PP nodes use the PME handle stored inside fr */
 +        }
 +        else
 +        {
 +            pmedata = NULL;
 +        }
 +    }
 +    else
 +    {
 +        /* This is a PME only node */
 +
 +        /* We don't need the state */
 +        done_state(state);
 +
 +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
 +        snew(pmedata,1); /* no fr on this node, so the PME handle is allocated separately */
 +    }
 +
 +    /* Initiate PME if necessary,
 +     * either on all nodes or on dedicated PME nodes only. */
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (mdatoms)
 +        {
 +            nChargePerturbed = mdatoms->nChargePerturbed;
 +        }
 +        if (cr->npmenodes > 0)
 +        {
 +            /* The PME only nodes need to know nChargePerturbed */
 +            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
 +        }
 +
 +
 +        /* Set CPU affinity. Can be important for performance.
 +           On some systems (e.g. Cray) CPU Affinity is set by default.
 +           But default assigning doesn't work (well) with only some ranks
 +           having threads. This causes very low performance.
 +           External tools have cumbersome syntax for setting affinity
 +           in the case that only some ranks have threads.
 +           Thus it is important that GROMACS sets the affinity internally
 +           if only PME is using threads.
 +        */
 +
 +#ifdef GMX_OPENMP
 +#ifdef __linux
 +#ifdef GMX_LIB_MPI
 +        {
 +            int core;
 +            MPI_Comm comm_intra; /* intra communicator (unlike nc.comm_intra, it includes PME nodes) */
 +            MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),gmx_node_rank(),&comm_intra); /* NOTE(review): comm_intra is not released (no MPI_Comm_free) in this block */
 +            int local_omp_nthreads = (cr->duty & DUTY_PME) ? nthreads_pme : 1; /* threads on this node; NOTE(review): declaration after a statement needs C99 or later */
 +            MPI_Scan(&local_omp_nthreads,&core, 1, MPI_INT, MPI_SUM, comm_intra);
 +            core-=local_omp_nthreads; /* make exclusive scan */
 +#pragma omp parallel firstprivate(core) num_threads(local_omp_nthreads)
 +            {
 +                cpu_set_t mask;
 +                CPU_ZERO(&mask);
 +                core+=omp_get_thread_num(); /* core is firstprivate: each thread offsets its own copy */
 +                CPU_SET(core,&mask);
 +                sched_setaffinity((pid_t) syscall (SYS_gettid),sizeof(cpu_set_t),&mask);
 +            }
 +        }
 +#endif /*GMX_LIB_MPI*/
 +#endif /*__linux*/
 +#endif /*GMX_OPENMP*/
 +
 +        if (cr->duty & DUTY_PME)
 +        {
 +            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
 +                                  mtop ? mtop->natoms : 0,nChargePerturbed,
 +                                  (Flags & MD_REPRODUCIBLE),nthreads_pme);
 +            if (status != 0) 
 +            {
 +                gmx_fatal(FARGS,"Error %d initializing PME",status);
 +            }
 +        }
 +    }
 +
 +
 +    if (integrator[inputrec->eI].func == do_md
 +#ifdef GMX_OPENMM
 +        ||
 +        integrator[inputrec->eI].func == do_md_openmm
 +#endif
 +        )
 +    {
 +        /* Turn on signal handling on all nodes */
 +        /*
 +         * (A user signal from the PME nodes (if any)
 +         * is communicated to the PP nodes.)
 +         */
 +        signal_handler_install();
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        if (inputrec->ePull != epullNO)
 +        {
 +            /* Initialize pull code */
-                                       repl_ex_nst,repl_ex_seed,
++            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
 +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +           /* Initialize enforced rotation code */
 +           init_rot(fplog,inputrec,nfile,fnm,cr,state->x,state->box,mtop,oenv,
 +                    bVerbose,Flags);
 +        }
 +
 +        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
 +                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);
 +
 +            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
 +
 +            setup_dd_grid(fplog,cr->dd);
 +        }
 +
 +        /* Now do whatever the user wants us to do (how flexible...) */
 +        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
 +                                      oenv,bVerbose,bCompact,
 +                                      nstglobalcomm,
 +                                      vsite,constr,
 +                                      nstepout,inputrec,mtop,
 +                                      fcd,state,
 +                                      mdatoms,nrnb,wcycle,ed,fr,
++                                      repl_ex_nst,repl_ex_nex,repl_ex_seed,
 +                                      membed,
 +                                      cpt_period,max_hours,
 +                                      deviceOptions,
 +                                      Flags,
 +                                      &runtime);
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            finish_pull(fplog,inputrec->pull);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +            finish_rot(fplog,inputrec->rot);
 +        }
 +
 +    } 
 +    else 
 +    {
 +        /* do PME only */
 +        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
 +    }
 +
 +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
 +    {
 +        /* Some timing stats */  
 +        if (SIMMASTER(cr))
 +        {
 +            if (runtime.proc == 0) /* presumably "no processor time recorded"; falls back to runtime.real - TODO confirm */
 +            {
 +                runtime.proc = runtime.real;
 +            }
 +        }
 +        else
 +        {
 +            runtime.real = 0;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    /* Finish up, write some stuff
 +     * if rerunMD, don't write last frame again 
 +     */
 +    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
 +               inputrec,nrnb,wcycle,&runtime,
 +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
 +
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        sfree(membed); /* membed was set by init_membed under the same -membed test above */
 +    }
 +
 +    /* Does what it says */  
 +    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
 +
 +    /* Close logfile already here if we were appending to it */
 +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
 +    {
 +        gmx_log_close(fplog);
 +    } 
 +
 +    rc=(int)gmx_get_stop_condition();
 +
 +#ifdef GMX_THREAD_MPI
 +    /* we need to join all threads. The sub-threads join when they
 +       exit this function, but the master thread needs to be told to 
 +       wait for that. */
 +    if (PAR(cr) && MASTER(cr))
 +    {
 +        tMPI_Finalize();
 +    }
 +#endif
 +
 +    return rc;
 +}
Simple merge
Simple merge