Avoid DLB with overloaded PME ranks
[alexxy/gromacs.git] / src / programs / mdrun / md.c
index cf1dd35ec32dd75e33cda2ab0cc0aa50664929af..3d98d597c7d0e9c009e02d816e631b3f36846a25 100644 (file)
@@ -1,47 +1,47 @@
-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+/*
+ * This file is part of the GROMACS molecular simulation package.
  *
- * 
- *                This source code is part of
- * 
- *                 G   R   O   M   A   C   S
- * 
- *          GROningen MAchine for Chemical Simulations
- * 
- *                        VERSION 3.2.0
- * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team,
- * check out http://www.gromacs.org for more information.
-
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * Copyright (c) 2001-2004, The GROMACS development team.
+ * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
  * of the License, or (at your option) any later version.
- * 
- * If you want to redistribute modifications, please consider that
- * scientific software is very special. Version control is crucial -
- * bugs must be traceable. We will be happy to consider code for
- * inclusion in the official distribution, but derived work must not
- * be called official GROMACS. Details are found in the README & COPYING
- * files - if they are missing, get the official version at www.gromacs.org.
- * 
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
  * To help us fund GROMACS development, we humbly ask that you cite
- * the papers on the package - you can find them in the top README file.
- * 
- * For more info, check our website at http://www.gromacs.org
- * 
- * And Hey:
- * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ * the research papers on the package. Check out http://www.gromacs.org.
  */
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include "typedefs.h"
-#include "smalloc.h"
+#include "gromacs/utility/smalloc.h"
 #include "sysstuff.h"
 #include "vec.h"
-#include "statutil.h"
 #include "vcm.h"
 #include "mdebin.h"
 #include "nrnb.h"
 #include "vsite.h"
 #include "update.h"
 #include "ns.h"
-#include "trnio.h"
-#include "xtcio.h"
 #include "mdrun.h"
 #include "md_support.h"
 #include "md_logging.h"
-#include "confio.h"
 #include "network.h"
-#include "pull.h"
 #include "xvgr.h"
 #include "physics.h"
 #include "names.h"
-#include "xmdrun.h"
-#include "ionize.h"
+#include "force.h"
 #include "disre.h"
 #include "orires.h"
 #include "pme.h"
 #include "mdatoms.h"
 #include "repl_ex.h"
+#include "deform.h"
 #include "qmmm.h"
 #include "domdec.h"
 #include "domdec_network.h"
-#include "partdec.h"
-#include "topsort.h"
+#include "gromacs/gmxlib/topsort.h"
 #include "coulomb.h"
 #include "constr.h"
 #include "shellfc.h"
-#include "compute_io.h"
-#include "mvdata.h"
+#include "gromacs/gmxpreprocess/compute_io.h"
 #include "checkpoint.h"
 #include "mtop_util.h"
 #include "sighandler.h"
 #include "txtdump.h"
-#include "string2.h"
+#include "gromacs/utility/cstringutil.h"
 #include "pme_loadbal.h"
 #include "bondf.h"
 #include "membed.h"
 #include "types/iteratedconstraints.h"
 #include "nbnxn_cuda_data_mgmt.h"
 
-#ifdef GMX_LIB_MPI
-#include <mpi.h>
-#endif
-#ifdef GMX_THREAD_MPI
-#include "tmpi.h"
-#endif
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/fileio/confio.h"
+#include "gromacs/fileio/trajectory_writing.h"
+#include "gromacs/fileio/trnio.h"
+#include "gromacs/fileio/trxio.h"
+#include "gromacs/fileio/xtcio.h"
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/swap/swapcoords.h"
+#include "gromacs/imd/imd.h"
 
 #ifdef GMX_FAHCORE
 #include "corewrap.h"
 #endif
 
-static void reset_all_counters(FILE *fplog,t_commrec *cr,
-                               gmx_large_int_t step,
-                               gmx_large_int_t *step_rel,t_inputrec *ir,
-                               gmx_wallcycle_t wcycle,t_nrnb *nrnb,
-                               gmx_runtime_t *runtime,
+static void reset_all_counters(FILE *fplog, t_commrec *cr,
+                               gmx_int64_t step,
+                               gmx_int64_t *step_rel, t_inputrec *ir,
+                               gmx_wallcycle_t wcycle, t_nrnb *nrnb,
+                               gmx_walltime_accounting_t walltime_accounting,
                                nbnxn_cuda_ptr_t cu_nbv)
 {
     char sbuf[STEPSTRSIZE];
 
     /* Reset all the counters related to performance over the run */
-    md_print_warn(cr,fplog,"step %s: resetting all time and cycle counters\n",
-                  gmx_step_str(step,sbuf));
+    md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
+                  gmx_step_str(step, sbuf));
 
     if (cu_nbv)
     {
         nbnxn_cuda_reset_timings(cu_nbv);
     }
 
-    wallcycle_stop(wcycle,ewcRUN);
+    wallcycle_stop(wcycle, ewcRUN);
     wallcycle_reset_all(wcycle);
     if (DOMAINDECOMP(cr))
     {
@@ -128,149 +127,138 @@ static void reset_all_counters(FILE *fplog,t_commrec *cr,
     init_nrnb(nrnb);
     ir->init_step += *step_rel;
     ir->nsteps    -= *step_rel;
-    *step_rel = 0;
-    wallcycle_start(wcycle,ewcRUN);
-    runtime_start(runtime);
-    print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
+    *step_rel      = 0;
+    wallcycle_start(wcycle, ewcRUN);
+    walltime_accounting_start(walltime_accounting);
+    print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime());
 }
 
-double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
-             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
+             const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
              int nstglobalcomm,
-             gmx_vsite_t *vsite,gmx_constr_t constr,
-             int stepout,t_inputrec *ir,
+             gmx_vsite_t *vsite, gmx_constr_t constr,
+             int stepout, t_inputrec *ir,
              gmx_mtop_t *top_global,
              t_fcdata *fcd,
              t_state *state_global,
              t_mdatoms *mdatoms,
-             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
-             gmx_edsam_t ed,t_forcerec *fr,
-             int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
-             real cpt_period,real max_hours,
-             const char *deviceOptions,
+             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+             gmx_edsam_t ed, t_forcerec *fr,
+             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed,
+             real cpt_period, real max_hours,
+             const char gmx_unused *deviceOptions,
+             int imdport,
              unsigned long Flags,
-             gmx_runtime_t *runtime)
+             gmx_walltime_accounting_t walltime_accounting)
 {
-    gmx_mdoutf_t *outf;
-    gmx_large_int_t step,step_rel;
-    double     run_time;
-    double     t,t0,lam0[efptNR];
-    gmx_bool       bGStatEveryStep,bGStat,bCalcVir,bCalcEner;
-    gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
-               bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
-               bBornRadii,bStartingFromCpt;
-    gmx_bool   bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
-    gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
-               bForceUpdate=FALSE,bCPT;
-    int        mdof_flags;
-    gmx_bool       bMasterState;
-    int        force_flags,cglo_flags;
-    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
-    int        i,m;
-    t_trxstatus *status;
-    rvec       mu_tot;
-    t_vcm      *vcm;
-    t_state    *bufstate=NULL;   
-    matrix     *scale_tot,pcoupl_mu,M,ebox;
-    gmx_nlheur_t nlh;
-    t_trxframe rerun_fr;
-    gmx_repl_ex_t repl_ex=NULL;
-    int        nchkpt=1;
-    gmx_localtop_t *top;       
-    t_mdebin *mdebin=NULL;
-    df_history_t df_history;
-    t_state    *state=NULL;
-    rvec       *f_global=NULL;
-    int        n_xtc=-1;
-    rvec       *x_xtc=NULL;
-    gmx_enerdata_t *enerd;
-    rvec       *f=NULL;
+    gmx_mdoutf_t    outf = NULL;
+    gmx_int64_t     step, step_rel;
+    double          elapsed_time;
+    double          t, t0, lam0[efptNR];
+    gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEner;
+    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
+                    bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep,
+                    bBornRadii, bStartingFromCpt;
+    gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
+    gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
+                      bForceUpdate = FALSE, bCPT;
+    gmx_bool          bMasterState;
+    int               force_flags, cglo_flags;
+    tensor            force_vir, shake_vir, total_vir, tmp_vir, pres;
+    int               i, m;
+    t_trxstatus      *status;
+    rvec              mu_tot;
+    t_vcm            *vcm;
+    t_state          *bufstate = NULL;
+    matrix           *scale_tot, pcoupl_mu, M, ebox;
+    gmx_nlheur_t      nlh;
+    t_trxframe        rerun_fr;
+    gmx_repl_ex_t     repl_ex = NULL;
+    int               nchkpt  = 1;
+    gmx_localtop_t   *top;
+    t_mdebin         *mdebin   = NULL;
+    t_state          *state    = NULL;
+    rvec             *f_global = NULL;
+    gmx_enerdata_t   *enerd;
+    rvec             *f = NULL;
     gmx_global_stat_t gstat;
-    gmx_update_t upd=NULL;
-    t_graph    *graph=NULL;
-    globsig_t   gs;
-    gmx_rng_t mcrng=NULL;
-    gmx_bool        bFFscan;
-    gmx_groups_t *groups;
-    gmx_ekindata_t *ekind, *ekind_save;
-    gmx_shellfc_t shellfc;
-    int         count,nconverged=0;
-    real        timestep=0;
-    double      tcount=0;
-    gmx_bool        bIonize=FALSE;
-    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
-    gmx_bool        bAppend;
-    gmx_bool        bResetCountersHalfMaxH=FALSE;
-    gmx_bool        bVV,bIterativeCase,bFirstIterate,bTemp,bPres,bTrotter;
-    gmx_bool        bUpdateDoLR;
-    real        mu_aver=0,dvdl;
-    int         a0,a1,gnx=0,ii;
-    atom_id     *grpindex=NULL;
-    char        *grpname;
-    t_coupl_rec *tcr=NULL;
-    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
-    matrix      boxcopy={{0}},lastbox;
-       tensor      tmpvir;
-       real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
-       real        vetanew = 0;
-    int         lamnew=0;
+    gmx_update_t      upd   = NULL;
+    t_graph          *graph = NULL;
+    globsig_t         gs;
+    gmx_groups_t     *groups;
+    gmx_ekindata_t   *ekind, *ekind_save;
+    gmx_shellfc_t     shellfc;
+    int               count, nconverged = 0;
+    real              timestep   = 0;
+    double            tcount     = 0;
+    gmx_bool          bConverged = TRUE, bOK, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
+    gmx_bool          bAppend;
+    gmx_bool          bResetCountersHalfMaxH = FALSE;
+    gmx_bool          bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter;
+    gmx_bool          bUpdateDoLR;
+    real              dvdl_constr;
+    rvec             *cbuf = NULL;
+    matrix            lastbox;
+    real              veta_save, scalevir, tracevir;
+    real              vetanew = 0;
+    int               lamnew  = 0;
     /* for FEP */
-    int         nstfep;
-    real        rate;
-    double      cycles;
-       real        saved_conserved_quantity = 0;
-    real        last_ekin = 0;
-       int         iter_i;
-       t_extmass   MassQ;
-    int         **trotter_seq; 
-    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
-    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
-    gmx_iterate_t iterate;
-    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
-                                          simulation stops. If equal to zero, don't
-                                          communicate any more between multisims.*/
+    int               nstfep;
+    double            cycles;
+    real              saved_conserved_quantity = 0;
+    real              last_ekin                = 0;
+    int               iter_i;
+    t_extmass         MassQ;
+    int             **trotter_seq;
+    char              sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
+    int               handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/
+    gmx_iterate_t     iterate;
+    gmx_int64_t       multisim_nsteps = -1;                        /* number of steps to do  before first multisim
+                                                                          simulation stops. If equal to zero, don't
+                                                                          communicate any more between multisims.*/
     /* PME load balancing data for GPU kernels */
-    pme_load_balancing_t pme_loadbal=NULL;
-    double          cycles_pmes;
-    gmx_bool        bPMETuneTry=FALSE,bPMETuneRunning=FALSE;
+    pme_load_balancing_t pme_loadbal = NULL;
+    double               cycles_pmes;
+    gmx_bool             bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
+
+    /* Interactive MD */
+    gmx_bool          bIMDstep = FALSE;
 
 #ifdef GMX_FAHCORE
     /* Temporary addition for FAHCORE checkpointing */
     int chkpt_ret;
 #endif
-    
+
     /* Check for special mdrun options */
     bRerunMD = (Flags & MD_RERUN);
-    bIonize  = (Flags & MD_IONIZE);
-    bFFscan  = (Flags & MD_FFSCAN);
     bAppend  = (Flags & MD_APPENDFILES);
     if (Flags & MD_RESETCOUNTERSHALFWAY)
     {
         if (ir->nsteps > 0)
         {
             /* Signal to reset the counters half the simulation steps. */
-            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
+            wcycle_set_reset_counters(wcycle, ir->nsteps/2);
         }
         /* Signal to reset the counters halfway the simulation time. */
         bResetCountersHalfMaxH = (max_hours > 0);
     }
 
-    /* md-vv uses averaged full step velocities for T-control 
+    /* md-vv uses averaged full step velocities for T-control
        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
     bVV = EI_VV(ir->eI);
     if (bVV) /* to store the initial velocities while computing virial */
     {
-        snew(cbuf,top_global->natoms);
+        snew(cbuf, top_global->natoms);
     }
-    /* all the iteratative cases - only if there are constraints */ 
+    /* all the iteratative cases - only if there are constraints */
     bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
-    gmx_iterate_init(&iterate,FALSE); /* The default value of iterate->bIterationActive is set to
-                                         false in this step.  The correct value, true or false,
-                                         is set at each step, as it depends on the frequency of temperature
-                                         and pressure control.*/
+    gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to
+                                          false in this step.  The correct value, true or false,
+                                          is set at each step, as it depends on the frequency of temperature
+                                          and pressure control.*/
     bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
-    
+
     if (bRerunMD)
     {
         /* Since we don't know if the frames read are related in any way,
@@ -281,9 +269,9 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         nstglobalcomm     = 1;
     }
 
-    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
+    check_ir_old_tpx_versions(cr, fplog, ir, top_global);
 
-    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
+    nstglobalcomm   = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir);
     bGStatEveryStep = (nstglobalcomm == 1);
 
     if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
@@ -299,24 +287,24 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                 "If you want less energy communication, set nstlist > 3.\n\n");
     }
 
-    if (bRerunMD || bFFscan)
+    if (bRerunMD)
     {
-        ir->nstxtcout = 0;
+        ir->nstxout_compressed = 0;
     }
     groups = &top_global->groups;
 
     /* Initial values */
-    init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
-            &(state_global->fep_state),lam0,
-            nrnb,top_global,&upd,
-            nfile,fnm,&outf,&mdebin,
-            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
+    init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda,
+            &(state_global->fep_state), lam0,
+            nrnb, top_global, &upd,
+            nfile, fnm, &outf, &mdebin,
+            force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle);
 
     clear_mat(total_vir);
     clear_mat(pres);
     /* Energy terms and groups */
-    snew(enerd,1);
-    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
+    snew(enerd, 1);
+    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
                   enerd);
     if (DOMAINDECOMP(cr))
     {
@@ -324,24 +312,16 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
     }
     else
     {
-        snew(f,top_global->natoms);
+        snew(f, top_global->natoms);
     }
 
-    /* lambda Monte carlo random number generator  */
-    if (ir->bExpanded)
-    {
-        mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
-    }
-    /* copy the state into df_history */
-    copy_df_history(&df_history,&state_global->dfhist);
-
     /* Kinetic energy data */
-    snew(ekind,1);
-    init_ekindata(fplog,top_global,&(ir->opts),ekind);
+    snew(ekind, 1);
+    init_ekindata(fplog, top_global, &(ir->opts), ekind);
     /* needed for iteration of constraints */
-    snew(ekind_save,1);
-    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
-    /* Copy the cos acceleration to the groups struct */    
+    snew(ekind_save, 1);
+    init_ekindata(fplog, top_global, &(ir->opts), ekind_save);
+    /* Copy the cos acceleration to the groups struct */
     ekind->cosacc.cos_accel = ir->cos_accel;
 
     gstat = global_stat_init(ir);
@@ -349,111 +329,129 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 
     /* Check for polarizable models and flexible constraints */
     shellfc = init_shell_flexcon(fplog,
-                                 top_global,n_flexible_constraints(constr),
-                                 (ir->bContinuation || 
+                                 top_global, n_flexible_constraints(constr),
+                                 (ir->bContinuation ||
                                   (DOMAINDECOMP(cr) && !MASTER(cr))) ?
                                  NULL : state_global->x);
+    if (shellfc && ir->nstcalcenergy != 1)
+    {
+        gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy);
+    }
+    if (shellfc && DOMAINDECOMP(cr))
+    {
+        gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank");
+    }
+    if (shellfc && ir->eI == eiNM)
+    {
+        /* Currently shells don't work with Normal Modes */
+        gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
+    }
+
+    if (vsite && ir->eI == eiNM)
+    {
+        /* Currently virtual sites don't work with Normal Modes */
+        gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
+    }
 
     if (DEFORM(*ir))
     {
-#ifdef GMX_THREAD_MPI
         tMPI_Thread_mutex_lock(&deform_init_box_mutex);
-#endif
         set_deform_reference_box(upd,
                                  deform_init_init_step_tpx,
                                  deform_init_box_tpx);
-#ifdef GMX_THREAD_MPI
         tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
-#endif
     }
 
     {
-        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
+        double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1);
         if ((io > 2000) && MASTER(cr))
+        {
             fprintf(stderr,
                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
                     io);
+        }
     }
 
-    if (DOMAINDECOMP(cr)) {
+    if (DOMAINDECOMP(cr))
+    {
         top = dd_init_local_top(top_global);
 
-        snew(state,1);
-        dd_init_local_state(cr->dd,state_global,state);
-
-        if (DDMASTER(cr->dd) && ir->nstfout) {
-            snew(f_global,state_global->natoms);
-        }
-    } else {
-        if (PAR(cr)) {
-            /* Initialize the particle decomposition and split the topology */
-            top = split_system(fplog,top_global,ir,cr);
-
-            pd_cg_range(cr,&fr->cg0,&fr->hcg);
-            pd_at_range(cr,&a0,&a1);
-        } else {
-            top = gmx_mtop_generate_local_top(top_global,ir);
+        snew(state, 1);
+        dd_init_local_state(cr->dd, state_global, state);
 
-            a0 = 0;
-            a1 = top_global->natoms;
+        if (DDMASTER(cr->dd) && ir->nstfout)
+        {
+            snew(f_global, state_global->natoms);
         }
+    }
+    else
+    {
+        top = gmx_mtop_generate_local_top(top_global, ir);
 
-        forcerec_set_excl_load(fr,top,cr);
+        forcerec_set_excl_load(fr, top);
 
-        state = partdec_init_local_state(cr,state_global);
+        state    = serial_init_local_state(state_global);
         f_global = f;
 
-        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
+        atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);
 
-        if (vsite) {
-            set_vsite_top(vsite,top,mdatoms,cr);
+        if (vsite)
+        {
+            set_vsite_top(vsite, top, mdatoms, cr);
         }
 
-        if (ir->ePBC != epbcNONE && !fr->bMolPBC) {
-            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
+        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
+        {
+            graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
         }
 
-        if (shellfc) {
-            make_local_shells(cr,mdatoms,shellfc);
+        if (shellfc)
+        {
+            make_local_shells(cr, mdatoms, shellfc);
         }
 
-        init_bonded_thread_force_reduction(fr,&top->idef);
-
-        if (ir->pull && PAR(cr)) {
-            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
-        }
+        setup_bonded_threading(fr, &top->idef);
     }
 
+    /* Set up interactive MD (IMD) */
+    init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x,
+             nfile, fnm, oenv, imdport, Flags);
+
     if (DOMAINDECOMP(cr))
     {
         /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
-                            state_global,top_global,ir,
-                            state,&f,mdatoms,top,fr,
-                            vsite,shellfc,constr,
-                            nrnb,wcycle,FALSE);
+        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
+                            state_global, top_global, ir,
+                            state, &f, mdatoms, top, fr,
+                            vsite, shellfc, constr,
+                            nrnb, wcycle, FALSE);
 
     }
 
-    update_mdatoms(mdatoms,state->lambda[efptMASS]);
+    update_mdatoms(mdatoms, state->lambda[efptMASS]);
 
-    if (opt2bSet("-cpi",nfile,fnm))
+    if (opt2bSet("-cpi", nfile, fnm))
     {
-        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
+        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr);
     }
     else
     {
         bStateFromCP = FALSE;
     }
 
+    if (ir->bExpanded)
+    {
+        init_expanded_ensemble(bStateFromCP, ir, &state->dfhist);
+    }
+
     if (MASTER(cr))
     {
         if (bStateFromCP)
         {
             /* Update mdebin with energy history if appending to output files */
-            if ( Flags & MD_APPENDFILES )
+            if (Flags & MD_APPENDFILES)
             {
-                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+                restore_energyhistory_from_state(mdebin, &state_global->enerhist);
             }
             else
             {
@@ -465,61 +463,30 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
             }
         }
         /* Set the initial energy history in state by updating once */
-        update_energyhistory(&state_global->enerhist,mdebin);
-    }  
-
-    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) 
-    {
-        /* Set the random state if we read a checkpoint file */
-        set_stochd_state(upd,state);
-    }
-
-    if (state->flags & (1<<estMC_RNG))
-    {
-        set_mc_state(mcrng,state);
+        update_energyhistory(&state_global->enerhist, mdebin);
     }
 
     /* Initialize constraints */
-    if (constr) {
-        if (!DOMAINDECOMP(cr))
-            set_constraints(constr,top,ir,mdatoms,cr);
-    }
-
-    /* Check whether we have to GCT stuff */
-    bTCR = ftp2bSet(efGCT,nfile,fnm);
-    if (bTCR) {
-        if (MASTER(cr)) {
-            fprintf(stderr,"Will do General Coupling Theory!\n");
-        }
-        gnx = top_global->mols.nr;
-        snew(grpindex,gnx);
-        for(i=0; (i<gnx); i++) {
-            grpindex[i] = i;
-        }
-    }
-
-    if (repl_ex_nst > 0)
+    if (constr && !DOMAINDECOMP(cr))
     {
-        /* We need to be sure replica exchange can only occur
-         * when the energies are current */
-        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
-                        "repl_ex_nst",&repl_ex_nst);
-        /* This check needs to happen before inter-simulation
-         * signals are initialized, too */
+        set_constraints(constr, top, ir, mdatoms, cr);
     }
+
     if (repl_ex_nst > 0 && MASTER(cr))
     {
-        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
-                                        repl_ex_nst,repl_ex_nex,repl_ex_seed);
+        repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir,
+                                        repl_ex_nst, repl_ex_nex, repl_ex_seed);
     }
 
-    /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
+    /* PME tuning is only supported with GPUs or PME nodes and not with rerun.
+     * PME tuning is not supported with PME only for LJ and not for Coulomb.
+     */
     if ((Flags & MD_TUNEPME) &&
         EEL_PME(fr->eeltype) &&
         ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
         !bRerunMD)
     {
-        pme_loadbal_init(&pme_loadbal,ir,state->box,fr->ic,fr->pmedata);
+        pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata);
         cycles_pmes = 0;
         if (cr->duty & DUTY_PME)
         {
@@ -538,9 +505,9 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
         {
             /* Set the velocities of frozen particles to zero */
-            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
+            for (i = 0; i < mdatoms->homenr; i++)
             {
-                for(m=0; m<DIM; m++)
+                for (m = 0; m < DIM; m++)
                 {
                     if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
                     {
@@ -553,110 +520,104 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         if (constr)
         {
             /* Constrain the initial coordinates and velocities */
-            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
-                               graph,cr,nrnb,fr,top,shake_vir);
+            do_constrain_first(fplog, constr, ir, mdatoms, state,
+                               cr, nrnb, fr, top);
         }
         if (vsite)
         {
             /* Construct the virtual sites for the initial configuration */
-            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
-                             top->idef.iparams,top->idef.il,
-                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+            construct_vsites(vsite, state->x, ir->delta_t, NULL,
+                             top->idef.iparams, top->idef.il,
+                             fr->ePBC, fr->bMolPBC, cr, state->box);
         }
     }
 
     debug_gmx();
-  
-    /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
+
+    /* set free energy calculation frequency as the minimum
+       greatest common denominator of nstdhdl, nstexpanded, and repl_ex_nst*/
     nstfep = ir->fepvals->nstdhdl;
-    if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
+    if (ir->bExpanded)
     {
-        nstfep = ir->expandedvals->nstexpanded;
+        nstfep = gmx_greatest_common_divisor(ir->fepvals->nstdhdl, nstfep);
     }
-    if (repl_ex_nst > 0 && nstfep > repl_ex_nst)
+    if (repl_ex_nst > 0)
     {
-        nstfep = repl_ex_nst;
+        nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep);
     }
 
     /* I'm assuming we need global communication the first time! MRS */
     cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
-                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
-                  | (bVV ? CGLO_PRESSURE:0)
-                  | (bVV ? CGLO_CONSTRAINT:0)
-                  | (bRerunMD ? CGLO_RERUNMD:0)
-                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
-    
+                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0)
+                  | (bVV ? CGLO_PRESSURE : 0)
+                  | (bVV ? CGLO_CONSTRAINT : 0)
+                  | (bRerunMD ? CGLO_RERUNMD : 0)
+                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
+
     bSumEkinhOld = FALSE;
-    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
-                    NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
-                    constr,NULL,FALSE,state->box,
-                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
-    if (ir->eI == eiVVAK) {
-        /* a second call to get the half step temperature initialized as well */ 
-        /* we do the same call as above, but turn the pressure off -- internally to 
-           compute_globals, this is recognized as a velocity verlet half-step 
-           kinetic energy calculation.  This minimized excess variables, but 
+    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                    NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                    constr, NULL, FALSE, state->box,
+                    top_global, &bSumEkinhOld, cglo_flags);
+    if (ir->eI == eiVVAK)
+    {
+        /* a second call to get the half step temperature initialized as well */
+        /* we do the same call as above, but turn the pressure off -- internally to
+           compute_globals, this is recognized as a velocity verlet half-step
+           kinetic energy calculation.  This minimized excess variables, but
            perhaps loses some logic?*/
-        
-        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
-                        NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
-                        constr,NULL,FALSE,state->box,
-                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
-                        cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
+
+        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                        NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                        constr, NULL, FALSE, state->box,
+                        top_global, &bSumEkinhOld,
+                        cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
     }
-    
+
     /* Calculate the initial half step temperature, and save the ekinh_old */
-    if (!(Flags & MD_STARTFROMCPT)) 
+    if (!(Flags & MD_STARTFROMCPT))
     {
-        for(i=0; (i<ir->opts.ngtc); i++) 
+        for (i = 0; (i < ir->opts.ngtc); i++)
         {
-            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
-        } 
+            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
+        }
     }
-    if (ir->eI != eiVV) 
+    if (ir->eI != eiVV)
     {
         enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
                                      and there is no previous step */
     }
-    
+
     /* if using an iterative algorithm, we need to create a working directory for the state. */
     if (bIterativeCase)
     {
-            bufstate = init_bufstate(state);
+        bufstate = init_bufstate(state);
     }
-    if (bFFscan) 
-    {
-        snew(xcopy,state->natoms);
-        snew(vcopy,state->natoms);
-        copy_rvecn(state->x,xcopy,0,state->natoms);
-        copy_rvecn(state->v,vcopy,0,state->natoms);
-        copy_mat(state->box,boxcopy);
-    } 
-    
+
     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
        temperature control */
-    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
-    
+    trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
+
     if (MASTER(cr))
     {
         if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
         {
             fprintf(fplog,
                     "RMS relative constraint deviation after constraining: %.2e\n",
-                    constr_rmsd(constr,FALSE));
+                    constr_rmsd(constr, FALSE));
         }
         if (EI_STATE_VELOCITY(ir->eI))
         {
-            fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
+            fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]);
         }
         if (bRerunMD)
         {
-            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
+            fprintf(stderr, "starting md rerun '%s', reading coordinates from"
                     " input trajectory '%s'\n\n",
-                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
+                    *(top_global->name), opt2fn("-rerun", nfile, fnm));
             if (bVerbose)
             {
-                fprintf(stderr,"Calculated time to finish depends on nsteps from "
+                fprintf(stderr, "Calculated time to finish depends on nsteps from "
                         "run input file,\nwhich may not correspond to the time "
                         "needed to process input trajectory.\n\n");
             }
@@ -664,53 +625,50 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         else
         {
             char tbuf[20];
-            fprintf(stderr,"starting mdrun '%s'\n",
+            fprintf(stderr, "starting mdrun '%s'\n",
                     *(top_global->name));
             if (ir->nsteps >= 0)
             {
-                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
+                sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
             }
             else
             {
-                sprintf(tbuf,"%s","infinite");
+                sprintf(tbuf, "%s", "infinite");
             }
             if (ir->init_step > 0)
             {
-                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
-                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
-                        gmx_step_str(ir->init_step,sbuf2),
+                fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
+                        gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
+                        gmx_step_str(ir->init_step, sbuf2),
                         ir->init_step*ir->delta_t);
             }
             else
             {
-                fprintf(stderr,"%s steps, %s ps.\n",
-                        gmx_step_str(ir->nsteps,sbuf),tbuf);
+                fprintf(stderr, "%s steps, %s ps.\n",
+                        gmx_step_str(ir->nsteps, sbuf), tbuf);
             }
         }
-        fprintf(fplog,"\n");
+        fprintf(fplog, "\n");
     }
 
-    /* Set and write start time */
-    runtime_start(runtime);
-    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
-    wallcycle_start(wcycle,ewcRUN);
-    if (fplog)
-    {
-        fprintf(fplog,"\n");
-    }
+    walltime_accounting_start(walltime_accounting);
+    wallcycle_start(wcycle, ewcRUN);
+    print_start(fplog, cr, walltime_accounting, "mdrun");
 
     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 #ifdef GMX_FAHCORE
-    chkpt_ret=fcCheckPointParallel( cr->nodeid,
-                                    NULL,0);
-    if ( chkpt_ret == 0 ) 
-        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
+    chkpt_ret = fcCheckPointParallel( cr->nodeid,
+                                      NULL, 0);
+    if (chkpt_ret == 0)
+    {
+        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
+    }
 #endif
 
     debug_gmx();
     /***********************************************************
      *
-     *             Loop over MD steps 
+     *             Loop over MD steps
      *
      ************************************************************/
 
@@ -725,32 +683,32 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         rerun_fr.natoms = 0;
         if (MASTER(cr))
         {
-            bNotLastFrame = read_first_frame(oenv,&status,
-                                             opt2fn("-rerun",nfile,fnm),
-                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
+            bNotLastFrame = read_first_frame(oenv, &status,
+                                             opt2fn("-rerun", nfile, fnm),
+                                             &rerun_fr, TRX_NEED_X | TRX_READ_V);
             if (rerun_fr.natoms != top_global->natoms)
             {
                 gmx_fatal(FARGS,
                           "Number of atoms in trajectory (%d) does not match the "
                           "run input file (%d)\n",
-                          rerun_fr.natoms,top_global->natoms);
+                          rerun_fr.natoms, top_global->natoms);
             }
             if (ir->ePBC != epbcNONE)
             {
                 if (!rerun_fr.bBox)
                 {
-                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
+                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
                 }
-                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
+                if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong))
                 {
-                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
+                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
                 }
             }
         }
 
         if (PAR(cr))
         {
-            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
+            rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
         }
 
         if (ir->ePBC != epbcNONE)
@@ -758,103 +716,113 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
             /* Set the shift vectors.
              * Necessary here when have a static box different from the tpr box.
              */
-            calc_shifts(rerun_fr.box,fr->shift_vec);
+            calc_shifts(rerun_fr.box, fr->shift_vec);
         }
     }
 
     /* loop over MD steps or if rerunMD to end of input trajectory */
     bFirstStep = TRUE;
     /* Skip the first Nose-Hoover integration when we get the state from tpx */
-    bStateFromTPX = !bStateFromCP;
-    bInitStep = bFirstStep && (bStateFromTPX || bVV);
+    bStateFromTPX    = !bStateFromCP;
+    bInitStep        = bFirstStep && (bStateFromTPX || bVV);
     bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
-    bLastStep    = FALSE;
-    bSumEkinhOld = FALSE;
-    bExchanged   = FALSE;
+    bLastStep        = FALSE;
+    bSumEkinhOld     = FALSE;
+    bDoReplEx        = FALSE;
+    bExchanged       = FALSE;
+    bNeedRepartition = FALSE;
 
-    init_global_signals(&gs,cr,ir,repl_ex_nst);
+    init_global_signals(&gs, cr, ir, repl_ex_nst);
 
-    step = ir->init_step;
+    step     = ir->init_step;
     step_rel = 0;
 
     if (ir->nstlist == -1)
     {
-        init_nlistheuristics(&nlh,bGStatEveryStep,step);
+        init_nlistheuristics(&nlh, bGStatEveryStep, step);
     }
 
-    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
+    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
     {
         /* check how many steps are left in other sims */
-        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
+        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
     }
 
 
     /* and stop now if we should */
     bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
                  ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
-    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
+    while (!bLastStep || (bRerunMD && bNotLastFrame))
+    {
 
-        wallcycle_start(wcycle,ewcSTEP);
+        wallcycle_start(wcycle, ewcSTEP);
 
-        if (bRerunMD) {
-            if (rerun_fr.bStep) {
-                step = rerun_fr.step;
+        if (bRerunMD)
+        {
+            if (rerun_fr.bStep)
+            {
+                step     = rerun_fr.step;
                 step_rel = step - ir->init_step;
             }
-            if (rerun_fr.bTime) {
+            if (rerun_fr.bTime)
+            {
                 t = rerun_fr.time;
             }
             else
             {
                 t = step;
             }
-        } 
-        else 
+        }
+        else
         {
             bLastStep = (step_rel == ir->nsteps);
-            t = t0 + step*ir->delta_t;
+            t         = t0 + step*ir->delta_t;
         }
 
         if (ir->efep != efepNO || ir->bSimTemp)
-            {
+        {
             /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
                requiring different logic. */
-            
-            set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
-            bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
-            bDoFEP  = (do_per_step(step,nstfep) && (ir->efep != efepNO));
-            bDoExpanded  = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
+
+            set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0);
+            bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
+            bDoFEP       = (do_per_step(step, nstfep) && (ir->efep != efepNO));
+            bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
+                            && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt));
         }
 
-        if (bSimAnn) 
+        bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
+                     do_per_step(step, repl_ex_nst));
+
+        if (bSimAnn)
         {
-            update_annealing_target_temp(&(ir->opts),t);
+            update_annealing_target_temp(&(ir->opts), t);
         }
 
         if (bRerunMD)
         {
-            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
+            if (!DOMAINDECOMP(cr) || MASTER(cr))
             {
-                for(i=0; i<state_global->natoms; i++)
+                for (i = 0; i < state_global->natoms; i++)
                 {
-                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
+                    copy_rvec(rerun_fr.x[i], state_global->x[i]);
                 }
                 if (rerun_fr.bV)
                 {
-                    for(i=0; i<state_global->natoms; i++)
+                    for (i = 0; i < state_global->natoms; i++)
                     {
-                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
+                        copy_rvec(rerun_fr.v[i], state_global->v[i]);
                     }
                 }
                 else
                 {
-                    for(i=0; i<state_global->natoms; i++)
+                    for (i = 0; i < state_global->natoms; i++)
                     {
                         clear_rvec(state_global->v[i]);
                     }
                     if (bRerunWarnNoV)
                     {
-                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
+                        fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n"
                                 "         Ekin, temperature and pressure are incorrect,\n"
                                 "         the virial will be incorrect when constraints are present.\n"
                                 "\n");
@@ -862,105 +830,94 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                     }
                 }
             }
-            copy_mat(rerun_fr.box,state_global->box);
-            copy_mat(state_global->box,state->box);
+            copy_mat(rerun_fr.box, state_global->box);
+            copy_mat(state_global->box, state->box);
 
             if (vsite && (Flags & MD_RERUN_VSITE))
             {
                 if (DOMAINDECOMP(cr))
                 {
-                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
+                    gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank");
                 }
                 if (graph)
                 {
                     /* Following is necessary because the graph may get out of sync
                      * with the coordinates if we only have every N'th coordinate set
                      */
-                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
-                    shift_self(graph,state->box,state->x);
+                    mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
+                    shift_self(graph, state->box, state->x);
                 }
-                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
-                                 top->idef.iparams,top->idef.il,
-                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+                construct_vsites(vsite, state->x, ir->delta_t, state->v,
+                                 top->idef.iparams, top->idef.il,
+                                 fr->ePBC, fr->bMolPBC, cr, state->box);
                 if (graph)
                 {
-                    unshift_self(graph,state->box,state->x);
+                    unshift_self(graph, state->box, state->x);
                 }
             }
         }
 
         /* Stop Center of Mass motion */
-        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
-
-        /* Copy back starting coordinates in case we're doing a forcefield scan */
-        if (bFFscan)
-        {
-            for(ii=0; (ii<state->natoms); ii++)
-            {
-                copy_rvec(xcopy[ii],state->x[ii]);
-                copy_rvec(vcopy[ii],state->v[ii]);
-            }
-            copy_mat(boxcopy,state->box);
-        }
+        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
 
         if (bRerunMD)
         {
             /* for rerun MD always do Neighbour Searching */
-            bNS = (bFirstStep || ir->nstlist != 0);
+            bNS      = (bFirstStep || ir->nstlist != 0);
             bNStList = bNS;
         }
         else
         {
             /* Determine whether or not to do Neighbour Searching and LR */
             bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
-            
-            bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
+
+            bNS = (bFirstStep || bExchanged || bNeedRepartition || bNStList || bDoFEP ||
                    (ir->nstlist == -1 && nlh.nabnsb > 0));
 
             if (bNS && ir->nstlist == -1)
             {
-                set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
+                set_nlistheuristics(&nlh, bFirstStep || bExchanged || bNeedRepartition || bDoFEP, step);
             }
-        } 
+        }
 
-        /* check whether we should stop because another simulation has 
+        /* check whether we should stop because another simulation has
            stopped. */
         if (MULTISIM(cr))
         {
-            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
-                 (multisim_nsteps != ir->nsteps) )  
+            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&
+                 (multisim_nsteps != ir->nsteps) )
             {
                 if (bNS)
                 {
                     if (MASTER(cr))
                     {
-                        fprintf(stderr, 
+                        fprintf(stderr,
                                 "Stopping simulation %d because another one has finished\n",
                                 cr->ms->sim);
                     }
-                    bLastStep=TRUE;
+                    bLastStep         = TRUE;
                     gs.sig[eglsCHKPT] = 1;
                 }
             }
         }
 
         /* < 0 means stop at next step, > 0 means stop at next NS step */
-        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
-             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
+        if ( (gs.set[eglsSTOPCOND] < 0) ||
+             ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) )
         {
             bLastStep = TRUE;
         }
 
         /* Determine whether or not to update the Born radii if doing GB */
-        bBornRadii=bFirstStep;
-        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
+        bBornRadii = bFirstStep;
+        if (ir->implicit_solvent && (step % ir->nstgbradii == 0))
         {
-            bBornRadii=TRUE;
+            bBornRadii = TRUE;
         }
-        
-        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
+
+        do_log     = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep;
         do_verbose = bVerbose &&
-                  (step % stepout == 0 || bFirstStep || bLastStep);
+            (step % stepout == 0 || bFirstStep || bLastStep);
 
         if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
         {
@@ -974,76 +931,56 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                 /* Correct the new box if it is too skewed */
                 if (DYNAMIC_BOX(*ir))
                 {
-                    if (correct_box(fplog,step,state->box,graph))
+                    if (correct_box(fplog, step, state->box, graph))
                     {
                         bMasterState = TRUE;
                     }
                 }
                 if (DOMAINDECOMP(cr) && bMasterState)
                 {
-                    dd_collect_state(cr->dd,state,state_global);
+                    dd_collect_state(cr->dd, state, state_global);
                 }
             }
 
             if (DOMAINDECOMP(cr))
             {
                 /* Repartition the domain decomposition */
-                wallcycle_start(wcycle,ewcDOMDEC);
-                dd_partition_system(fplog,step,cr,
-                                    bMasterState,nstglobalcomm,
-                                    state_global,top_global,ir,
-                                    state,&f,mdatoms,top,fr,
-                                    vsite,shellfc,constr,
-                                    nrnb,wcycle,
+                wallcycle_start(wcycle, ewcDOMDEC);
+                dd_partition_system(fplog, step, cr,
+                                    bMasterState, nstglobalcomm,
+                                    state_global, top_global, ir,
+                                    state, &f, mdatoms, top, fr,
+                                    vsite, shellfc, constr,
+                                    nrnb, wcycle,
                                     do_verbose && !bPMETuneRunning);
-                wallcycle_stop(wcycle,ewcDOMDEC);
+                wallcycle_stop(wcycle, ewcDOMDEC);
                 /* If using an iterative integrator, reallocate space to match the decomposition */
             }
         }
 
-        if (MASTER(cr) && do_log && !bFFscan)
+        if (MASTER(cr) && do_log)
         {
-            print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
+            print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */
         }
 
         if (ir->efep != efepNO)
         {
-            update_mdatoms(mdatoms,state->lambda[efptMASS]);
+            update_mdatoms(mdatoms, state->lambda[efptMASS]);
         }
 
         if ((bRerunMD && rerun_fr.bV) || bExchanged)
         {
-            
+
             /* We need the kinetic energy at minus the half step for determining
              * the full step kinetic energy and possibly for T-coupling.*/
             /* This may not be quite working correctly yet . . . . */
-            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
-                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
-                            constr,NULL,FALSE,state->box,
-                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
+                            constr, NULL, FALSE, state->box,
+                            top_global, &bSumEkinhOld,
                             CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
         }
         clear_mat(force_vir);
-        
-        /* Ionize the atoms if necessary */
-        if (bIonize)
-        {
-            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
-                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
-        }
-        
-        /* Update force field in ffscan program */
-        if (bFFscan)
-        {
-            if (update_forcefield(fplog,
-                                  nfile,fnm,fr,
-                                  mdatoms->nr,state->x,state->box))
-            {
-                gmx_finalize_par();
-
-                exit(0);
-            }
-        }
 
         /* We write a checkpoint at this MD step when:
          * either at an NS step when we signalled through gs,
@@ -1068,36 +1005,36 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                but the virial needs to be calculated on both the current step and the 'next' step. Future
                reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */
 
-            bCalcEner = do_per_step(step-1,ir->nstcalcenergy);
-            bCalcVir = bCalcEner ||
-                (ir->epc != epcNO && (do_per_step(step,ir->nstpcouple) || do_per_step(step-1,ir->nstpcouple)));
+            bCalcEner = do_per_step(step-1, ir->nstcalcenergy);
+            bCalcVir  = bCalcEner ||
+                (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
         }
         else
         {
-            bCalcEner = do_per_step(step,ir->nstcalcenergy);
-            bCalcVir = bCalcEner ||
-                (ir->epc != epcNO && do_per_step(step,ir->nstpcouple));
+            bCalcEner = do_per_step(step, ir->nstcalcenergy);
+            bCalcVir  = bCalcEner ||
+                (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
         }
 
         /* Do we need global communication ? */
         bGStat = (bCalcVir || bCalcEner || bStopCM ||
-                  do_per_step(step,nstglobalcomm) ||
+                  do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) ||
                   (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 
-        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
+        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 
-        if (do_ene || do_log)
+        if (do_ene || do_log || bDoReplEx)
         {
             bCalcVir  = TRUE;
             bCalcEner = TRUE;
             bGStat    = TRUE;
         }
-        
+
         /* these CGLO_ options remain the same throughout the iteration */
         cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
                       (bGStat ? CGLO_GSTAT : 0)
-            );
-        
+                      );
+
         force_flags = (GMX_FORCE_STATECHANGED |
                        ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
                        GMX_FORCE_ALLFORCES |
@@ -1105,29 +1042,29 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                        (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
                        (bCalcEner ? GMX_FORCE_ENERGY : 0) |
                        (bDoFEP ? GMX_FORCE_DHDL : 0)
-            );
+                       );
 
-        if(fr->bTwinRange)
+        if (fr->bTwinRange)
         {
-            if(do_per_step(step,ir->nstcalclr))
+            if (do_per_step(step, ir->nstcalclr))
             {
                 force_flags |= GMX_FORCE_DO_LR;
             }
         }
-        
+
         if (shellfc)
         {
             /* Now is the time to relax the shells */
-            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
-                                      ir,bNS,force_flags,
-                                      bStopCM,top,top_global,
-                                      constr,enerd,fcd,
-                                      state,f,force_vir,mdatoms,
-                                      nrnb,wcycle,graph,groups,
-                                      shellfc,fr,bBornRadii,t,mu_tot,
-                                      state->natoms,&bConverged,vsite,
-                                      outf->fp_field);
-            tcount+=count;
+            count = relax_shell_flexcon(fplog, cr, bVerbose, step,
+                                        ir, bNS, force_flags,
+                                        top,
+                                        constr, enerd, fcd,
+                                        state, f, force_vir, mdatoms,
+                                        nrnb, wcycle, graph, groups,
+                                        shellfc, fr, bBornRadii, t, mu_tot,
+                                        &bConverged, vsite,
+                                        mdoutf_get_fp_field(outf));
+            tcount += count;
 
             if (bConverged)
             {
@@ -1138,45 +1075,35 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
         {
             /* The coordinates (x) are shifted (to get whole molecules)
              * in do_force.
-             * This is parallellized as well, and does communication too. 
+             * This is parallellized as well, and does communication too.
              * Check comments in sim_util.c
              */
-             do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
-                     state->box,state->x,&state->hist,
-                     f,force_vir,mdatoms,enerd,fcd,
-                     state->lambda,graph,
-                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
+            do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups,
+                     state->box, state->x, &state->hist,
+                     f, force_vir, mdatoms, enerd, fcd,
+                     state->lambda, graph,
+                     fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii,
                      (bNS ? GMX_FORCE_NS : 0) | force_flags);
         }
-        
-        if (bTCR)
-        {
-            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
-                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
-        }
-        
-        if (bTCR && bFirstStep)
-        {
-            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
-            fprintf(fplog,"Done init_coupling\n"); 
-            fflush(fplog);
-        }
-        
+
         if (bVV && !bStartingFromCpt && !bRerunMD)
         /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
         {
-            if (ir->eI==eiVV && bInitStep) 
+            wallcycle_start(wcycle, ewcUPDATE);
+            if (ir->eI == eiVV && bInitStep)
             {
                 /* if using velocity verlet with full time step Ekin,
-                 * take the first half step only to compute the 
+                 * take the first half step only to compute the
                  * virial for the first step. From there,
                  * revert back to the initial coordinates
                  * so that the input is actually the initial step.
                  */
-                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
-            } else {
+                copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */
+            }
+            else
+            {
                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
-                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
+                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
             }
 
             /* If we are using twin-range interactions where the long-range component
@@ -1184,35 +1111,40 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
              * step to combine the long-range forces on these steps.
              * For nstcalclr=1 this is not done, since the forces would have been added
              * directly to the short-range forces already.
+             *
+             * TODO Remove various aspects of VV+twin-range in master
+             * branch, because VV integrators did not ever support
+             * twin-range multiple time stepping with constraints.
              */
-            bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
-            
-            update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
-                          f,bUpdateDoLR,fr->f_twin,fcd,
-                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
-                          cr,nrnb,constr,&top->idef);
-            
-            if (bIterativeCase && do_per_step(step-1,ir->nstpcouple) && !bInitStep)
+            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
+
+            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC,
+                          f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
+                          ekind, M, upd, bInitStep, etrtVELOCITY1,
+                          cr, nrnb, constr, &top->idef);
+
+            if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep)
             {
-                gmx_iterate_init(&iterate,TRUE);
+                gmx_iterate_init(&iterate, TRUE);
             }
             /* for iterations, we save these vectors, as we will be self-consistently iterating
                the calculations */
 
             /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
-            
+
             /* save the state */
-            if (iterate.bIterationActive) {
-                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+            if (iterate.bIterationActive)
+            {
+                copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
             }
-            
+
             bFirstIterate = TRUE;
             while (bFirstIterate || iterate.bIterationActive)
             {
                 if (iterate.bIterationActive)
                 {
-                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
-                    if (bFirstIterate && bTrotter) 
+                    copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
+                    if (bFirstIterate && bTrotter)
                     {
                         /* The first time through, we need a decent first estimate
                            of veta(t+dt) to compute the constraints.  Do
@@ -1221,36 +1153,44 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                            should be changed by this routine here.  If
                            !(first time), we start with the previous value
                            of veta.  */
-                        
+
                         veta_save = state->veta;
-                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
-                        vetanew = state->veta;
+                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0);
+                        vetanew     = state->veta;
                         state->veta = veta_save;
-                    } 
-                } 
-                
+                    }
+                }
+
                 bOK = TRUE;
-                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
-                    dvdl = 0;
-                    
-                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
-                                       state,fr->bMolPBC,graph,f,
-                                       &top->idef,shake_vir,NULL,
-                                       cr,nrnb,wcycle,upd,constr,
-                                       bInitStep,TRUE,bCalcVir,vetanew);
-                    
-                    if (!bOK && !bFFscan)
+                if (!bRerunMD || rerun_fr.bV || bForceUpdate)     /* Why is rerun_fr.bV here?  Unclear. */
+                {
+                    wallcycle_stop(wcycle, ewcUPDATE);
+                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
+                                       state, fr->bMolPBC, graph, f,
+                                       &top->idef, shake_vir,
+                                       cr, nrnb, wcycle, upd, constr,
+                                       TRUE, bCalcVir, vetanew);
+                    wallcycle_start(wcycle, ewcUPDATE);
+
+                    if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
+                    {
+                        /* Correct the virial for multiple time stepping */
+                        m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
+                    }
+
+                    if (!bOK)
                     {
-                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+                        gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
                     }
-                    
-                } 
+
+                }
                 else if (graph)
-                { /* Need to unshift here if a do_force has been
-                     called in the previous step */
-                    unshift_self(graph,state->box,state->x);
+                {
+                    /* Need to unshift here if a do_force has been
+                       called in the previous step */
+                    unshift_self(graph, state->box, state->x);
                 }
-                
+
                 /* if VV, compute the pressure and constraints */
                 /* For VV2, we strictly only need this if using pressure
                  * control, but we really would like to have accurate pressures
@@ -1261,27 +1201,29 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                 /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
                 /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
                 bPres = TRUE;
-                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
-                if (bCalcEner && ir->eI==eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
+                bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
+                if (bCalcEner && ir->eI == eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
                 {
                     bSumEkinhOld = TRUE;
                 }
                 /* for vv, the first half of the integration actually corresponds to the previous step.
                    So we need information from the last step in the first half of the integration */
-                if (bGStat || do_per_step(step-1,nstglobalcomm)) {
-                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
-                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
-                                    constr,NULL,FALSE,state->box,
-                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+                if (bGStat || do_per_step(step-1, nstglobalcomm))
+                {
+                    wallcycle_stop(wcycle, ewcUPDATE);
+                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                                    constr, NULL, FALSE, state->box,
+                                    top_global, &bSumEkinhOld,
                                     cglo_flags
                                     | CGLO_ENERGY
-                                    | (bTemp ? CGLO_TEMPERATURE:0)
+                                    | (bTemp ? CGLO_TEMPERATURE : 0)
                                     | (bPres ? CGLO_PRESSURE : 0)
                                     | (bPres ? CGLO_CONSTRAINT : 0)
                                     | ((iterate.bIterationActive) ? CGLO_ITERATE : 0)
                                     | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
                                     | CGLO_SCALEEKIN
-                        );
+                                    );
                     /* explanation of above:
                        a) We compute Ekin at the full time step
                        if 1) we are using the AveVel Ekin, and it's not the
@@ -1289,243 +1231,166 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                        time step kinetic energy for the pressure (always true now, since we want accurate statistics).
                        b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
                        EkinAveVel because it's needed for the pressure */
+                    wallcycle_start(wcycle, ewcUPDATE);
                 }
                 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
-                if (!bInitStep) 
+                if (!bInitStep)
                 {
                     if (bTrotter)
                     {
-                        m_add(force_vir,shake_vir,total_vir); /* we need the un-dispersion corrected total vir here */
-                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
-                    } 
-                    else 
+                        m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */
+                        trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
+                    }
+                    else
                     {
                         if (bExchanged)
                         {
-            
+                            wallcycle_stop(wcycle, ewcUPDATE);
                             /* We need the kinetic energy at minus the half step for determining
                              * the full step kinetic energy and possibly for T-coupling.*/
                             /* This may not be quite working correctly yet . . . . */
-                            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
-                                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
-                                            constr,NULL,FALSE,state->box,
-                                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+                            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
+                                            constr, NULL, FALSE, state->box,
+                                            top_global, &bSumEkinhOld,
                                             CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
+                            wallcycle_start(wcycle, ewcUPDATE);
                         }
                     }
                 }
-                
+
                 if (iterate.bIterationActive &&
-                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
-                                   state->veta,&vetanew)) 
+                    done_iterating(cr, fplog, step, &iterate, bFirstIterate,
+                                   state->veta, &vetanew))
                 {
                     break;
                 }
                 bFirstIterate = FALSE;
             }
 
-            if (bTrotter && !bInitStep) {
-                enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
-                copy_mat(shake_vir,state->svir_prev);
-                copy_mat(force_vir,state->fvir_prev);
-                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
+            if (bTrotter && !bInitStep)
+            {
+                copy_mat(shake_vir, state->svir_prev);
+                copy_mat(force_vir, state->fvir_prev);
+                if (IR_NVT_TROTTER(ir) && ir->eI == eiVV)
+                {
                     /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
-                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
+                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE);
                     enerd->term[F_EKIN] = trace(ekind->ekin);
                 }
             }
             /* if it's the initial step, we performed this first step just to get the constraint virial */
-            if (bInitStep && ir->eI==eiVV) {
-                copy_rvecn(cbuf,state->v,0,state->natoms);
-            }
-            
-            if (fr->bSepDVDL && fplog && do_log) 
+            if (bInitStep && ir->eI == eiVV)
             {
-                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
+                copy_rvecn(cbuf, state->v, 0, state->natoms);
             }
-            enerd->term[F_DVDL_BONDED] += dvdl;
+            wallcycle_stop(wcycle, ewcUPDATE);
         }
-    
+
         /* MRS -- now done iterating -- compute the conserved quantity */
-        if (bVV) {
-            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
-            if (ir->eI==eiVV) 
+        if (bVV)
+        {
+            saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ);
+            if (ir->eI == eiVV)
             {
                 last_ekin = enerd->term[F_EKIN];
             }
-            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
+            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
             {
                 saved_conserved_quantity -= enerd->term[F_DISPCORR];
             }
             /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
             if (!bRerunMD)
             {
-                sum_dhdl(enerd,state->lambda,ir->fepvals);
+                sum_dhdl(enerd, state->lambda, ir->fepvals);
             }
         }
-        
+
         /* ########  END FIRST UPDATE STEP  ############## */
         /* ########  If doing VV, we now have v(dt) ###### */
-        if (bDoExpanded) {
+        if (bDoExpanded)
+        {
             /* perform extended ensemble sampling in lambda - we don't
                actually move to the new state before outputting
                statistics, but if performing simulated tempering, we
                do update the velocities and the tau_t. */
-        
-            lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
+
+            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms);
+            /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
+            copy_df_history(&state_global->dfhist, &state->dfhist);
         }
-        /* ################## START TRAJECTORY OUTPUT ################# */
-        
-        /* Now we have the energies and forces corresponding to the 
+
+        /* Now we have the energies and forces corresponding to the
          * coordinates at time t. We must output all of this before
          * the update.
-         * for RerunMD t is read from input trajectory
          */
-        mdof_flags = 0;
-        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
-        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
-        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
-        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
-        if (bCPT) { mdof_flags |= MDOF_CPT; };
+        do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
+                                 ir, state, state_global, top_global, fr,
+                                 outf, mdebin, ekind, f, f_global,
+                                 &nchkpt,
+                                 bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT),
+                                 bSumEkinhOld);
+        /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
+        bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle);
 
-#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
-        if (bLastStep)
-        {
-            /* Enforce writing positions and velocities at end of run */
-            mdof_flags |= (MDOF_X | MDOF_V);
-        }
-#endif
-#ifdef GMX_FAHCORE
-        if (MASTER(cr))
-            fcReportProgress( ir->nsteps, step );
-
-        /* sync bCPT and fc record-keeping */
-        if (bCPT && MASTER(cr))
-            fcRequestCheckPoint();
-#endif
-        
-        if (mdof_flags != 0)
-        {
-            wallcycle_start(wcycle,ewcTRAJ);
-            if (bCPT)
-            {
-                if (state->flags & (1<<estLD_RNG))
-                {
-                    get_stochd_state(upd,state);
-                }
-                if (state->flags  & (1<<estMC_RNG))
-                {
-                    get_mc_state(mcrng,state);
-                }
-                if (MASTER(cr))
-                {
-                    if (bSumEkinhOld)
-                    {
-                        state_global->ekinstate.bUpToDate = FALSE;
-                    }
-                    else
-                    {
-                        update_ekinstate(&state_global->ekinstate,ekind);
-                        state_global->ekinstate.bUpToDate = TRUE;
-                    }
-                    update_energyhistory(&state_global->enerhist,mdebin);
-                    if (ir->efep!=efepNO || ir->bSimTemp) 
-                    {
-                        state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
-                                                                       structured so this isn't necessary.
-                                                                       Note this reassignment is only necessary
-                                                                       for single threads.*/
-                        copy_df_history(&state_global->dfhist,&df_history);
-                    }
-                }
-            }
-            write_traj(fplog,cr,outf,mdof_flags,top_global,
-                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
-            if (bCPT)
-            {
-                nchkpt++;
-                bCPT = FALSE;
-            }
-            debug_gmx();
-            if (bLastStep && step_rel == ir->nsteps &&
-                (Flags & MD_CONFOUT) && MASTER(cr) &&
-                !bRerunMD && !bFFscan)
-            {
-                /* x and v have been collected in write_traj,
-                 * because a checkpoint file will always be written
-                 * at the last step.
-                 */
-                fprintf(stderr,"\nWriting final coordinates.\n");
-                if (fr->bMolPBC)
-                {
-                    /* Make molecules whole only for confout writing */
-                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
-                }
-                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
-                                    *top_global->name,top_global,
-                                    state_global->x,state_global->v,
-                                    ir->ePBC,state->box);
-                debug_gmx();
-            }
-            wallcycle_stop(wcycle,ewcTRAJ);
-        }
-        
         /* kludge -- virial is lost with restart for NPT control. Must restart */
-        if (bStartingFromCpt && bVV) 
+        if (bStartingFromCpt && bVV)
         {
-            copy_mat(state->svir_prev,shake_vir);
-            copy_mat(state->fvir_prev,force_vir);
+            copy_mat(state->svir_prev, shake_vir);
+            copy_mat(state->fvir_prev, force_vir);
         }
-        /*  ################## END TRAJECTORY OUTPUT ################ */
-        
-        /* Determine the wallclock run time up till now */
-        run_time = gmx_gettime() - (double)runtime->real;
 
-        /* Check whether everything is still allright */    
+        elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting);
+
+        /* Check whether everything is still allright */
         if (((int)gmx_get_stop_condition() > handled_stop_condition)
 #ifdef GMX_THREAD_MPI
             && MASTER(cr)
 #endif
             )
         {
-            /* this is just make gs.sig compatible with the hack 
+            /* this is just make gs.sig compatible with the hack
                of sending signals around by MPI_Reduce with together with
                other floats */
-            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
-                gs.sig[eglsSTOPCOND]=1;
-            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
-                gs.sig[eglsSTOPCOND]=-1;
+            if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
+            {
+                gs.sig[eglsSTOPCOND] = 1;
+            }
+            if (gmx_get_stop_condition() == gmx_stop_cond_next)
+            {
+                gs.sig[eglsSTOPCOND] = -1;
+            }
             /* < 0 means stop at next step, > 0 means stop at next NS step */
             if (fplog)
             {
                 fprintf(fplog,
                         "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
                         gmx_get_signal_name(),
-                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+                        gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
                 fflush(fplog);
             }
             fprintf(stderr,
                     "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
                     gmx_get_signal_name(),
-                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+                    gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
             fflush(stderr);
-            handled_stop_condition=(int)gmx_get_stop_condition();
+            handled_stop_condition = (int)gmx_get_stop_condition();
         }
         else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
-                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
+                 (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) &&
                  gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
         {
             /* Signal to terminate the run */
             gs.sig[eglsSTOPCOND] = 1;
             if (fplog)
             {
-                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+                fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
             }
-            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
         }
 
         if (bResetCountersHalfMaxH && MASTER(cr) &&
-            run_time > max_hours*60.0*60.0*0.495)
+            elapsed_time > max_hours*60.0*60.0*0.495)
         {
             gs.sig[eglsRESETCOUNTERS] = 1;
         }
@@ -1543,8 +1408,8 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
              */
             if (step >= nlh.step_nscheck)
             {
-                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
-                                                     nlh.scale_tot,state->x);
+                nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs,
+                                                     nlh.scale_tot, state->x);
             }
             else
             {
@@ -1561,432 +1426,419 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
          */
         if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
                            cpt_period >= 0 &&
-                           (cpt_period == 0 || 
-                            run_time >= nchkpt*cpt_period*60.0)) &&
+                           (cpt_period == 0 ||
+                            elapsed_time >= nchkpt*cpt_period*60.0)) &&
             gs.set[eglsCHKPT] == 0)
         {
             gs.sig[eglsCHKPT] = 1;
         }
-  
+
         /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */
         if (EI_VV(ir->eI))
         {
             if (!bInitStep)
             {
-                update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
+                update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
             }
             if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
             {
                 gmx_bool bIfRandomize;
-                bIfRandomize = update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr);
+                bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr);
                 /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
                 if (constr && bIfRandomize)
                 {
-                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
-                                       state,fr->bMolPBC,graph,f,
-                                       &top->idef,tmp_vir,NULL,
-                                       cr,nrnb,wcycle,upd,constr,
-                                       bInitStep,TRUE,bCalcVir,vetanew);
+                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
+                                       state, fr->bMolPBC, graph, f,
+                                       &top->idef, tmp_vir,
+                                       cr, nrnb, wcycle, upd, constr,
+                                       TRUE, bCalcVir, vetanew);
                 }
             }
         }
 
-        if (bIterativeCase && do_per_step(step,ir->nstpcouple))
+        if (bIterativeCase && do_per_step(step, ir->nstpcouple))
         {
-            gmx_iterate_init(&iterate,TRUE);
+            gmx_iterate_init(&iterate, TRUE);
             /* for iterations, we save these vectors, as we will be redoing the calculations */
-            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+            copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts));
         }
 
         bFirstIterate = TRUE;
         while (bFirstIterate || iterate.bIterationActive)
         {
-            /* We now restore these vectors to redo the calculation with improved extended variables */    
+            /* We now restore these vectors to redo the calculation with improved extended variables */
             if (iterate.bIterationActive)
-            { 
-                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
+            {
+                copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts));
             }
 
             /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
                so scroll down for that logic */
-            
+
             /* #########   START SECOND UPDATE STEP ################# */
             /* Box is changed in update() when we do pressure coupling,
              * but we should still use the old box for energy corrections and when
              * writing it to the energy file, so it matches the trajectory files for
              * the same timestep above. Make a copy in a separate array.
              */
-            copy_mat(state->box,lastbox);
+            copy_mat(state->box, lastbox);
+
+            bOK         = TRUE;
+            dvdl_constr = 0;
 
-            bOK = TRUE;
             if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
             {
-                wallcycle_start(wcycle,ewcUPDATE);
-                dvdl = 0;
+                wallcycle_start(wcycle, ewcUPDATE);
                 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
-                if (bTrotter) 
+                if (bTrotter)
                 {
                     if (iterate.bIterationActive)
                     {
-                        if (bFirstIterate) 
+                        if (bFirstIterate)
                         {
                             scalevir = 1;
                         }
-                        else 
+                        else
                         {
                             /* we use a new value of scalevir to converge the iterations faster */
                             scalevir = tracevir/trace(shake_vir);
                         }
-                        msmul(shake_vir,scalevir,shake_vir); 
-                        m_add(force_vir,shake_vir,total_vir);
+                        msmul(shake_vir, scalevir, shake_vir);
+                        m_add(force_vir, shake_vir, total_vir);
                         clear_mat(shake_vir);
                     }
-                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
-                /* We can only do Berendsen coupling after we have summed
-                 * the kinetic energy or virial. Since the happens
-                 * in global_state after update, we should only do it at
-                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
-                 */
+                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
+                    /* We can only do Berendsen coupling after we have summed
+                     * the kinetic energy or virial. Since the happens
+                     * in global_state after update, we should only do it at
+                     * step % nstlist = 1 with bGStatEveryStep=FALSE.
+                     */
                 }
-                else 
+                else
                 {
-                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
-                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
-                                   upd,bInitStep);
+                    update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
+                    update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep);
                 }
 
                 if (bVV)
                 {
-                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 
                     /* velocity half-step update */
-                    update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                                  bUpdateDoLR,fr->f_twin,fcd,
-                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
-                                  cr,nrnb,constr,&top->idef);
+                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
+                                  bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
+                                  ekind, M, upd, FALSE, etrtVELOCITY2,
+                                  cr, nrnb, constr, &top->idef);
                 }
 
                 /* Above, initialize just copies ekinh into ekin,
                  * it doesn't copy position (for VV),
                  * and entire integrator for MD.
                  */
-                
-                if (ir->eI==eiVVAK) 
+
+                if (ir->eI == eiVVAK)
                 {
-                    copy_rvecn(state->x,cbuf,0,state->natoms);
+                    copy_rvecn(state->x, cbuf, 0, state->natoms);
                 }
-                bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
-
-                update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                              bUpdateDoLR,fr->f_twin,fcd,
-                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
-                wallcycle_stop(wcycle,ewcUPDATE);
-
-                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,
-                                   fr->bMolPBC,graph,f,
-                                   &top->idef,shake_vir,force_vir,
-                                   cr,nrnb,wcycle,upd,constr,
-                                   bInitStep,FALSE,bCalcVir,state->veta);  
-                
-                if (ir->eI==eiVVAK) 
+                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
+
+                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
+                              bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
+                              ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
+                wallcycle_stop(wcycle, ewcUPDATE);
+
+                update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state,
+                                   fr->bMolPBC, graph, f,
+                                   &top->idef, shake_vir,
+                                   cr, nrnb, wcycle, upd, constr,
+                                   FALSE, bCalcVir, state->veta);
+
+                if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
+                {
+                    /* Correct the virial for multiple time stepping */
+                    m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
+                }
+
+                if (ir->eI == eiVVAK)
                 {
                     /* erase F_EKIN and F_TEMP here? */
                     /* just compute the kinetic energy at the half step to perform a trotter step */
-                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
-                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
-                                    constr,NULL,FALSE,lastbox,
-                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
-                                    cglo_flags | CGLO_TEMPERATURE    
-                        );
-                    wallcycle_start(wcycle,ewcUPDATE);
-                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
+                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                                    wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                                    constr, NULL, FALSE, lastbox,
+                                    top_global, &bSumEkinhOld,
+                                    cglo_flags | CGLO_TEMPERATURE
+                                    );
+                    wallcycle_start(wcycle, ewcUPDATE);
+                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
                     /* now we know the scaling, we can compute the positions again again */
-                    copy_rvecn(cbuf,state->x,0,state->natoms);
+                    copy_rvecn(cbuf, state->x, 0, state->natoms);
 
-                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 
-                    update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                                  bUpdateDoLR,fr->f_twin,fcd,
-                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
-                    wallcycle_stop(wcycle,ewcUPDATE);
+                    update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
+                                  bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
+                                  ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
+                    wallcycle_stop(wcycle, ewcUPDATE);
 
                     /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
                     /* are the small terms in the shake_vir here due
                      * to numerical errors, or are they important
-                     * physically? I'm thinking they are just errors, but not completely sure. 
+                     * physically? I'm thinking they are just errors, but not completely sure.
                      * For now, will call without actually constraining, constr=NULL*/
-                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
-                                       state,fr->bMolPBC,graph,f,
-                                       &top->idef,tmp_vir,force_vir,
-                                       cr,nrnb,wcycle,upd,NULL,
-                                       bInitStep,FALSE,bCalcVir,
-                                       state->veta);  
+                    update_constraints(fplog, step, NULL, ir, ekind, mdatoms,
+                                       state, fr->bMolPBC, graph, f,
+                                       &top->idef, tmp_vir,
+                                       cr, nrnb, wcycle, upd, NULL,
+                                       FALSE, bCalcVir,
+                                       state->veta);
+                }
+                if (!bOK)
+                {
+                    gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
                 }
-                if (!bOK && !bFFscan) 
+
+                if (fr->bSepDVDL && fplog && do_log)
                 {
-                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+                    gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr);
                 }
-                
-                if (fr->bSepDVDL && fplog && do_log) 
+                if (bVV)
                 {
-                    fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
+                    /* this factor or 2 correction is necessary
+                       because half of the constraint force is removed
+                       in the vv step, so we have to double it.  See
+                       the Redmine issue #1255.  It is not yet clear
+                       if the factor of 2 is exact, or just a very
+                       good approximation, and this will be
+                       investigated.  The next step is to see if this
+                       can be done adding a dhdl contribution from the
+                       rattle step, but this is somewhat more
+                       complicated with the current code. Will be
+                       investigated, hopefully for 4.6.3. However,
+                       this current solution is much better than
+                       having it completely wrong.
+                     */
+                    enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
                 }
-                enerd->term[F_DVDL_BONDED] += dvdl;
-            } 
-            else if (graph) 
+                else
+                {
+                    enerd->term[F_DVDL_CONSTR] += dvdl_constr;
+                }
+            }
+            else if (graph)
             {
                 /* Need to unshift here */
-                unshift_self(graph,state->box,state->x);
+                unshift_self(graph, state->box, state->x);
             }
 
-            if (vsite != NULL) 
+            if (vsite != NULL)
             {
-                wallcycle_start(wcycle,ewcVSITECONSTR);
-                if (graph != NULL) 
+                wallcycle_start(wcycle, ewcVSITECONSTR);
+                if (graph != NULL)
                 {
-                    shift_self(graph,state->box,state->x);
+                    shift_self(graph, state->box, state->x);
                 }
-                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
-                                 top->idef.iparams,top->idef.il,
-                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
-                
-                if (graph != NULL) 
+                construct_vsites(vsite, state->x, ir->delta_t, state->v,
+                                 top->idef.iparams, top->idef.il,
+                                 fr->ePBC, fr->bMolPBC, cr, state->box);
+
+                if (graph != NULL)
                 {
-                    unshift_self(graph,state->box,state->x);
+                    unshift_self(graph, state->box, state->x);
                 }
-                wallcycle_stop(wcycle,ewcVSITECONSTR);
+                wallcycle_stop(wcycle, ewcVSITECONSTR);
             }
-            
+
             /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints  ############ */
             /* With Leap-Frog we can skip compute_globals at
              * non-communication steps, but we need to calculate
              * the kinetic energy one step before communication.
              */
-            if (bGStat || (!EI_VV(ir->eI)&&do_per_step(step+1,nstglobalcomm)))
+            if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
             {
                 if (ir->nstlist == -1 && bFirstIterate)
                 {
                     gs.sig[eglsNABNSB] = nlh.nabnsb;
                 }
-                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
-                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
                                 constr,
-                                bFirstIterate ? &gs : NULL, 
-                                (step_rel % gs.nstms == 0) && 
-                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
+                                bFirstIterate ? &gs : NULL,
+                                (step_rel % gs.nstms == 0) &&
+                                (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
                                 lastbox,
-                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
-                                cglo_flags 
+                                top_global, &bSumEkinhOld,
+                                cglo_flags
                                 | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
                                 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
-                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
-                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
+                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
+                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
                                 | (iterate.bIterationActive ? CGLO_ITERATE : 0)
                                 | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
-                                | CGLO_CONSTRAINT 
-                    );
+                                | CGLO_CONSTRAINT
+                                );
                 if (ir->nstlist == -1 && bFirstIterate)
                 {
-                    nlh.nabnsb = gs.set[eglsNABNSB];
+                    nlh.nabnsb         = gs.set[eglsNABNSB];
                     gs.set[eglsNABNSB] = 0;
                 }
             }
             /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
             /* #############  END CALC EKIN AND PRESSURE ################# */
-        
+
             /* Note: this is OK, but there are some numerical precision issues with using the convergence of
                the virial that should probably be addressed eventually. state->veta has better properies,
                but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
                generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 
             if (iterate.bIterationActive &&
-                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
-                               trace(shake_vir),&tracevir)) 
+                done_iterating(cr, fplog, step, &iterate, bFirstIterate,
+                               trace(shake_vir), &tracevir))
             {
                 break;
             }
             bFirstIterate = FALSE;
         }
 
-        /* only add constraint dvdl after constraints */
-        enerd->term[F_DVDL_BONDED] += dvdl;
         if (!bVV || bRerunMD)
         {
             /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
-            sum_dhdl(enerd,state->lambda,ir->fepvals);
+            sum_dhdl(enerd, state->lambda, ir->fepvals);
         }
-        update_box(fplog,step,ir,mdatoms,state,graph,f,
-                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
-        
+        update_box(fplog, step, ir, mdatoms, state, f,
+                   ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, upd);
+
         /* ################# END UPDATE STEP 2 ################# */
         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
-    
-        /* The coordinates (x) were unshifted in update */
-        if (bFFscan && (shellfc==NULL || bConverged))
-        {
-            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
-                                 f,NULL,xcopy,
-                                 &(top_global->mols),mdatoms->massT,pres))
-            {
-                gmx_finalize_par();
 
-                fprintf(stderr,"\n");
-                exit(0);
-            }
-        }
+        /* The coordinates (x) were unshifted in update */
         if (!bGStat)
         {
-            /* We will not sum ekinh_old,                                                            
-             * so signal that we still have to do it.                                                
+            /* We will not sum ekinh_old,
+             * so signal that we still have to do it.
              */
             bSumEkinhOld = TRUE;
         }
-        
-        if (bTCR)
-        {
-            /* Only do GCT when the relaxation of shells (minimization) has converged,
-             * otherwise we might be coupling to bogus energies. 
-             * In parallel we must always do this, because the other sims might
-             * update the FF.
-             */
-
-            /* Since this is called with the new coordinates state->x, I assume
-             * we want the new box state->box too. / EL 20040121
-             */
-            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
-                        ir,MASTER(cr),
-                        mdatoms,&(top->idef),mu_aver,
-                        top_global->mols.nr,cr,
-                        state->box,total_vir,pres,
-                        mu_tot,state->x,f,bConverged);
-            debug_gmx();
-        }
 
         /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
-        
+
         /* use the directly determined last velocity, not actually the averaged half steps */
-        if (bTrotter && ir->eI==eiVV) 
+        if (bTrotter && ir->eI == eiVV)
         {
             enerd->term[F_EKIN] = last_ekin;
         }
         enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
-        
+
         if (bVV)
         {
             enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
         }
-        else 
-        {
-            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
-        }
-        /* Check for excessively large energies */
-        if (bIonize) 
+        else
         {
-#ifdef GMX_DOUBLE
-            real etot_max = 1e200;
-#else
-            real etot_max = 1e30;
-#endif
-            if (fabs(enerd->term[F_ETOT]) > etot_max) 
-            {
-                fprintf(stderr,"Energy too large (%g), giving up\n",
-                        enerd->term[F_ETOT]);
-            }
+            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ);
         }
         /* #########  END PREPARING EDR OUTPUT  ###########  */
-        
-        /* Time for performance */
-        if (((step % stepout) == 0) || bLastStep) 
-        {
-            runtime_upd_proc(runtime);
-        }
-        
+
         /* Output stuff */
         if (MASTER(cr))
         {
-            gmx_bool do_dr,do_or;
-            
+            gmx_bool do_dr, do_or;
+
             if (fplog && do_log && bDoExpanded)
             {
                 /* only needed if doing expanded ensemble */
-                PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
-                                          &df_history,state->fep_state,ir->nstlog,step);
+                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL,
+                                          &state_global->dfhist, state->fep_state, ir->nstlog, step);
             }
-            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
+            if (!(bStartingFromCpt && (EI_VV(ir->eI))))
             {
                 if (bCalcEner)
                 {
-                    upd_mdebin(mdebin,bDoDHDL, TRUE,
-                               t,mdatoms->tmass,enerd,state,
-                               ir->fepvals,ir->expandedvals,lastbox,
-                               shake_vir,force_vir,total_vir,pres,
-                               ekind,mu_tot,constr);
+                    upd_mdebin(mdebin, bDoDHDL, TRUE,
+                               t, mdatoms->tmass, enerd, state,
+                               ir->fepvals, ir->expandedvals, lastbox,
+                               shake_vir, force_vir, total_vir, pres,
+                               ekind, mu_tot, constr);
                 }
                 else
                 {
                     upd_mdebin_step(mdebin);
                 }
-                
-                do_dr  = do_per_step(step,ir->nstdisreout);
-                do_or  = do_per_step(step,ir->nstorireout);
-                
-                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
-                           step,t,
-                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
+
+                do_dr  = do_per_step(step, ir->nstdisreout);
+                do_or  = do_per_step(step, ir->nstorireout);
+
+                print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL,
+                           step, t,
+                           eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts));
             }
             if (ir->ePull != epullNO)
             {
-                pull_print_output(ir->pull,step,t);
+                pull_print_output(ir->pull, step, t);
             }
-            
-            if (do_per_step(step,ir->nstlog))
+
+            if (do_per_step(step, ir->nstlog))
             {
-                if(fflush(fplog) != 0)
+                if (fflush(fplog) != 0)
                 {
-                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
+                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
                 }
             }
         }
         if (bDoExpanded)
         {
-            /* Have to do this part after outputting the logfile and the edr file */
+            /* Have to do this part _after_ outputting the logfile and the edr file */
+            /* Gets written into the state at the beginning of next loop*/
             state->fep_state = lamnew;
-            for (i=0;i<efptNR;i++)
+        }
+        /* Print the remaining wall clock time for the run */
+        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
+        {
+            if (shellfc)
             {
-                state_global->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
+                fprintf(stderr, "\n");
             }
+            print_time(stderr, walltime_accounting, step, ir, cr);
         }
-        /* Remaining runtime */
-        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
+
+        /* Ion/water position swapping.
+         * Not done in last step since trajectory writing happens before this call
+         * in the MD loop and exchanges would be lost anyway. */
+        bNeedRepartition = FALSE;
+        if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
+            do_per_step(step, ir->swap->nstswap))
         {
-            if (shellfc) 
+            bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle,
+                                             bRerunMD ? rerun_fr.x   : state->x,
+                                             bRerunMD ? rerun_fr.box : state->box,
+                                             top_global, MASTER(cr) && bVerbose, bRerunMD);
+
+            if (bNeedRepartition && DOMAINDECOMP(cr))
             {
-                fprintf(stderr,"\n");
+                dd_collect_state(cr->dd, state, state_global);
             }
-            print_time(stderr,runtime,step,ir,cr);
         }
 
         /* Replica exchange */
         bExchanged = FALSE;
-        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
-            do_per_step(step,repl_ex_nst)) 
+        if (bDoReplEx)
         {
-            bExchanged = replica_exchange(fplog,cr,repl_ex,
-                                          state_global,enerd,
-                                          state,step,t);
+            bExchanged = replica_exchange(fplog, cr, repl_ex,
+                                          state_global, enerd,
+                                          state, step, t);
+        }
 
-            if (bExchanged && DOMAINDECOMP(cr)) 
-            {
-                dd_partition_system(fplog,step,cr,TRUE,1,
-                                    state_global,top_global,ir,
-                                    state,&f,mdatoms,top,fr,
-                                    vsite,shellfc,constr,
-                                    nrnb,wcycle,FALSE);
-            }
+        if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
+        {
+            dd_partition_system(fplog, step, cr, TRUE, 1,
+                                state_global, top_global, ir,
+                                state, &f, mdatoms, top, fr,
+                                vsite, shellfc, constr,
+                                nrnb, wcycle, FALSE);
         }
-        
-        bFirstStep = FALSE;
-        bInitStep = FALSE;
+
+        bFirstStep       = FALSE;
+        bInitStep        = FALSE;
         bStartingFromCpt = FALSE;
 
         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
@@ -2000,41 +1852,41 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
             /* Store the pressure in t_state for pressure coupling
              * at the next MD step.
              */
-            copy_mat(pres,state->pres_prev);
+            copy_mat(pres, state->pres_prev);
         }
-        
+
         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 
-        if ( (membed!=NULL) && (!bLastStep) )
+        if ( (membed != NULL) && (!bLastStep) )
         {
-            rescale_membed(step_rel,membed,state_global->x);
+            rescale_membed(step_rel, membed, state_global->x);
         }
 
-        if (bRerunMD) 
+        if (bRerunMD)
         {
             if (MASTER(cr))
             {
                 /* read next frame from input trajectory */
-                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
+                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
             }
 
             if (PAR(cr))
             {
-                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
+                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
             }
         }
-        
+
         if (!bRerunMD || !rerun_fr.bStep)
         {
             /* increase the MD step number */
             step++;
             step_rel++;
         }
-        
-        cycles = wallcycle_stop(wcycle,ewcSTEP);
+
+        cycles = wallcycle_stop(wcycle, ewcSTEP);
         if (DOMAINDECOMP(cr) && wcycle)
         {
-            dd_cycles_add(cr->dd,cycles,ddCyclStep);
+            dd_cycles_add(cr->dd, cycles, ddCyclStep);
         }
 
         if (bPMETuneRunning || bPMETuneTry)
@@ -2055,7 +1907,22 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                         /* PME node load is too high, start tuning */
                         bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
                     }
-                    dd_bcast(cr->dd,sizeof(gmx_bool),&bPMETuneRunning);
+                    dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
+
+                    if (bPMETuneRunning &&
+                        fr->nbv->bUseGPU && DOMAINDECOMP(cr) &&
+                        !(cr->duty & DUTY_PME))
+                    {
+                        /* Lock DLB=auto to off (does nothing when DLB=yes/no).
+                         * With GPUs + separate PME ranks, we don't want DLB.
+                         * This could happen when we scan coarse grids and
+                         * it would then never be turned off again.
+                         * This would hurt performance at the final, optimal
+                         * grid spacing, where DLB almost never helps.
+                         * Also, DLB can limit the cut-off for PME tuning.
+                         */
+                        dd_dlb_set_lock(cr->dd, TRUE);
+                    }
 
                     if (bPMETuneRunning || step_rel > ir->nstlist*50)
                     {
@@ -2068,19 +1935,35 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                      * but the first cycle is always skipped anyhow.
                      */
                     bPMETuneRunning =
-                        pme_load_balance(pme_loadbal,cr,
+                        pme_load_balance(pme_loadbal, cr,
                                          (bVerbose && MASTER(cr)) ? stderr : NULL,
                                          fplog,
-                                         ir,state,cycles_pmes,
-                                         fr->ic,fr->nbv,&fr->pmedata,
+                                         ir, state, cycles_pmes,
+                                         fr->ic, fr->nbv, &fr->pmedata,
                                          step);
 
                     /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
-                    fr->ewaldcoeff = fr->ic->ewaldcoeff;
-                    fr->rlist      = fr->ic->rlist;
-                    fr->rlistlong  = fr->ic->rlistlong;
-                    fr->rcoulomb   = fr->ic->rcoulomb;
-                    fr->rvdw       = fr->ic->rvdw;
+                    fr->ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
+                    fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
+                    fr->rlist         = fr->ic->rlist;
+                    fr->rlistlong     = fr->ic->rlistlong;
+                    fr->rcoulomb      = fr->ic->rcoulomb;
+                    fr->rvdw          = fr->ic->rvdw;
+
+                    if (ir->eDispCorr != edispcNO)
+                    {
+                        calc_enervirdiff(NULL, ir->eDispCorr, fr);
+                    }
+
+                    if (!bPMETuneRunning &&
+                        DOMAINDECOMP(cr) &&
+                        dd_dlb_is_locked(cr->dd))
+                    {
+                        /* Unlock the DLB=auto, DLB is allowed to activate
+                         * (but we don't expect it to activate in most cases).
+                         */
+                        dd_dlb_set_lock(cr->dd, FALSE);
+                    }
                 }
                 cycles_pmes = 0;
             }
@@ -2090,71 +1973,86 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
             gs.set[eglsRESETCOUNTERS] != 0)
         {
             /* Reset all the counters related to performance over the run */
-            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime,
+            reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting,
                                fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
-            wcycle_set_reset_counters(wcycle,-1);
+            wcycle_set_reset_counters(wcycle, -1);
+            if (!(cr->duty & DUTY_PME))
+            {
+                /* Tell our PME node to reset its counters */
+                gmx_pme_send_resetcounters(cr, step);
+            }
             /* Correct max_hours for the elapsed time */
-            max_hours -= run_time/(60.0*60.0);
-            bResetCountersHalfMaxH = FALSE;
+            max_hours                -= elapsed_time/(60.0*60.0);
+            bResetCountersHalfMaxH    = FALSE;
             gs.set[eglsRESETCOUNTERS] = 0;
         }
 
+        /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
+        IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle);
+
     }
     /* End of main MD loop */
     debug_gmx();
-    
-    /* Stop the time */
-    runtime_end(runtime);
-    
+
+    /* Closing TNG files can include compressing data. Therefore it is good to do that
+     * before stopping the time measurements. */
+    mdoutf_tng_close(outf);
+
+    /* Stop measuring walltime */
+    walltime_accounting_end(walltime_accounting);
+
     if (bRerunMD && MASTER(cr))
     {
         close_trj(status);
     }
-    
+
     if (!(cr->duty & DUTY_PME))
     {
         /* Tell the PME only node to finish */
         gmx_pme_send_finish(cr);
     }
-    
+
     if (MASTER(cr))
     {
-        if (ir->nstcalcenergy > 0 && !bRerunMD) 
+        if (ir->nstcalcenergy > 0 && !bRerunMD)
         {
-            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
-                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
+            print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t,
+                       eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts));
         }
     }
 
     done_mdoutf(outf);
-
     debug_gmx();
 
     if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
     {
-        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
-        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
+        fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
+        fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns);
     }
 
     if (pme_loadbal != NULL)
     {
-        pme_loadbal_done(pme_loadbal,fplog);
+        pme_loadbal_done(pme_loadbal, cr, fplog,
+                         fr->nbv != NULL && fr->nbv->bUseGPU);
     }
 
     if (shellfc && fplog)
     {
-        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
+        fprintf(fplog, "Fraction of iterations that converged:           %.2f %%\n",
                 (nconverged*100.0)/step_rel);
-        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
+        fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n",
                 tcount/step_rel);
     }
-    
+
     if (repl_ex_nst > 0 && MASTER(cr))
     {
-        print_replica_exchange_statistics(fplog,repl_ex);
+        print_replica_exchange_statistics(fplog, repl_ex);
     }
-    
-    runtime->nsteps_done = step_rel;
 
-   return 0;
+    /* IMD cleanup, if bIMD is TRUE. */
+    IMD_finalize(ir->bIMD, ir->imd);
+
+    walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
+
+    return 0;
 }