#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/domdec/gpuhaloexchange.h"
+#include "gromacs/domdec/localtopologychecker.h"
#include "gromacs/domdec/mdsetup.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
#include "gromacs/mdlib/update_vv.h"
#include "gromacs/mdlib/vcm.h"
#include "gromacs/mdlib/vsite.h"
+#include "gromacs/mdrunutility/freeenergy.h"
#include "gromacs/mdrunutility/handlerestart.h"
#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/mdrunutility/printtime.h"
#include "gromacs/mdtypes/mdrunoptions.h"
#include "gromacs/mdtypes/multipletimestepping.h"
#include "gromacs/mdtypes/observableshistory.h"
+#include "gromacs/mdtypes/observablesreducer.h"
#include "gromacs/mdtypes/pullhistory.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/mdtypes/state.h"
// will go away eventually.
const t_inputrec* ir = inputrec;
- int64_t step, step_rel;
double t, t0 = ir->init_t;
gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
gmx_bool bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
- gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
+ gmx_bool bDoExpanded = FALSE;
gmx_bool do_ene, do_log, do_verbose;
gmx_bool bMasterState;
unsigned int force_flags;
matrix lastbox;
int lamnew = 0;
/* for FEP */
- int nstfep = 0;
double cycles;
real saved_conserved_quantity = 0;
real last_ekin = 0;
bool bInteractiveMDstep = false;
- /* Domain decomposition could incorrectly miss a bonded
- interaction, but checking for that requires a global
- communication stage, which does not otherwise happen in DD
- code. So we do that alongside the first global energy reduction
- after a new DD is made. These variables handle whether the
- check happens, and the result it returns. */
- bool shouldCheckNumberOfBondedInteractions = false;
- int totalNumberOfBondedInteractions = -1;
-
SimulationSignals signals;
// Most global communication stages don't propagate mdrun
// signals, and will use this object to achieve that.
int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
gmx::ArrayRef<real> lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
- initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
+ initialize_lambdas(fplog,
+ ir->efep,
+ ir->bSimTemp,
+ *ir->fepvals,
+ ir->simtempvals->temperatures,
+ gmx::arrayRefFromArray(ir->opts.ref_t, ir->opts.ngtc),
+ MASTER(cr),
+ fep_state,
+ lambda);
Update upd(*ir, deform);
bool doSimulatedAnnealing = false;
{
}
const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
- const t_fcdata& fcdata = *fr->fcdata;
+ t_fcdata& fcdata = *fr->fcdata;
bool simulationsShareState = false;
int nstSignalComm = nstglobalcomm;
{
// TODO This implementation of ensemble orientation restraints is nasty because
// a user can't just do multi-sim with single-sim orientation restraints.
- bool usingEnsembleRestraints =
- (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
+ bool usingEnsembleRestraints = (fcdata.disres->nsystems > 1) || ((ms != nullptr) && fcdata.orires);
bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim() && (ms != nullptr));
// Replica exchange, ensemble restraints and AWH need all
mdrunOptions,
cr,
outputProvider,
- mdModulesNotifier,
+ mdModulesNotifiers,
ir,
top_global,
oenv,
false,
startingBehavior,
simulationsShareState,
- mdModulesNotifier);
+ mdModulesNotifiers);
gstat = global_stat_init(ir);
top_global,
constr ? constr->numFlexibleConstraints() : 0,
ir->nstcalcenergy,
- DOMAINDECOMP(cr),
+ haveDDAtomOrdering(*cr),
useGpuForPme);
{
}
}
- // Local state only becomes valid now.
- std::unique_ptr<t_state> stateInstance;
- t_state* state;
-
- gmx_localtop_t top(top_global.ffparams);
-
- auto mdatoms = mdAtoms->mdatoms();
+ ObservablesReducer observablesReducer = observablesReducerBuilder->build();
- ForceBuffers f(fr->useMts,
+ ForceBuffers f(simulationWork.useMts,
((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
- ? PinningPolicy::PinnedIfSupported
- : PinningPolicy::CannotBePinned);
- if (DOMAINDECOMP(cr))
+ ? PinningPolicy::PinnedIfSupported
+ : PinningPolicy::CannotBePinned);
+ const t_mdatoms* md = mdAtoms->mdatoms();
+ if (haveDDAtomOrdering(*cr))
{
- stateInstance = std::make_unique<t_state>();
- state = stateInstance.get();
+ // Local state only becomes valid now.
dd_init_local_state(*cr->dd, state_global, state);
/* Distribute the charge groups over the nodes from the master node */
state,
&f,
mdAtoms,
- &top,
+ top,
fr,
vsite,
constr,
nrnb,
nullptr,
FALSE);
- shouldCheckNumberOfBondedInteractions = true;
- upd.setNumAtoms(state->natoms);
+ upd.updateAfterPartition(state->natoms,
+ md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
+ fr->longRangeNonbondeds->updateAfterPartition(*md);
}
else
{
state_change_natoms(state_global, state_global->natoms);
- /* Copy the pointer to the global state */
- state = state_global;
/* Generate and initialize new topology */
- mdAlgorithmsSetupAtomData(cr, *ir, top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
-
- upd.setNumAtoms(state->natoms);
+ mdAlgorithmsSetupAtomData(cr, *ir, top_global, top, fr, &f, mdAtoms, constr, vsite, shellfc);
+
+ upd.updateAfterPartition(state->natoms,
+ md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
+ fr->longRangeNonbondeds->updateAfterPartition(*md);
}
std::unique_ptr<UpdateConstrainGpu> integrator;
// TODO: the assertions below should be handled by UpdateConstraintsBuilder.
if (useGpuForUpdate)
{
- GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
- || constr->numConstraintsTotal() == 0,
+ GMX_RELEASE_ASSERT(!haveDDAtomOrdering(*cr) || ddUsesUpdateGroups(*cr->dd)
+ || constr == nullptr || constr->numConstraintsTotal() == 0,
"Constraints in domain decomposition are only supported with update "
"groups if using GPU update.\n");
GMX_RELEASE_ASSERT(ir->eConstrAlg != ConstraintAlgorithm::Shake || constr == nullptr
|| ir->epc == PressureCoupling::Berendsen || ir->epc == PressureCoupling::CRescale,
"Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
"with the GPU update.\n");
- GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
+ GMX_RELEASE_ASSERT(!md->haveVsites,
"Virtual sites are not supported with the GPU update.\n");
GMX_RELEASE_ASSERT(ed == nullptr,
"Essential dynamics is not supported with the GPU update.\n");
GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
"Constraints pulling is not supported with the GPU update.\n");
- GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
+ GMX_RELEASE_ASSERT(fcdata.orires == nullptr,
"Orientation restraints are not supported with the GPU update.\n");
GMX_RELEASE_ASSERT(
ir->efep == FreeEnergyPerturbationType::No
ekind->ngtc,
fr->deviceStreamManager->context(),
fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
- stateGpu->xUpdatedOnDevice(),
wcycle);
+ stateGpu->setXUpdatedOnDeviceEvent(integrator->xUpdatedOnDeviceEvent());
+
integrator->setPbc(PbcType::Xyz, state->box);
}
// the global state to file and potentially for replica exchange.
// (Global topology should persist.)
- update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
+ update_mdatoms(mdAtoms->mdatoms(), state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
if (ir->bExpanded)
{
preparePrevStepPullCom(ir,
pull_work,
- gmx::arrayRefFromArray(mdatoms->massT, mdatoms->nr),
+ gmx::arrayRefFromArray(md->massT, md->nr),
state,
state_global,
cr,
{
auto v = makeArrayRef(state->v);
/* Set the velocities of vsites, shells and frozen atoms to zero */
- for (i = 0; i < mdatoms->homenr; i++)
+ for (i = 0; i < md->homenr; i++)
{
- if (mdatoms->ptype[i] == ParticleType::Shell)
+ if (md->ptype[i] == ParticleType::Shell)
{
clear_rvec(v[i]);
}
- else if (mdatoms->cFREEZE)
+ else if (md->cFREEZE)
{
for (m = 0; m < DIM; m++)
{
- if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
+ if (ir->opts.nFreeze[md->cFREEZE[i]][m])
{
v[i][m] = 0;
}
do_constrain_first(fplog,
constr,
ir,
- mdatoms->nr,
- mdatoms->homenr,
+ md->nr,
+ md->homenr,
state->x.arrayRefWithPadding(),
state->v.arrayRefWithPadding(),
state->box,
}
}
- if (ir->efep != FreeEnergyPerturbationType::No)
- {
- /* Set free energy calculation frequency as the greatest common
- * denominator of nstdhdl and repl_ex_nst. */
- nstfep = ir->fepvals->nstdhdl;
- if (ir->bExpanded)
- {
- nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
- }
- if (useReplicaExchange)
- {
- nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
- }
- if (ir->bDoAwh)
- {
- nstfep = std::gcd(ir->awhParams->nstSampleCoord(), nstfep);
- }
- }
+ const int nstfep = computeFepPeriod(*ir, replExParams);
/* Be REALLY careful about what flags you set here. You CANNOT assume
* this is the first step, since we might be restarting from a checkpoint,
t_vcm vcm(top_global.groups, *ir);
reportComRemovalInfo(fplog, vcm);
+ int64_t step = ir->init_step;
+ int64_t step_rel = 0;
+
/* To minimize communication, compute_globals computes the COM velocity
* and the kinetic energy for the velocities without COM motion removed.
* Thus to get the kinetic energy without the COM contribution, we need
makeConstArrayRef(state->x),
makeConstArrayRef(state->v),
state->box,
- mdatoms,
+ md,
nrnb,
&vcm,
nullptr,
shake_vir,
total_vir,
pres,
- gmx::ArrayRef<real>{},
&nullSignaller,
state->box,
- &totalNumberOfBondedInteractions,
&bSumEkinhOld,
- cglo_flags_iteration
- | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
- : 0));
+ cglo_flags_iteration,
+ step,
+ &observablesReducer);
+ // Clean up after pre-step use of compute_globals()
+ observablesReducer.markAsReadyToReduce();
+
if (cglo_flags_iteration & CGLO_STOPCM)
{
/* At initialization, do not pass x with acceleration-correction mode
auto x = (vcm.mode == ComRemovalAlgorithm::LinearAccelerationCorrection)
? ArrayRef<RVec>()
: makeArrayRef(state->x);
- process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
- inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
+ process_and_stopcm_grp(fplog, &vcm, *md, x, makeArrayRef(state->v));
+ inc_nrnb(nrnb, eNR_STOPCM, md->homenr);
}
}
- checkNumberOfBondedInteractions(mdlog,
- cr,
- totalNumberOfBondedInteractions,
- top_global,
- &top,
- makeConstArrayRef(state->x),
- state->box,
- &shouldCheckNumberOfBondedInteractions);
if (ir->eI == IntegrationAlgorithm::VVAK)
{
/* a second call to get the half step temperature initialized as well */
makeConstArrayRef(state->x),
makeConstArrayRef(state->v),
state->box,
- mdatoms,
+ md,
nrnb,
&vcm,
nullptr,
shake_vir,
total_vir,
pres,
- gmx::ArrayRef<real>{},
&nullSignaller,
state->box,
- nullptr,
&bSumEkinhOld,
- cglo_flags & ~CGLO_PRESSURE);
+ cglo_flags & ~CGLO_PRESSURE,
+ step,
+ &observablesReducer);
+ // Clean up after pre-step use of compute_globals()
+ observablesReducer.markAsReadyToReduce();
}
/* Calculate the initial half step temperature, and save the ekinh_old */
}
walltime_accounting_start_time(walltime_accounting);
- wallcycle_start(wcycle, ewcRUN);
+ wallcycle_start(wcycle, WallCycleCounter::Run);
print_start(fplog, cr, walltime_accounting, "mdrun");
/***********************************************************
bExchanged = FALSE;
bNeedRepartition = FALSE;
- step = ir->init_step;
- step_rel = 0;
-
auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]),
simulationsShareState,
simulationWork.useGpuPmePpCommunication);
}
- wallcycle_start(wcycle, ewcSTEP);
+ wallcycle_start(wcycle, WallCycleCounter::Step);
bLastStep = (step_rel == ir->nsteps);
t = t0 + step * ir->delta_t;
/* find and set the current lambdas */
state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
- bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
- bDoFEP = ((ir->efep != FreeEnergyPerturbationType::No) && do_per_step(step, nstfep));
bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
&& (!bFirstStep));
}
do_verbose = mdrunOptions.verbose
&& (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
- if (useGpuForUpdate && !bFirstStep && bNS)
+ // On search steps, when doing the update on the GPU, copy
+ // the coordinates and velocities to the host unless they are
+ // already there (ie on the first step and after replica
+ // exchange).
+ if (useGpuForUpdate && bNS && !bFirstStep && !bExchanged)
{
- // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
- // Copy coordinate from the GPU when needed at the search step.
- // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
- // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
if (vsite != nullptr)
{
// Virtual sites need to be updated before domain decomposition and forces are calculated
- wallcycle_start(wcycle, ewcVSITECONSTR);
+ wallcycle_start(wcycle, WallCycleCounter::VsiteConstr);
// md-vv calculates virtual velocities once it has full-step real velocities
vsite->construct(state->x,
state->v,
(!EI_VV(inputrec->eI) && needVirtualVelocitiesThisStep)
? VSiteOperation::PositionsAndVelocities
: VSiteOperation::Positions);
- wallcycle_stop(wcycle, ewcVSITECONSTR);
+ wallcycle_stop(wcycle, WallCycleCounter::VsiteConstr);
}
if (bNS && !(bFirstStep && ir->bContinuation))
if (correct_box(fplog, step, state->box))
{
bMasterState = TRUE;
- // If update is offloaded, it should be informed about the box size change
- if (useGpuForUpdate)
- {
- integrator->setPbc(PbcType::Xyz, state->box);
- }
}
}
- if (DOMAINDECOMP(cr) && bMasterState)
+ // If update is offloaded, and the box was changed either
+ // above or in a replica exchange on the previous step,
+ // the GPU Update object should be informed
+ if (useGpuForUpdate && (bMasterState || bExchanged))
+ {
+ integrator->setPbc(PbcType::Xyz, state->box);
+ }
+ if (haveDDAtomOrdering(*cr) && bMasterState)
{
dd_collect_state(cr->dd, state, state_global);
}
- if (DOMAINDECOMP(cr))
+ if (haveDDAtomOrdering(*cr))
{
/* Repartition the domain decomposition */
dd_partition_system(fplog,
state,
&f,
mdAtoms,
- &top,
+ top,
fr,
vsite,
constr,
nrnb,
wcycle,
do_verbose && !bPMETunePrinting);
- shouldCheckNumberOfBondedInteractions = true;
- upd.setNumAtoms(state->natoms);
+ upd.updateAfterPartition(state->natoms,
+ md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
+ fr->longRangeNonbondeds->updateAfterPartition(*md);
}
}
// Allocate or re-size GPU halo exchange object, if necessary
- if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
+ if (bNS && simulationWork.havePpDomainDecomposition && simulationWork.useGpuHaloExchange)
{
GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
"GPU device manager has to be initialized to use GPU "
if (ir->efep != FreeEnergyPerturbationType::No)
{
- update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
+ update_mdatoms(mdAtoms->mdatoms(), state->lambda[FreeEnergyPerturbationCouplingType::Mass]);
}
if (bExchanged)
/* We need the kinetic energy at minus the half step for determining
* the full step kinetic energy and possibly for T-coupling.*/
/* This may not be quite working correctly yet . . . . */
+ int cglo_flags = CGLO_GSTAT | CGLO_TEMPERATURE;
compute_globals(gstat,
cr,
ir,
makeConstArrayRef(state->x),
makeConstArrayRef(state->v),
state->box,
- mdatoms,
+ md,
nrnb,
&vcm,
wcycle,
nullptr,
nullptr,
nullptr,
- gmx::ArrayRef<real>{},
&nullSignaller,
state->box,
- &totalNumberOfBondedInteractions,
&bSumEkinhOld,
- CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
- checkNumberOfBondedInteractions(mdlog,
- cr,
- totalNumberOfBondedInteractions,
- top_global,
- &top,
- makeConstArrayRef(state->x),
- state->box,
- &shouldCheckNumberOfBondedInteractions);
+ cglo_flags,
+ step,
+ &observablesReducer);
}
clear_mat(force_vir);
bCalcEner = TRUE;
}
+    // bCalcEner is only here for when the last step is not a multiple of nstfep
+ const bool computeDHDL = ((ir->efep != FreeEnergyPerturbationType::No || ir->bSimTemp)
+ && (do_per_step(step, nstfep) || bCalcEner));
+
/* Do we need global communication ? */
bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
|| (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
| GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
- | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
- if (fr->useMts && !do_per_step(step, ir->nstfout))
+ | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (computeDHDL ? GMX_FORCE_DHDL : 0));
+ if (simulationWork.useMts && !do_per_step(step, ir->nstfout))
{
+ // TODO: merge this with stepWork.useOnlyMtsCombinedForceBuffer
force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
}
pull_work,
bNS,
force_flags,
- &top,
+ top,
constr,
enerd,
state->natoms,
&state->hist,
&f.view(),
force_vir,
- *mdatoms,
+ *md,
+ fr->longRangeNonbondeds.get(),
nrnb,
wcycle,
shellfc,
step,
nrnb,
wcycle,
- &top,
+ top,
state->box,
state->x.arrayRefWithPadding(),
&state->hist,
&f.view(),
force_vir,
- mdatoms,
+ md,
enerd,
state->lambda,
fr,
mu_tot,
t,
ed ? ed->getLegacyED() : nullptr,
+ fr->longRangeNonbondeds.get(),
(bNS ? GMX_FORCE_NS : 0) | force_flags,
ddBalanceRegionHandler);
}
fr,
cr,
state,
- mdatoms,
- fcdata,
+ mdAtoms->mdatoms(),
+ &fcdata,
&MassQ,
&vcm,
- top_global,
- top,
enerd,
+ &observablesReducer,
ekind,
gstat,
&last_ekin,
bTrotter,
bExchanged,
&bSumEkinhOld,
- &shouldCheckNumberOfBondedInteractions,
&saved_conserved_quantity,
&f,
&upd,
&nullSignaller,
trotter_seq,
nrnb,
- mdlog,
fplog,
wcycle);
if (vsite != nullptr && needVirtualVelocitiesThisStep)
{
// Positions were calculated earlier
- wallcycle_start(wcycle, ewcVSITECONSTR);
+ wallcycle_start(wcycle, WallCycleCounter::VsiteConstr);
vsite->construct(state->x, state->v, state->box, VSiteOperation::Velocities);
- wallcycle_stop(wcycle, ewcVSITECONSTR);
+ wallcycle_stop(wcycle, WallCycleCounter::VsiteConstr);
}
}
state->dfhist,
step,
state->v.rvec_array(),
- mdatoms);
+ md->homenr,
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
/* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
if (MASTER(cr))
{
if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
{
gmx_bool bIfRandomize;
- bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
+ bIfRandomize = update_randomize_velocities(ir,
+ step,
+ cr,
+ md->homenr,
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>(),
+ gmx::arrayRefFromArray(md->invmass, md->nr),
+ state->v,
+ &upd,
+ constr);
/* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
if (constr && bIfRandomize)
{
if (!useGpuForUpdate)
{
- wallcycle_start(wcycle, ewcUPDATE);
+ wallcycle_start(wcycle, WallCycleCounter::Update);
}
/* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
if (bTrotter)
{
- trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
+ trotter_update(ir,
+ step,
+ ekind,
+ enerd,
+ state,
+ total_vir,
+ md->homenr,
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>(),
+ gmx::arrayRefFromArray(md->invmass, md->nr),
+ &MassQ,
+ trotter_seq,
+ TrotterSequence::Three);
/* We can only do Berendsen coupling after we have summed
* the kinetic energy or virial. Since this happens
* in global_state after update, we should only do it at
}
else
{
- update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
+ update_tcouple(step,
+ ir,
+ state,
+ ekind,
+ &MassQ,
+ md->homenr,
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
}
fr,
cr,
state,
- mdatoms,
- fcdata,
+ mdAtoms->mdatoms(),
+ &fcdata,
&MassQ,
&vcm,
pull_work,
enerd,
+ &observablesReducer,
ekind,
gstat,
&dvdl_constr,
{
if (useGpuForUpdate)
{
- if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
+ // On search steps, update handles to device vectors
+ if (bNS && (bFirstStep || haveDDAtomOrdering(*cr) || bExchanged))
{
integrator->set(stateGpu->getCoordinates(),
stateGpu->getVelocities(),
stateGpu->getForces(),
- top.idef,
- *mdatoms);
+ top->idef,
+ *md);
// Copy data to the GPU after buffers might have been reinitialized
+ /* The velocity copy is redundant if we had Center-of-Mass motion removed on
+ * the previous step. We don't check that now. */
stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
- stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
+ if (bExchanged
+ || (!runScheduleWork->stepWork.haveGpuPmeOnThisRank
+ && !runScheduleWork->stepWork.useGpuXBufferOps))
+ {
+ stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
+ }
}
- if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
- && !thisRankHasDuty(cr, DUTY_PME))
+ if ((simulationWork.useGpuPme && simulationWork.useCpuPmePpCommunication)
+ || (!runScheduleWork->stepWork.useGpuFBufferOps))
{
- // The PME forces were recieved to the host, so have to be copied
- stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
- }
- else if (!runScheduleWork->stepWork.useGpuFBufferOps)
- {
- // The buffer ops were not offloaded this step, so the forces are on the
- // host and have to be copied
+                    // The PME forces were received on the host, and reduced on the CPU with the
+ // rest of the forces computed on the GPU, so the final forces have to be copied
+ // back to the GPU. Or the buffer ops were not offloaded this step, so the
+ // forces are on the host and have to be copied
stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
}
-
const bool doTemperatureScaling =
(ir->etc != TemperatureCoupling::No
&& do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
// This applies Leap-Frog, LINCS and SETTLE in succession
- integrator->integrate(
- stateGpu->getForcesReadyOnDeviceEvent(
- AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
- ir->delta_t,
- true,
- bCalcVir,
- shake_vir,
- doTemperatureScaling,
- ekind->tcstat,
- doParrinelloRahman,
- ir->nstpcouple * ir->delta_t,
- M);
-
- // Copy velocities D2H after update if:
- // - Globals are computed this step (includes the energy output steps).
- // - Temperature is needed for the next step.
- if (bGStat || needHalfStepKineticEnergy)
- {
- stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
- stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
- }
+ integrator->integrate(stateGpu->getLocalForcesReadyOnDeviceEvent(
+ runScheduleWork->stepWork, runScheduleWork->simulationWork),
+ ir->delta_t,
+ true,
+ bCalcVir,
+ shake_vir,
+ doTemperatureScaling,
+ ekind->tcstat,
+ doParrinelloRahman,
+ ir->nstpcouple * ir->delta_t,
+ M);
}
else
{
* Using that acceleration would result in a virial with the slow
* force contribution would be a factor mtsFactor too large.
*/
- if (fr->useMts && bCalcVir && constr != nullptr)
+ if (simulationWork.useMts && bCalcVir && constr != nullptr)
{
- upd.update_for_constraint_virial(
- *ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
+ upd.update_for_constraint_virial(*ir,
+ md->homenr,
+ md->havePartiallyFrozenAtoms,
+ gmx::arrayRefFromArray(md->invmass, md->nr),
+ gmx::arrayRefFromArray(md->invMassPerDim, md->nr),
+ *state,
+ f.view().forceWithPadding(),
+ *ekind);
constrain_coordinates(constr,
do_log,
}
ArrayRefWithPadding<const RVec> forceCombined =
- (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
+ (simulationWork.useMts && step % ir->mtsLevels[1].stepFactor == 0)
? f.view().forceMtsCombinedWithPadding()
: f.view().forceWithPadding();
- upd.update_coords(
- *ir, step, mdatoms, state, forceCombined, fcdata, ekind, M, etrtPOSITION, cr, constr != nullptr);
+ upd.update_coords(*ir,
+ step,
+ md->homenr,
+ md->havePartiallyFrozenAtoms,
+ gmx::arrayRefFromArray(md->ptype, md->nr),
+ gmx::arrayRefFromArray(md->invmass, md->nr),
+ gmx::arrayRefFromArray(md->invMassPerDim, md->nr),
+ state,
+ forceCombined,
+ &fcdata,
+ ekind,
+ M,
+ etrtPOSITION,
+ cr,
+ constr != nullptr);
- wallcycle_stop(wcycle, ewcUPDATE);
+ wallcycle_stop(wcycle, WallCycleCounter::Update);
constrain_coordinates(constr,
do_log,
state,
upd.xp()->arrayRefWithPadding(),
&dvdl_constr,
- bCalcVir && !fr->useMts,
+ bCalcVir && !simulationWork.useMts,
shake_vir);
- upd.update_sd_second_half(
- *ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle, constr, do_log, do_ene);
- upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
+ upd.update_sd_second_half(*ir,
+ step,
+ &dvdl_constr,
+ md->homenr,
+ gmx::arrayRefFromArray(md->ptype, md->nr),
+ gmx::arrayRefFromArray(md->invmass, md->nr),
+ state,
+ cr,
+ nrnb,
+ wcycle,
+ constr,
+ do_log,
+ do_ene);
+ upd.finish_update(
+ *ir, md->havePartiallyFrozenAtoms, md->homenr, state, wcycle, constr != nullptr);
}
if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
{
- updatePrevStepPullCom(pull_work, state);
+ updatePrevStepPullCom(pull_work, state->pull_com_prev_step);
}
enerd->term[F_DVDL_CONSTR] += dvdl_constr;
// and when algorithms require it.
const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
- if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ if (useGpuForUpdate)
{
- // Copy coordinates when needed to stop the CM motion.
- if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
+ const bool coordinatesRequiredForStopCM =
+ bStopCM && (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ && !EI_VV(ir->eI);
+
+ // Copy coordinates when needed to stop the CM motion or for replica exchange
+ if (coordinatesRequiredForStopCM || bDoReplEx)
{
stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
+
+ // Copy velocities back to the host if:
+ // - Globals are computed this step (includes the energy output steps).
+ // - Temperature is needed for the next step.
+ // - This is a replica exchange step (even though we will only need
+ // the velocities if an exchange succeeds)
+ if (bGStat || needHalfStepKineticEnergy || bDoReplEx)
+ {
+ stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
+ stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
+ }
+ }
+
+ if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
+ {
// Since we're already communicating at this step, we
// can propagate intra-simulation signals. Note that
// check_nstglobalcomm has the responsibility for
bool doIntraSimSignal = true;
SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
- compute_globals(
- gstat,
- cr,
- ir,
- fr,
- ekind,
- makeConstArrayRef(state->x),
- makeConstArrayRef(state->v),
- state->box,
- mdatoms,
- nrnb,
- &vcm,
- wcycle,
- enerd,
- force_vir,
- shake_vir,
- total_vir,
- pres,
- (!EI_VV(ir->eI) && bCalcEner && constr != nullptr) ? constr->rmsdData()
- : gmx::ArrayRef<real>{},
- &signaller,
- lastbox,
- &totalNumberOfBondedInteractions,
- &bSumEkinhOld,
- (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
- | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
- | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
- | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
- | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
- : 0));
- checkNumberOfBondedInteractions(mdlog,
- cr,
- totalNumberOfBondedInteractions,
- top_global,
- &top,
- makeConstArrayRef(state->x),
- state->box,
- &shouldCheckNumberOfBondedInteractions);
+ compute_globals(gstat,
+ cr,
+ ir,
+ fr,
+ ekind,
+ makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v),
+ state->box,
+ md,
+ nrnb,
+ &vcm,
+ wcycle,
+ enerd,
+ force_vir,
+ shake_vir,
+ total_vir,
+ pres,
+ &signaller,
+ lastbox,
+ &bSumEkinhOld,
+ (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
+ | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
+ | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
+ | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT,
+ step,
+ &observablesReducer);
if (!EI_VV(ir->eI) && bStopCM)
{
process_and_stopcm_grp(
- fplog, &vcm, *mdatoms, makeArrayRef(state->x), makeArrayRef(state->v));
- inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
+ fplog, &vcm, *md, makeArrayRef(state->x), makeArrayRef(state->v));
+ inc_nrnb(nrnb, eNR_STOPCM, md->homenr);
// TODO: The special case of removing CM motion should be dealt more gracefully
if (useGpuForUpdate)
{
+ // Issue #3988, #4106.
+ stateGpu->resetCoordinatesCopiedToDeviceEvent(AtomLocality::Local);
stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
// Here we block until the H2D copy completes because event sync with the
// force kernels that use the coordinates on the next steps is not implemented
accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
}
- update_pcouple_after_coordinates(
- fplog, step, ir, mdatoms, pres, force_vir, shake_vir, pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
+ bool scaleCoordinates = !useGpuForUpdate || bDoReplEx;
+ update_pcouple_after_coordinates(fplog,
+ step,
+ ir,
+ md->homenr,
+ md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
+ : gmx::ArrayRef<const unsigned short>(),
+ pres,
+ force_vir,
+ shake_vir,
+ pressureCouplingMu,
+ state,
+ nrnb,
+ upd.deform(),
+ scaleCoordinates);
const bool doBerendsenPressureCoupling = (inputrec->epc == PressureCoupling::Berendsen
&& do_per_step(step, inputrec->nstpcouple));
}
if (bCalcEner)
{
- energyOutput.addDataAtEnergyStep(bDoDHDL,
+ const bool outputDHDL = (computeDHDL && do_per_step(step, ir->fepvals->nstdhdl));
+
+ energyOutput.addDataAtEnergyStep(outputDHDL,
bCalcEnerStep,
t,
- mdatoms->tmass,
+ md->tmass,
enerd,
ir->fepvals.get(),
- ir->expandedvals.get(),
lastbox,
PTCouplingArrays{ state->boxv,
state->nosehoover_xi,
state->nhpres_xi,
state->nhpres_vxi },
state->fep_state,
- shake_vir,
- force_vir,
total_vir,
pres,
ekind,
MASTER(cr) && mdrunOptions.verbose,
bRerunMD);
- if (bNeedRepartition && DOMAINDECOMP(cr))
+ if (bNeedRepartition && haveDDAtomOrdering(*cr))
{
dd_collect_state(cr->dd, state, state_global);
}
bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
}
- if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
+ if ((bExchanged || bNeedRepartition) && haveDDAtomOrdering(*cr))
{
dd_partition_system(fplog,
mdlog,
state,
&f,
mdAtoms,
- &top,
+ top,
fr,
vsite,
constr,
nrnb,
wcycle,
FALSE);
- shouldCheckNumberOfBondedInteractions = true;
- upd.setNumAtoms(state->natoms);
+ upd.updateAfterPartition(state->natoms,
+ md->cFREEZE ? gmx::arrayRefFromArray(md->cFREEZE, md->nr)
+ : gmx::ArrayRef<const unsigned short>(),
+ md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr)
+ : gmx::ArrayRef<const unsigned short>());
+ fr->longRangeNonbondeds->updateAfterPartition(*md);
}
bFirstStep = FALSE;
rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
}
- cycles = wallcycle_stop(wcycle, ewcSTEP);
- if (DOMAINDECOMP(cr) && wcycle)
+ cycles = wallcycle_stop(wcycle, WallCycleCounter::Step);
+ if (haveDDAtomOrdering(*cr) && wcycle)
{
dd_cycles_add(cr->dd, cycles, ddCyclStep);
}
/* increase the MD step number */
step++;
step_rel++;
+ observablesReducer.markAsReadyToReduce();
#if GMX_FAHCORE
if (MASTER(cr))
/* Stop measuring walltime */
walltime_accounting_end_time(walltime_accounting);
- if (!thisRankHasDuty(cr, DUTY_PME))
+ if (simulationWork.haveSeparatePmeRank)
{
/* Tell the PME only node to finish */
gmx_pme_send_finish(cr);