Add lambda state to dhdl.xvg with AWH fep

[alexxy/gromacs.git] / src / gromacs / mdrun / minimize.cpp
diff --git a/src/gromacs/mdrun/minimize.cpp b/src/gromacs/mdrun/minimize.cpp

index c46bbec827592184c63ae2676dc71fe70ad8d464..eec144faf42604fa6ae6a41a0607abc4bf580253 100644 (file)
--- a/src/gromacs/mdrun/minimize.cpp
+++ b/src/gromacs/mdrun/minimize.cpp
@@ -52,6 +52,7 @@
  #include <ctime>
  
  #include <algorithm>
+#include <limits>
  #include <vector>
  
  #include "gromacs/commandline/filenm.h"
@@ -99,6 +100,7 @@
  #include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/mdtypes/mdatom.h"
  #include "gromacs/mdtypes/mdrunoptions.h"
+#include "gromacs/mdtypes/observablesreducer.h"
  #include "gromacs/mdtypes/state.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/timing/wallcycle.h"
@@ -140,18 +142,18 @@ typedef struct em_state
  static void print_em_start(FILE*                     fplog,
                             const t_commrec*          cr,
                             gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
+                           gmx_wallcycle*            wcycle,
                             const char*               name)
  {
      walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
+    wallcycle_start(wcycle, WallCycleCounter::Run);
      print_start(fplog, cr, walltime_accounting, name);
  }
  
  //! Stop counting time for EM
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle)
+static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle* wcycle)
  {
-    wallcycle_stop(wcycle, ewcRUN);
+    wallcycle_stop(wcycle, WallCycleCounter::Run);
  
      walltime_accounting_end_time(walltime_accounting);
  }
@@ -313,7 +315,7 @@ static void get_f_norm_max(const t_commrec*               cr,
          }
      }
  
-    if (la_max >= 0 && DOMAINDECOMP(cr))
+    if (la_max >= 0 && haveDDAtomOrdering(*cr))
      {
          a_max = cr->dd->globalAtomIndices[la_max];
      }
@@ -370,7 +372,7 @@ static void init_em(FILE*                fplog,
                      gmx::ImdSession*     imdSession,
                      pull_t*              pull_work,
                      t_state*             state_global,
-                    const gmx_mtop_t*    top_global,
+                    const gmx_mtop_t&    top_global,
                      em_state_t*          ems,
                      gmx_localtop_t*      top,
                      t_nrnb*              nrnb,
@@ -394,7 +396,15 @@ static void init_em(FILE*                fplog,
      }
      int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
      gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
-    initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
+    initialize_lambdas(fplog,
+                       ir->efep,
+                       ir->bSimTemp,
+                       *ir->fepvals,
+                       ir->simtempvals->temperatures,
+                       gmx::arrayRefFromArray(ir->opts.ref_t, ir->opts.ngtc),
+                       MASTER(cr),
+                       fep_state,
+                       lambda);
  
      if (ir->eI == IntegrationAlgorithm::NM)
      {
@@ -404,7 +414,7 @@ static void init_em(FILE*                fplog,
                                        top_global,
                                        constr ? constr->numFlexibleConstraints() : 0,
                                        ir->nstcalcenergy,
-                                      DOMAINDECOMP(cr),
+                                      haveDDAtomOrdering(*cr),
                                        thisRankHasDuty(cr, DUTY_PME));
      }
      else
@@ -422,8 +432,9 @@ static void init_em(FILE*                fplog,
          }
      }
  
-    if (DOMAINDECOMP(cr))
+    if (haveDDAtomOrdering(*cr))
      {
+        // Local state only becomes valid now.
          dd_init_local_state(*cr->dd, state_global, &ems->s);
  
          /* Distribute the charge groups over the nodes from the master node */
@@ -434,7 +445,7 @@ static void init_em(FILE*                fplog,
                              TRUE,
                              1,
                              state_global,
-                            *top_global,
+                            top_global,
                              *ir,
                              imdSession,
                              pull_work,
@@ -458,7 +469,7 @@ static void init_em(FILE*                fplog,
          state_change_natoms(&ems->s, ems->s.natoms);
  
          mdAlgorithmsSetupAtomData(
-                cr, *ir, *top_global, top, fr, &ems->f, mdAtoms, constr, vsite, shellfc ? *shellfc : nullptr);
+                cr, *ir, top_global, top, fr, &ems->f, mdAtoms, constr, vsite, shellfc ? *shellfc : nullptr);
      }
  
      update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[FreeEnergyPerturbationCouplingType::Mass]);
@@ -515,7 +526,7 @@ static void init_em(FILE*                fplog,
  static void finish_em(const t_commrec*          cr,
                        gmx_mdoutf_t              outf,
                        gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t           wcycle)
+                      gmx_wallcycle*            wcycle)
  {
      if (!thisRankHasDuty(cr, DUTY_PME))
      {
@@ -545,7 +556,7 @@ static void write_em_traj(FILE*               fplog,
                            gmx_bool            bX,
                            gmx_bool            bF,
                            const char*         confout,
-                          const gmx_mtop_t*   top_global,
+                          const gmx_mtop_t&   top_global,
                            const t_inputrec*   ir,
                            int64_t             step,
                            em_state_t*         state,
@@ -574,7 +585,7 @@ static void write_em_traj(FILE*               fplog,
                                       cr,
                                       outf,
                                       mdof_flags,
-                                     top_global->natoms,
+                                     top_global.natoms,
                                       step,
                                       static_cast<double>(step),
                                       &state->s,
@@ -585,7 +596,7 @@ static void write_em_traj(FILE*               fplog,
  
      if (confout != nullptr)
      {
-        if (DOMAINDECOMP(cr))
+        if (haveDDAtomOrdering(*cr))
          {
              /* If bX=true, x was collected to state_global in the call above */
              if (!bX)
@@ -603,14 +614,14 @@ static void write_em_traj(FILE*               fplog,
  
          if (MASTER(cr))
          {
-            if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && DOMAINDECOMP(cr))
+            if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && haveDDAtomOrdering(*cr))
              {
                  /* Make molecules whole only for confout writing */
-                do_pbc_mtop(ir->pbcType, state->s.box, top_global, state_global->x.rvec_array());
+                do_pbc_mtop(ir->pbcType, state->s.box, &top_global, state_global->x.rvec_array());
              }
  
              write_sto_conf_mtop(confout,
-                                *top_global->name,
+                                *top_global.name,
                                  top_global,
                                  state_global->x.rvec_array(),
                                  nullptr,
@@ -634,9 +645,9 @@ static bool do_em_step(const t_commrec*                          cr,
                         int64_t                                   count)
  
  {
-    t_state *s1, *s2;
-    int      start, end;
-    real     dvdl_constr;
+    t_state *    s1, *s2;
+    int          start, end;
+    real         dvdl_constr;
      int nthreads gmx_unused;
  
      bool validStep = true;
@@ -644,7 +655,7 @@ static bool do_em_step(const t_commrec*                          cr,
      s1 = &ems1->s;
      s2 = &ems2->s;
  
-    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
+    if (haveDDAtomOrdering(*cr) && s1->ddp_count != cr->dd->ddp_count)
      {
          gmx_incons("state mismatch in do_em_step");
      }
@@ -656,7 +667,7 @@ static bool do_em_step(const t_commrec*                          cr,
          state_change_natoms(s2, s1->natoms);
          ems2->f.resize(s2->natoms);
      }
-    if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
+    if (haveDDAtomOrdering(*cr) && s2->cg_gl.size() != s1->cg_gl.size())
      {
          s2->cg_gl.resize(s1->cg_gl.size());
      }
@@ -669,7 +680,7 @@ static bool do_em_step(const t_commrec*                          cr,
      start = 0;
      end   = md->homenr;
  
-    nthreads = gmx_omp_nthreads_get(emntUpdate);
+    nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
  #pragma omp parallel num_threads(nthreads)
      {
          const rvec* x1 = s1->x.rvec_array();
@@ -714,7 +725,7 @@ static bool do_em_step(const t_commrec*                          cr,
              }
          }
  
-        if (DOMAINDECOMP(cr))
+        if (haveDDAtomOrdering(*cr))
          {
              /* OpenMP does not supported unsigned loop variables */
  #pragma omp for schedule(static) nowait
@@ -725,7 +736,7 @@ static bool do_em_step(const t_commrec*                          cr,
          }
      }
  
-    if (DOMAINDECOMP(cr))
+    if (haveDDAtomOrdering(*cr))
      {
          s2->ddp_count       = s1->ddp_count;
          s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
@@ -781,7 +792,7 @@ static void em_dd_partition_system(FILE*                fplog,
                                     const gmx::MDLogger& mdlog,
                                     int                  step,
                                     const t_commrec*     cr,
-                                   const gmx_mtop_t*    top_global,
+                                   const gmx_mtop_t&    top_global,
                                     const t_inputrec*    ir,
                                     gmx::ImdSession*     imdSession,
                                     pull_t*              pull_work,
@@ -792,7 +803,7 @@ static void em_dd_partition_system(FILE*                fplog,
                                     VirtualSitesHandler* vsite,
                                     gmx::Constraints*    constr,
                                     t_nrnb*              nrnb,
-                                   gmx_wallcycle_t      wcycle)
+                                   gmx_wallcycle*       wcycle)
  {
      /* Repartition the domain decomposition */
      dd_partition_system(fplog,
@@ -802,7 +813,7 @@ static void em_dd_partition_system(FILE*                fplog,
                          FALSE,
                          1,
                          nullptr,
-                        *top_global,
+                        top_global,
                          *ir,
                          imdSession,
                          pull_work,
@@ -822,6 +833,53 @@ static void em_dd_partition_system(FILE*                fplog,
  namespace
  {
  
+//! Copy coordinates, OpenMP parallelized, from \p refCoords to coords
+void setCoordinates(std::vector<RVec>* coords, ArrayRef<const RVec> refCoords)
+{
+    coords->resize(refCoords.size());
+
+    const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
+#pragma omp parallel for num_threads(nthreads) schedule(static)
+    for (int i = 0; i < ssize(refCoords); i++)
+    {
+        (*coords)[i] = refCoords[i];
+    }
+}
+
+//! Returns the maximum difference an atom moved between two coordinate sets, over all ranks
+real maxCoordinateDifference(ArrayRef<const RVec> coords1, ArrayRef<const RVec> coords2, MPI_Comm mpiCommMyGroup)
+{
+    GMX_RELEASE_ASSERT(coords1.size() == coords2.size(), "Coordinate counts should match");
+
+    real maxDiffSquared = 0;
+
+    const int gmx_unused nthreads = gmx_omp_nthreads_get(ModuleMultiThread::Update);
+#pragma omp parallel for reduction(max : maxDiffSquared) num_threads(nthreads) schedule(static)
+    for (int i = 0; i < ssize(coords1); i++)
+    {
+        maxDiffSquared = std::max(maxDiffSquared, gmx::norm2(coords1[i] - coords2[i]));
+    }
+
+#if GMX_MPI
+    int numRanks = 1;
+    if (mpiCommMyGroup != MPI_COMM_NULL)
+    {
+        MPI_Comm_size(mpiCommMyGroup, &numRanks);
+    }
+    if (numRanks > 1)
+    {
+        real maxDiffSquaredReduced;
+        MPI_Allreduce(
+                &maxDiffSquared, &maxDiffSquaredReduced, 1, GMX_DOUBLE ? MPI_DOUBLE : MPI_FLOAT, MPI_MAX, mpiCommMyGroup);
+        maxDiffSquared = maxDiffSquaredReduced;
+    }
+#else
+    GMX_UNUSED_VALUE(mpiCommMyGroup);
+#endif
+
+    return std::sqrt(maxDiffSquared);
+}
+
  /*! \brief Class to handle the work of setting and doing an energy evaluation.
   *
   * This class is a mere aggregate of parameters to pass to evaluate an
@@ -845,7 +903,7 @@ public:
       * unsuited for aggregate initialization. When the types
       * improve, the call signature of this method can be reduced.
       */
-    void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst);
+    void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst, int64_t step);
      //! Handles logging (deprecated).
      FILE* fplog;
      //! Handles logging.
@@ -855,7 +913,7 @@ public:
      //! Coordinates multi-simulations.
      const gmx_multisim_t* ms;
      //! Holds the simulation topology.
-    const gmx_mtop_t* top_global;
+    const gmx_mtop_t& top_global;
      //! Holds the domain topology.
      gmx_localtop_t* top;
      //! User input options.
@@ -867,9 +925,11 @@ public:
      //! Manages flop accounting.
      t_nrnb* nrnb;
      //! Manages wall cycle accounting.
-    gmx_wallcycle_t wcycle;
-    //! Coordinates global reduction.
+    gmx_wallcycle* wcycle;
+    //! Legacy coordinator of global reduction.
      gmx_global_stat_t gstat;
+    //! Coordinates reduction for observables
+    gmx::ObservablesReducer* observablesReducer;
      //! Handles virtual sites.
      VirtualSitesHandler* vsite;
      //! Handles constraints.
@@ -882,9 +942,13 @@ public:
      MdrunScheduleWorkload* runScheduleWork;
      //! Stores the computed energies.
      gmx_enerdata_t* enerd;
+    //! The DD partitioning count at which the pair list was generated
+    int ddpCountPairSearch;
+    //! The local coordinates that were used for pair searching, stored for computing displacements
+    std::vector<RVec> pairSearchCoordinates;
  };
  
-void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst)
+void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst, int64_t step)
  {
      real     t;
      gmx_bool bNS;
@@ -895,32 +959,46 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
      /* Set the time to the initial time, the time does not change during EM */
      t = inputrec->init_t;
  
-    if (bFirst || (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
+    if (vsite)
+    {
+        vsite->construct(ems->s.x, {}, ems->s.box, gmx::VSiteOperation::Positions);
+    }
+
+    // Compute the buffer size of the pair list
+    const real bufferSize = inputrec->rlist - std::max(inputrec->rcoulomb, inputrec->rvdw);
+
+    if (bFirst || bufferSize <= 0 || (haveDDAtomOrdering(*cr) && ems->s.ddp_count != ddpCountPairSearch))
      {
          /* This is the first state or an old state used before the last ns */
          bNS = TRUE;
      }
      else
      {
-        bNS = FALSE;
-        if (inputrec->nstlist > 0)
-        {
-            bNS = TRUE;
-        }
+        // We need to generate a new pairlist when one atom moved more than half the buffer size
+        ArrayRef<const RVec> localCoordinates =
+                ArrayRef<const RVec>(ems->s.x).subArray(0, mdAtoms->mdatoms()->homenr);
+        bNS = 2 * maxCoordinateDifference(pairSearchCoordinates, localCoordinates, cr->mpi_comm_mygroup)
+              > bufferSize;
      }
  
-    if (vsite)
-    {
-        vsite->construct(ems->s.x, {}, ems->s.box, gmx::VSiteOperation::Positions);
-    }
-
-    if (DOMAINDECOMP(cr) && bNS)
+    if (haveDDAtomOrdering(*cr) && bNS)
      {
          /* Repartition the domain decomposition */
          em_dd_partition_system(
                  fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work, ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
+        ddpCountPairSearch = cr->dd->ddp_count;
+    }
+
+    /* Store the local coordinates that will be used in the pair search, after we re-partitioned */
+    if (bufferSize > 0 && bNS)
+    {
+        ArrayRef<const RVec> localCoordinates =
+                constArrayRefFromArray(ems->s.x.data(), mdAtoms->mdatoms()->homenr);
+        setCoordinates(&pairSearchCoordinates, localCoordinates);
      }
  
+    fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms());
+
      /* Calc force & energy on new trial position  */
      /* do_force always puts the charge groups in the box and shifts again
       * We do not unshift, so molecules are always whole in congrad.c
@@ -951,6 +1029,7 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
               mu_tot,
               t,
               nullptr,
+             fr->longRangeNonbondeds.get(),
               GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY
                       | (bNS ? GMX_FORCE_NS : 0),
               DDBalanceRegionHandler(cr));
@@ -962,24 +1041,23 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
      /* Communicate stuff when parallel */
      if (PAR(cr) && inputrec->eI != IntegrationAlgorithm::NM)
      {
-        wallcycle_start(wcycle, ewcMoveE);
+        wallcycle_start(wcycle, WallCycleCounter::MoveE);
  
-        global_stat(gstat,
+        global_stat(*gstat,
                      cr,
                      enerd,
                      force_vir,
                      shake_vir,
-                    inputrec,
+                    *inputrec,
                      nullptr,
-                    gmx::ArrayRef<real>{},
-                    nullptr,
-                    1,
-                    &terminate,
                      nullptr,
+                    std::vector<real>(1, terminate),
                      FALSE,
-                    CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT);
+                    CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT,
+                    step,
+                    observablesReducer);
  
-        wallcycle_stop(wcycle, ewcMoveE);
+        wallcycle_stop(wcycle, WallCycleCounter::MoveE);
      }
  
      if (fr->dispersionCorrection)
@@ -1050,7 +1128,7 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres,
  //! Parallel utility summing energies and forces
  static double reorder_partsum(const t_commrec*  cr,
                                const t_grpopts*  opts,
-                              const gmx_mtop_t* top_global,
+                              const gmx_mtop_t& top_global,
                                const em_state_t* s_min,
                                const em_state_t* s_b)
  {
@@ -1066,7 +1144,7 @@ static double reorder_partsum(const t_commrec*  cr,
       * This conflicts with the spirit of domain decomposition,
       * but to fully optimize this a much more complicated algorithm is required.
       */
-    const int natoms = top_global->natoms;
+    const int natoms = top_global.natoms;
      rvec*     fmg;
      snew(fmg, natoms);
  
@@ -1077,7 +1155,7 @@ static double reorder_partsum(const t_commrec*  cr,
          copy_rvec(fm[i], fmg[a]);
          i++;
      }
-    gmx_sum(top_global->natoms * 3, fmg[0], cr);
+    gmx_sum(top_global.natoms * 3, fmg[0], cr);
  
      /* Now we will determine the part of the sum for the cgs in state s_b */
      gmx::ArrayRef<const int> indicesB = s_b->s.cg_gl;
@@ -1086,7 +1164,7 @@ static double reorder_partsum(const t_commrec*  cr,
      i                                     = 0;
      int                                gf = 0;
      gmx::ArrayRef<const unsigned char> grpnrFREEZE =
-            top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze];
+            top_global.groups.groupNumbers[SimulationAtomGroupType::Freeze];
      for (int a : indicesB)
      {
          if (!grpnrFREEZE.empty())
@@ -1112,7 +1190,7 @@ static double reorder_partsum(const t_commrec*  cr,
  static real pr_beta(const t_commrec*  cr,
                      const t_grpopts*  opts,
                      t_mdatoms*        mdatoms,
-                    const gmx_mtop_t* top_global,
+                    const gmx_mtop_t& top_global,
                      const em_state_t* s_min,
                      const em_state_t* s_b)
  {
@@ -1123,7 +1201,7 @@ static real pr_beta(const t_commrec*  cr,
       * and might have to sum it in parallel runs.
       */
  
-    if (!DOMAINDECOMP(cr)
+    if (!haveDDAtomOrdering(*cr)
          || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count))
      {
          auto fm = s_min->f.view().force();
@@ -1168,7 +1246,6 @@ void LegacySimulator::do_cg()
  {
      const char* CG = "Polak-Ribiere Conjugate Gradients";
  
-    gmx_localtop_t    top(top_global->ffparams);
      gmx_global_stat_t gstat;
      double            tmp, minstep;
      real              stepsize;
@@ -1181,7 +1258,7 @@ void LegacySimulator::do_cg()
      tensor            vir, pres;
      int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
      int               m, step, nminstep;
-    auto              mdatoms = mdAtoms->mdatoms();
+    auto*             mdatoms = mdAtoms->mdatoms();
  
      GMX_LOG(mdlog.info)
              .asParagraph()
@@ -1215,6 +1292,8 @@ void LegacySimulator::do_cg()
      em_state_t* s_b   = &s2;
      em_state_t* s_c   = &s3;
  
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
+
      /* Init em and store the local state in s_min */
      init_em(fplog,
              mdlog,
@@ -1226,7 +1305,7 @@ void LegacySimulator::do_cg()
              state_global,
              top_global,
              s_min,
-            &top,
+            top,
              nrnb,
              fr,
              mdAtoms,
@@ -1241,7 +1320,7 @@ void LegacySimulator::do_cg()
                                     mdrunOptions,
                                     cr,
                                     outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                     inputrec,
                                     top_global,
                                     nullptr,
@@ -1251,13 +1330,13 @@ void LegacySimulator::do_cg()
                                     ms);
      gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
                                     top_global,
-                                   inputrec,
+                                   *inputrec,
                                     pull_work,
                                     nullptr,
                                     false,
                                     StartingBehavior::NewSimulation,
                                     simulationsShareState,
-                                   mdModulesNotifier);
+                                   mdModulesNotifiers);
  
      /* Print to log file */
      print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
@@ -1274,14 +1353,33 @@ void LegacySimulator::do_cg()
          sp_header(fplog, CG, inputrec->em_tol, number_steps);
      }
  
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd,
+                                     -1,
+                                     {} };
      /* Call the force routine and some auxiliary (neighboursearching etc.) */
      /* do_force always puts the charge groups in the box and shifts again
       * We do not unshift, so molecules are always whole in congrad.c
       */
-    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
+    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE, step);
+    observablesReducer.markAsReadyToReduce();
  
      if (MASTER(cr))
      {
@@ -1293,12 +1391,9 @@ void LegacySimulator::do_cg()
                                           mdatoms->tmass,
                                           enerd,
                                           nullptr,
-                                         nullptr,
                                           nullBox,
                                           PTCouplingArrays(),
                                           0,
-                                         nullptr,
-                                         nullptr,
                                           vir,
                                           pres,
                                           nullptr,
@@ -1415,7 +1510,7 @@ void LegacySimulator::do_cg()
              gmx_sumd(1, &minstep, cr);
          }
  
-        minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global->natoms));
+        minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global.natoms));
  
          if (stepsize < minstep)
          {
@@ -1451,7 +1546,7 @@ void LegacySimulator::do_cg()
          a         = 0.0;
          c         = a + stepsize; /* reference position along line is zero */
  
-        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
+        if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count < cr->dd->ddp_count)
          {
              em_dd_partition_system(fplog,
                                     mdlog,
@@ -1462,7 +1557,7 @@ void LegacySimulator::do_cg()
                                     imdSession,
                                     pull_work,
                                     s_min,
-                                   &top,
+                                   top,
                                     mdAtoms,
                                     fr,
                                     vsite,
@@ -1476,7 +1571,8 @@ void LegacySimulator::do_cg()
  
          neval++;
          /* Calculate energy for the trial step */
-        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
+        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE, step);
+        observablesReducer.markAsReadyToReduce();
  
          /* Calc derivative along line */
          const rvec*                    pc  = s_c->s.cg_p.rvec_array();
@@ -1565,7 +1661,7 @@ void LegacySimulator::do_cg()
                      b = 0.5 * (a + c);
                  }
  
-                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
+                if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count != cr->dd->ddp_count)
                  {
                      /* Reload the old state */
                      em_dd_partition_system(fplog,
@@ -1577,7 +1673,7 @@ void LegacySimulator::do_cg()
                                             imdSession,
                                             pull_work,
                                             s_min,
-                                           &top,
+                                           top,
                                             mdAtoms,
                                             fr,
                                             vsite,
@@ -1591,7 +1687,8 @@ void LegacySimulator::do_cg()
  
                  neval++;
                  /* Calculate energy for the trial step */
-                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
+                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE, step);
+                observablesReducer.markAsReadyToReduce();
  
                  /* p does not change within a step, but since the domain decomposition
                   * might change, we have to use cg_p of s_b here.
@@ -1744,12 +1841,9 @@ void LegacySimulator::do_cg()
                                               mdatoms->tmass,
                                               enerd,
                                               nullptr,
-                                             nullptr,
                                               nullBox,
                                               PTCouplingArrays(),
                                               0,
-                                             nullptr,
-                                             nullptr,
                                               vir,
                                               pres,
                                               nullptr,
@@ -1777,7 +1871,7 @@ void LegacySimulator::do_cg()
          }
  
          /* Send energies and positions to the IMD client if bIMD is TRUE. */
-        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0))
+        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x, 0))
          {
              imdSession->sendPositionsAndEnergies();
          }
@@ -1786,7 +1880,7 @@ void LegacySimulator::do_cg()
           * If we have reached machine precision, converged is already set to true.
           */
          converged = converged || (s_min->fmax < inputrec->em_tol);
-
+        observablesReducer.markAsReadyToReduce();
      } /* End of the loop */
  
      if (converged)
@@ -1870,22 +1964,8 @@ void LegacySimulator::do_lbfgs()
  {
      static const char* LBFGS = "Low-Memory BFGS Minimizer";
      em_state_t         ems;
-    gmx_localtop_t     top(top_global->ffparams);
      gmx_global_stat_t  gstat;
-    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
-    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
-    real *             rho, *alpha, *p, *s, **dx, **dg;
-    real               a, b, c, maxdelta, delta;
-    real               diag, Epot0;
-    real               dgdx, dgdg, sq, yr, beta;
-    gmx_bool           converged;
-    rvec               mu_tot = { 0 };
-    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
-    tensor             vir, pres;
-    int                start, end, number_steps;
-    int                i, k, m, n, gf, step;
-    int                mdof_flags;
-    auto               mdatoms = mdAtoms->mdatoms();
+    auto*              mdatoms = mdAtoms->mdatoms();
  
      GMX_LOG(mdlog.info)
              .asParagraph()
@@ -1895,6 +1975,10 @@ void LegacySimulator::do_lbfgs()
                      "be available in a different form in a future version of GROMACS, "
                      "e.g. gmx minimize and an .mdp option.");
  
+    if (haveDDAtomOrdering(*cr))
+    {
+        gmx_fatal(FARGS, "L_BFGS is currently not supported");
+    }
      if (PAR(cr))
      {
          gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
@@ -1908,29 +1992,29 @@ void LegacySimulator::do_lbfgs()
                  "do not use constraints, or use another minimizer (e.g. steepest descent).");
      }
  
-    n        = 3 * state_global->natoms;
-    nmaxcorr = inputrec->nbfgscorr;
+    const int n        = 3 * state_global->natoms;
+    const int nmaxcorr = inputrec->nbfgscorr;
  
-    snew(frozen, n);
+    std::vector<real> p(n);
+    std::vector<real> rho(nmaxcorr);
+    std::vector<real> alpha(nmaxcorr);
  
-    snew(p, n);
-    snew(rho, nmaxcorr);
-    snew(alpha, nmaxcorr);
-
-    snew(dx, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
+    std::vector<std::vector<real>> dx(nmaxcorr);
+    for (auto& dxCorr : dx)
      {
-        snew(dx[i], n);
+        dxCorr.resize(n);
      }
  
-    snew(dg, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
+    std::vector<std::vector<real>> dg(nmaxcorr);
+    for (auto& dgCorr : dg)
      {
-        snew(dg[i], n);
+        dgCorr.resize(n);
      }
  
-    step  = 0;
-    neval = 0;
+    int step  = 0;
+    int neval = 0;
+
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
  
      /* Init em */
      init_em(fplog,
@@ -1943,7 +2027,7 @@ void LegacySimulator::do_lbfgs()
              state_global,
              top_global,
              &ems,
-            &top,
+            top,
              nrnb,
              fr,
              mdAtoms,
@@ -1958,7 +2042,7 @@ void LegacySimulator::do_lbfgs()
                                     mdrunOptions,
                                     cr,
                                     outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                     inputrec,
                                     top_global,
                                     nullptr,
@@ -1968,16 +2052,16 @@ void LegacySimulator::do_lbfgs()
                                     ms);
      gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
                                     top_global,
-                                   inputrec,
+                                   *inputrec,
                                     pull_work,
                                     nullptr,
                                     false,
                                     StartingBehavior::NewSimulation,
                                     simulationsShareState,
-                                   mdModulesNotifier);
+                                   mdModulesNotifiers);
  
-    start = 0;
-    end   = mdatoms->homenr;
+    const int start = 0;
+    const int end   = mdatoms->homenr;
  
      /* We need 4 working states */
      em_state_t  s0{}, s1{}, s2{}, s3{};
@@ -1993,20 +2077,19 @@ void LegacySimulator::do_lbfgs()
      /* Print to log file */
      print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
  
-    do_log = do_ene = do_f = TRUE;
-
      /* Max number of steps */
-    number_steps = inputrec->nsteps;
+    const int number_steps = inputrec->nsteps;
  
      /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
-    gf = 0;
-    for (i = start; i < end; i++)
+    std::vector<bool> frozen(n);
+    int               gf = 0;
+    for (int i = start; i < end; i++)
      {
          if (mdatoms->cFREEZE)
          {
              gf = mdatoms->cFREEZE[i];
          }
-        for (m = 0; m < DIM; m++)
+        for (int m = 0; m < DIM; m++)
          {
              frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0);
          }
@@ -2030,10 +2113,29 @@ void LegacySimulator::do_lbfgs()
       * We do not unshift, so molecules are always whole
       */
      neval++;
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
-    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
+    rvec            mu_tot;
+    tensor          vir;
+    tensor          pres;
+    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE, step);
  
      if (MASTER(cr))
      {
@@ -2045,12 +2147,9 @@ void LegacySimulator::do_lbfgs()
                                           mdatoms->tmass,
                                           enerd,
                                           nullptr,
-                                         nullptr,
                                           nullBox,
                                           PTCouplingArrays(),
                                           0,
-                                         nullptr,
-                                         nullptr,
                                           vir,
                                           pres,
                                           nullptr,
@@ -2083,11 +2182,11 @@ void LegacySimulator::do_lbfgs()
      }
  
      // Point is an index to the memory of search directions, where 0 is the first one.
-    point = 0;
+    int point = 0;
  
      // Set initial search direction to the force (-gradient), or 0 for frozen particles.
      real* fInit = static_cast<real*>(ems.f.view().force().data()[0]);
-    for (i = 0; i < n; i++)
+    for (int i = 0; i < n; i++)
      {
          if (!frozen[i])
          {
@@ -2103,25 +2202,28 @@ void LegacySimulator::do_lbfgs()
      // (the main efficiency in the algorithm comes from changing directions), but
      // we still need an initial value, so estimate it as the inverse of the norm
      // so we take small steps where the potential fluctuates a lot.
-    stepsize = 1.0 / ems.fnorm;
+    double stepsize = 1.0 / ems.fnorm;
  
      /* Start the loop over BFGS steps.
       * Each successful step is counted, and we continue until
       * we either converge or reach the max number of steps.
       */
  
-    ncorr = 0;
+    bool do_log = true;
+    bool do_ene = true;
+
+    int ncorr = 0;
  
      /* Set the gradient from the force */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
+    bool converged = false;
+    for (int step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
+        const bool do_x = do_per_step(step, inputrec->nstxout);
+        const bool do_f = do_per_step(step, inputrec->nstfout);
  
-        mdof_flags = 0;
+        int mdof_flags = 0;
          if (do_x)
          {
              mdof_flags |= MDOF_X;
@@ -2142,7 +2244,7 @@ void LegacySimulator::do_lbfgs()
                                           cr,
                                           outf,
                                           mdof_flags,
-                                         top_global->natoms,
+                                         top_global.natoms,
                                           step,
                                           static_cast<real>(step),
                                           &ems.s,
@@ -2154,13 +2256,14 @@ void LegacySimulator::do_lbfgs()
          /* Do the linesearching in the direction dx[point][0..(n-1)] */
  
          /* make s a pointer to current search direction - point=0 first time we get here */
-        s = dx[point];
+        gmx::ArrayRef<const real> s = dx[point];
  
-        real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
-        real* ff = static_cast<real*>(ems.f.view().force().data()[0]);
+        const real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
+        const real* ff = static_cast<real*>(ems.f.view().force().data()[0]);
  
          // calculate line gradient in position A
-        for (gpa = 0, i = 0; i < n; i++)
+        double gpa = 0;
+        for (int i = 0; i < n; i++)
          {
              gpa -= s[i] * ff[i];
          }
@@ -2168,9 +2271,10 @@ void LegacySimulator::do_lbfgs()
          /* Calculate minimum allowed stepsize along the line, before the average (norm)
           * relative change in coordinate is smaller than precision
           */
-        for (minstep = 0, i = 0; i < n; i++)
+        double minstep = 0;
+        for (int i = 0; i < n; i++)
          {
-            tmp = fabs(xx[i]);
+            double tmp = fabs(xx[i]);
              if (tmp < 1.0)
              {
                  tmp = 1.0;
@@ -2182,15 +2286,15 @@ void LegacySimulator::do_lbfgs()
  
          if (stepsize < minstep)
          {
-            converged = TRUE;
+            converged = true;
              break;
          }
  
          // Before taking any steps along the line, store the old position
-        *last       = ems;
-        real* lastx = static_cast<real*>(last->s.x.data()[0]);
-        real* lastf = static_cast<real*>(last->f.view().force().data()[0]);
-        Epot0       = ems.epot;
+        *last            = ems;
+        real*      lastx = static_cast<real*>(last->s.x.data()[0]);
+        real*      lastf = static_cast<real*>(last->f.view().force().data()[0]);
+        const real Epot0 = ems.epot;
  
          *sa = ems;
  
@@ -2221,11 +2325,13 @@ void LegacySimulator::do_lbfgs()
  
          // State "A" is the first position along the line.
          // reference position along line is initially zero
-        a = 0.0;
+        real a = 0;
  
          // Check stepsize first. We do not allow displacements
          // larger than emstep.
          //
+        real c;
+        real maxdelta;
          do
          {
              // Pick a new position C by adding stepsize to A.
@@ -2234,9 +2340,9 @@ void LegacySimulator::do_lbfgs()
              // Calculate what the largest change in any individual coordinate
              // would be (translation along line * gradient along line)
              maxdelta = 0;
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
              {
-                delta = c * s[i];
+                real delta = c * s[i];
                  if (delta > maxdelta)
                  {
                      maxdelta = delta;
@@ -2251,18 +2357,19 @@ void LegacySimulator::do_lbfgs()
  
          // Take a trial step and move the coordinate array xc[] to position C
          real* xc = static_cast<real*>(sc->s.x.rvec_array()[0]);
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              xc[i] = lastx[i] + c * s[i];
          }
  
          neval++;
          // Calculate energy for the trial step in position C
-        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
+        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE, step);
  
          // Calc line gradient in position C
-        real* fc = static_cast<real*>(sc->f.view().force()[0]);
-        for (gpc = 0, i = 0; i < n; i++)
+        real*  fc  = static_cast<real*>(sc->f.view().force()[0]);
+        double gpc = 0;
+        for (int i = 0; i < n; i++)
          {
              gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */
          }
@@ -2275,11 +2382,11 @@ void LegacySimulator::do_lbfgs()
          // This is the max amount of increase in energy we tolerate.
          // By allowing VERY small changes (close to numerical precision) we
          // frequently find even better (lower) final energies.
-        tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
+        double tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
  
          // Accept the step if the energy is lower in the new position C (compared to A),
          // or if it is not significantly higher and the line derivative is still negative.
-        foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
+        bool foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
          // If true, great, we found a better energy. We no longer try to alter the
          // stepsize, but simply accept this new better position. The we select a new
          // search direction instead, which will be much more efficient than continuing
@@ -2295,6 +2402,7 @@ void LegacySimulator::do_lbfgs()
          // than with the stepsize, so no need to modify it. For the next search direction
          // it will be reset to 1/fnorm anyway.
  
+        double step_taken;
          if (!foundlower)
          {
              // OK, if we didn't find a lower value we will have to locate one now - there must
@@ -2305,14 +2413,15 @@ void LegacySimulator::do_lbfgs()
              // I also have a safeguard for potentially really pathological functions so we never
              // take more than 20 steps before we give up.
              // If we already found a lower value we just skip this step and continue to the update.
-            real fnorm = 0;
-            nminstep   = 0;
+            real fnorm    = 0;
+            int  nminstep = 0;
              do
              {
                  // Select a new trial point B in the interval [A,C].
                  // If the derivatives at points a & c have different sign we interpolate to zero,
                  // otherwise just do a bisection since there might be multiple minima/maxima
                  // inside the interval.
+                real b;
                  if (gpa < 0 && gpc > 0)
                  {
                      b = a + gpa * (a - c) / (gpc - gpa);
@@ -2332,19 +2441,20 @@ void LegacySimulator::do_lbfgs()
  
                  // Take a trial step to point B
                  real* xb = static_cast<real*>(sb->s.x.rvec_array()[0]);
-                for (i = 0; i < n; i++)
+                for (int i = 0; i < n; i++)
                  {
                      xb[i] = lastx[i] + b * s[i];
                  }
  
                  neval++;
                  // Calculate energy for the trial step in point B
-                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
+                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE, step);
                  fnorm = sb->fnorm;
  
                  // Calculate gradient in point B
-                real* fb = static_cast<real*>(sb->f.view().force()[0]);
-                for (gpb = 0, i = 0; i < n; i++)
+                real*  fb  = static_cast<real*>(sb->f.view().force()[0]);
+                double gpb = 0;
+                for (int i = 0; i < n; i++)
                  {
                      gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */
                  }
@@ -2388,7 +2498,7 @@ void LegacySimulator::do_lbfgs()
                  if (ncorr == 0)
                  {
                      /* Converged */
-                    converged = TRUE;
+                    converged = true;
                      break;
                  }
                  else
@@ -2396,7 +2506,7 @@ void LegacySimulator::do_lbfgs()
                      /* Reset memory */
                      ncorr = 0;
                      /* Search in gradient direction */
-                    for (i = 0; i < n; i++)
+                    for (int i = 0; i < n; i++)
                      {
                          dx[point][i] = ff[i];
                      }
@@ -2439,21 +2549,21 @@ void LegacySimulator::do_lbfgs()
              ncorr++;
          }
  
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              dg[point][i] = lastf[i] - ff[i];
              dx[point][i] *= step_taken;
          }
  
-        dgdg = 0;
-        dgdx = 0;
-        for (i = 0; i < n; i++)
+        real dgdg = 0;
+        real dgdx = 0;
+        for (int i = 0; i < n; i++)
          {
              dgdg += dg[point][i] * dg[point][i];
              dgdx += dg[point][i] * dx[point][i];
          }
  
-        diag = dgdx / dgdg;
+        const real diag = dgdx / dgdg;
  
          rho[point] = 1.0 / dgdx;
          point++;
@@ -2464,15 +2574,15 @@ void LegacySimulator::do_lbfgs()
          }
  
          /* Update */
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              p[i] = ff[i];
          }
  
-        cp = point;
+        int cp = point;
  
          /* Recursive update. First go back over the memory points */
-        for (k = 0; k < ncorr; k++)
+        for (int k = 0; k < ncorr; k++)
          {
              cp--;
              if (cp < 0)
@@ -2480,38 +2590,38 @@ void LegacySimulator::do_lbfgs()
                  cp = ncorr - 1;
              }
  
-            sq = 0;
-            for (i = 0; i < n; i++)
+            real sq = 0;
+            for (int i = 0; i < n; i++)
              {
                  sq += dx[cp][i] * p[i];
              }
  
              alpha[cp] = rho[cp] * sq;
  
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
              {
                  p[i] -= alpha[cp] * dg[cp][i];
              }
          }
  
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              p[i] *= diag;
          }
  
          /* And then go forward again */
-        for (k = 0; k < ncorr; k++)
+        for (int k = 0; k < ncorr; k++)
          {
-            yr = 0;
-            for (i = 0; i < n; i++)
+            real yr = 0;
+            for (int i = 0; i < n; i++)
              {
                  yr += p[i] * dg[cp][i];
              }
  
-            beta = rho[cp] * yr;
-            beta = alpha[cp] - beta;
+            real beta = rho[cp] * yr;
+            beta      = alpha[cp] - beta;
  
-            for (i = 0; i < n; i++)
+            for (int i = 0; i < n; i++)
              {
                  p[i] += beta * dx[cp][i];
              }
@@ -2523,7 +2633,7 @@ void LegacySimulator::do_lbfgs()
              }
          }
  
-        for (i = 0; i < n; i++)
+        for (int i = 0; i < n; i++)
          {
              if (!frozen[i])
              {
@@ -2558,12 +2668,9 @@ void LegacySimulator::do_lbfgs()
                                               mdatoms->tmass,
                                               enerd,
                                               nullptr,
-                                             nullptr,
                                               nullBox,
                                               PTCouplingArrays(),
                                               0,
-                                             nullptr,
-                                             nullptr,
                                               vir,
                                               pres,
                                               nullptr,
@@ -2591,7 +2698,7 @@ void LegacySimulator::do_lbfgs()
          }
  
          /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr))
+        if (imdSession->run(step, TRUE, state_global->box, state_global->x, 0) && MASTER(cr))
          {
              imdSession->sendPositionsAndEnergies();
          }
@@ -2603,7 +2710,7 @@ void LegacySimulator::do_lbfgs()
           * If we have reached machine precision, converged is already set to true.
           */
          converged = converged || (ems.fmax < inputrec->em_tol);
-
+        observablesReducer.markAsReadyToReduce();
      } /* End of the loop */
  
      if (converged)
@@ -2652,8 +2759,8 @@ void LegacySimulator::do_lbfgs()
       * However, we should only do it if we did NOT already write this step
       * above (which we did if do_x or do_f was true).
       */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = !do_per_step(step, inputrec->nstfout);
+    const bool do_x = !do_per_step(step, inputrec->nstxout);
+    const bool do_f = !do_per_step(step, inputrec->nstfout);
      write_em_traj(
              fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec, step, &ems, state_global, observablesHistory);
  
@@ -2675,7 +2782,6 @@ void LegacySimulator::do_lbfgs()
  void LegacySimulator::do_steep()
  {
      const char*       SD = "Steepest Descents";
-    gmx_localtop_t    top(top_global->ffparams);
      gmx_global_stat_t gstat;
      real              stepsize;
      real              ustep;
@@ -2685,7 +2791,7 @@ void LegacySimulator::do_steep()
      int               nsteps;
      int               count          = 0;
      int               steps_accepted = 0;
-    auto              mdatoms        = mdAtoms->mdatoms();
+    auto*             mdatoms        = mdAtoms->mdatoms();
  
      GMX_LOG(mdlog.info)
              .asParagraph()
@@ -2700,6 +2806,8 @@ void LegacySimulator::do_steep()
      em_state_t* s_min = &s0;
      em_state_t* s_try = &s1;
  
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
+
      /* Init em and store the local state in s_try */
      init_em(fplog,
              mdlog,
@@ -2711,7 +2819,7 @@ void LegacySimulator::do_steep()
              state_global,
              top_global,
              s_try,
-            &top,
+            top,
              nrnb,
              fr,
              mdAtoms,
@@ -2726,7 +2834,7 @@ void LegacySimulator::do_steep()
                                     mdrunOptions,
                                     cr,
                                     outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                     inputrec,
                                     top_global,
                                     nullptr,
@@ -2736,13 +2844,13 @@ void LegacySimulator::do_steep()
                                     ms);
      gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf),
                                     top_global,
-                                   inputrec,
+                                   *inputrec,
                                     pull_work,
                                     nullptr,
                                     false,
                                     StartingBehavior::NewSimulation,
                                     simulationsShareState,
-                                   mdModulesNotifier);
+                                   mdModulesNotifiers);
  
      /* Print to log file  */
      print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
@@ -2765,9 +2873,25 @@ void LegacySimulator::do_steep()
      {
          sp_header(fplog, SD, inputrec->em_tol, nsteps);
      }
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
  
      /**** HERE STARTS THE LOOP ****
       * count is the counter for the number of steps
@@ -2792,7 +2916,7 @@ void LegacySimulator::do_steep()
  
          if (validStep)
          {
-            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
+            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0, count);
          }
          else
          {
@@ -2836,12 +2960,9 @@ void LegacySimulator::do_steep()
                                                   mdatoms->tmass,
                                                   enerd,
                                                   nullptr,
-                                                 nullptr,
                                                   nullBox,
                                                   PTCouplingArrays(),
                                                   0,
-                                                 nullptr,
-                                                 nullptr,
                                                   vir,
                                                   pres,
                                                   nullptr,
@@ -2890,7 +3011,7 @@ void LegacySimulator::do_steep()
              /* If energy is not smaller make the step smaller...  */
              ustep *= 0.5;
  
-            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
+            if (haveDDAtomOrdering(*cr) && s_min->s.ddp_count != cr->dd->ddp_count)
              {
                  /* Reload the old state */
                  em_dd_partition_system(fplog,
@@ -2902,7 +3023,7 @@ void LegacySimulator::do_steep()
                                         imdSession,
                                         pull_work,
                                         s_min,
-                                       &top,
+                                       top,
                                         mdAtoms,
                                         fr,
                                         vsite,
@@ -2940,7 +3061,7 @@ void LegacySimulator::do_steep()
          if (imdSession->run(count,
                              TRUE,
                              MASTER(cr) ? state_global->box : nullptr,
-                            MASTER(cr) ? state_global->x.rvec_array() : nullptr,
+                            MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>(),
                              0)
              && MASTER(cr))
          {
@@ -2948,6 +3069,7 @@ void LegacySimulator::do_steep()
          }
  
          count++;
+        observablesReducer.markAsReadyToReduce();
      } /* End of the loop  */
  
      /* Print some data...  */
@@ -2992,7 +3114,6 @@ void LegacySimulator::do_nm()
  {
      const char*         NM = "Normal Mode Analysis";
      int                 nnodes;
-    gmx_localtop_t      top(top_global->ffparams);
      gmx_global_stat_t   gstat;
      tensor              vir, pres;
      rvec                mu_tot = { 0 };
@@ -3003,11 +3124,11 @@ void LegacySimulator::do_nm()
      real*               full_matrix   = nullptr;
  
      /* added with respect to mdrun */
-    int  row, col;
-    real der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
-    real x_min;
-    bool bIsMaster = MASTER(cr);
-    auto mdatoms   = mdAtoms->mdatoms();
+    int   row, col;
+    real  der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
+    real  x_min;
+    bool  bIsMaster = MASTER(cr);
+    auto* mdatoms   = mdAtoms->mdatoms();
  
      GMX_LOG(mdlog.info)
              .asParagraph()
@@ -3028,6 +3149,9 @@ void LegacySimulator::do_nm()
  
      em_state_t state_work{};
  
+    fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms());
+    ObservablesReducer observablesReducer = observablesReducerBuilder->build();
+
      /* Init em and store the local state in state_minimum */
      init_em(fplog,
              mdlog,
@@ -3039,7 +3163,7 @@ void LegacySimulator::do_nm()
              state_global,
              top_global,
              &state_work,
-            &top,
+            top,
              nrnb,
              fr,
              mdAtoms,
@@ -3054,7 +3178,7 @@ void LegacySimulator::do_nm()
                                     mdrunOptions,
                                     cr,
                                     outputProvider,
-                                   mdModulesNotifier,
+                                   mdModulesNotifiers,
                                     inputrec,
                                     top_global,
                                     nullptr,
@@ -3132,7 +3256,7 @@ void LegacySimulator::do_nm()
      {
          fprintf(stderr,
                  "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
-                *(top_global->name),
+                *(top_global.name),
                  inputrec->nsteps);
      }
  
@@ -3140,10 +3264,26 @@ void LegacySimulator::do_nm()
  
      /* Make evaluate_energy do a single node force calculation */
      cr->nnodes = 1;
-    EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
-                                     inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
-                                     vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
-    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
+    EnergyEvaluator energyEvaluator{ fplog,
+                                     mdlog,
+                                     cr,
+                                     ms,
+                                     top_global,
+                                     top,
+                                     inputrec,
+                                     imdSession,
+                                     pull_work,
+                                     nrnb,
+                                     wcycle,
+                                     gstat,
+                                     &observablesReducer,
+                                     vsite,
+                                     constr,
+                                     mdAtoms,
+                                     fr,
+                                     runScheduleWork,
+                                     enerd };
+    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE, 0);
      cr->nnodes = nnodes;
  
      /* if forces are not small, warn user */
@@ -3211,7 +3351,7 @@ void LegacySimulator::do_nm()
                                          pull_work,
                                          bNS,
                                          force_flags,
-                                        &top,
+                                        top,
                                          constr,
                                          enerd,
                                          state_work.s.natoms,
@@ -3222,7 +3362,8 @@ void LegacySimulator::do_nm()
                                          &state_work.s.hist,
                                          &state_work.f.view(),
                                          vir,
-                                        mdatoms,
+                                        *mdatoms,
+                                        fr->longRangeNonbondeds.get(),
                                          nrnb,
                                          wcycle,
                                          shellfc,
@@ -3237,7 +3378,7 @@ void LegacySimulator::do_nm()
                  }
                  else
                  {
-                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE);
+                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE, step);
                  }
  
                  cr->nnodes = nnodes;